diff --git a/.codecov.yml b/.codecov.yml
index 5fb5a76bb5..fcf754bf7d 100755
--- a/.codecov.yml
+++ b/.codecov.yml
@@ -1,42 +1,46 @@
 #see https://github.com/codecov/support/wiki/Codecov-Yaml
 codecov:
-  notify:
-    require_ci_to_pass: yes
+  require_ci_to_pass: yes
 coverage:
-  precision: 2 # 2 = xx.xx%, 0 = xx%
-  round: nearest # how coverage is rounded: down/up/nearest
-  range: 10...90 # custom range of coverage colors from red -> yellow -> green
+
+  # 2 = xx.xx%, 0 = xx%
+  precision: 2
+
+  # https://docs.codecov.com/docs/commit-status
   status:
-    # https://codecov.readme.io/v1.0/docs/commit-status
+
+    # We want our total main project to always remain above 87% coverage, a
+    # drop of 0.20% is allowed. It should fail if coverage couldn't be uploaded
+    # or if the CI fails otherwise
     project:
       default:
-        against: auto
-        target: 70% # specify the target coverage for each commit status
-        threshold: 50% # allow this little decrease on project
-        # https://github.com/codecov/support/wiki/Filtering-Branches
-        # branches: master
+        target: 87%
+        threshold: 0.20%
+        if_not_found: failure
         if_ci_failed: error
-    # https://github.com/codecov/support/wiki/Patch-Status
+
+    # The code changed by a PR should have 90% coverage. This is different from the
+    # overall number shown above.
+    # This encourages small PRs as they are easier to test.
     patch:
       default:
-        against: auto
-        target: 30% # specify the target "X%" coverage to hit
-        threshold: 50% # allow this much decrease on patch
-        changes: false
+        target: 90%
+        if_not_found: failure
+        if_ci_failed: error
+# We upload additional information on branching with pytest-cov `--cov-branch`
+# This information can be used by codecov.com to improve its analysis of the code
 parsers:
   gcov:
     branch_detection:
       conditional: true
       loop: true
+      method: true
       macro: false
-      method: false
-  javascript:
-    enable_partials: false
+
 comment:
-  layout: header, diff
+  layout: diff, reach
+  behavior: default
   require_changes: false
-  behavior: default # update if exists else create new
-  branches: *
\ No newline at end of file
diff --git a/.flake8 b/.flake8
index 704b67af15..f3a26a3b56 100644
--- a/.flake8
+++ b/.flake8
@@ -1,7 +1,12 @@
 [flake8]
-max-line-length = 100
 show-source = True
-application-import-names = autosklearn
-exclude =
+max-line-length = 88
+extend-exclude = venv
+    .venv
     build
+extend-ignore =
+    # No whitespace before ':' in [x : y]
+    E203
+    # No lambdas — too strict
+    E731
diff --git a/.github/workflows/benchmarking-files/regressions-util.py b/.github/workflows/benchmarking-files/regressions-util.py
index c18635ca43..5128d0bbbe 100644
--- a/.github/workflows/benchmarking-files/regressions-util.py
+++ b/.github/workflows/benchmarking-files/regressions-util.py
@@ -7,34 +7,36 @@
 import numpy as np
 import pandas as pd
-CLASSIFICATION_METRICS = ['acc', 'auc', 'balacc', 'logloss']
-REGRESSION_METRICS = ['mae', 'r2', 'rmse']
+CLASSIFICATION_METRICS = ["acc", "auc", "balacc", "logloss"]
+REGRESSION_METRICS = ["mae", "r2", "rmse"]
 METRICS = CLASSIFICATION_METRICS + REGRESSION_METRICS
+
 def _get_mean_results_across_folds(df) -> pd.DataFrame:
-    """ Returns a dataframe with the task, id, metric and the mean values
-    across folds
+    """Returns a dataframe with the task, id, metric and the mean values
+    across folds
-    [idx: 'task', 'id', 'metric', ... mean metrics across folds ...]
+    [idx: 'task', 'id', 'metric', ... mean metrics across folds ...]
""" # Get the information about id and metric, only need info from first fold # [idx: task, id, metric] - df_info = df[df['fold'] == 0][['task', 'id', 'metric']].set_index('task') + df_info = df[df["fold"] == 0][["task", "id", "metric"]].set_index("task") # [idx: task, ... mean metrics across folds ...] available_metrics = list(set(METRICS).intersection(set(df.columns))) - df_means = df[['task'] + available_metrics].groupby(['task']).mean() + df_means = df[["task"] + available_metrics].groupby(["task"]).mean() return df_info.join(df_means) + def generate_framework_def( user_dir: str, username: str, branch: str, commit: str, # Not used in this setup but perhaps in a different one ): - """ Creates a framework definition to run an autosklearn repo. + """Creates a framework definition to run an autosklearn repo. Technically we only use the commit to pull the targeted version but for naming consistency, we need to know the branch too. @@ -61,40 +63,44 @@ def generate_framework_def( or #8b474a437ce980bd0909db59141b40d56f6d5688 """ - assert len(commit) == 41 and commit[0] == '#' or len(commit) == 40, \ - "Not a commit hash" + assert ( + len(commit) == 41 and commit[0] == "#" or len(commit) == 40 + ), "Not a commit hash" # automlbenchmark requires the '#' to identify it's a commit rather than # a branch being targeted - if commit[0] != '#': - commit = '#' + commit + if commit[0] != "#": + commit = "#" + commit # Tried commit and ssh repo but was getting errors with ssh # Tried commit and https but getting issues with commit ref # Using branch and https version = branch - repo = f'https://github.com/{username}/auto-sklearn.git' + repo = f"https://github.com/{username}/auto-sklearn.git" # Create the framework file - lines = '\n'.join([ - f"---", - f"autosklearn_targeted:", - f" extends: autosklearn", - f" version: '{version}'", - f" repo: '{repo}'" - ]) - - filepath = os.path.join(user_dir, 'frameworks.yaml') - with open(filepath, 'w') as f: + lines = "\n".join( + [ + f"---", + f"autosklearn_targeted:", + f" extends: autosklearn", + f" version: '{version}'", + f" repo: '{repo}'", + ] + ) + + filepath = os.path.join(user_dir, "frameworks.yaml") + with open(filepath, "w") as f: f.writelines(lines) + def create_comparison( baseline_csv_classification: str, baseline_csv_regression: str, targeted_csv_classification: str, targeted_csv_regression: str, ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: - """ Creates a csv with comparisons between the baseline and results. + """Creates a csv with comparisons between the baseline and results. Scores are averaged across folds for a given task. 
@@ -143,28 +149,33 @@ def create_comparison( df_targeted_means = _get_mean_results_across_folds(df_targeted) # Find the set intersection of tasks they have in common - common_tasks = set(df_baseline_means.index).intersection(set(df_targeted_means.index)) + common_tasks = set(df_baseline_means.index).intersection( + set(df_targeted_means.index) + ) # Find the set of metrics that are comparable baseline_metrics = set(METRICS).intersection(set(df_baseline_means.columns)) common_metrics = baseline_metrics.intersection(set(df_targeted_means.columns)) # Calculate the differences for in common tasks, across all available metrics - df_differences = df_targeted_means.loc[common_tasks][common_metrics] \ + df_differences = ( + df_targeted_means.loc[common_tasks][common_metrics] - df_baseline_means.loc[common_tasks][common_metrics] + ) # Get the metric used for training and the dataset id of common tasks - df_info = df_baseline_means.loc[common_tasks][['id', 'metric']] + df_info = df_baseline_means.loc[common_tasks][["id", "metric"]] # Join together the info and the differences return df_baseline_means, df_targeted_means, df_info.join(df_differences) + def create_comparisons_markdown( baseline_means_csv: str, targeted_means_csv: str, compared_means_csv: str, ) -> str: - """ Creates markdown that can be posted to Github that shows + """Creates markdown that can be posted to Github that shows a comparison between the baseline and the targeted branch. Parameters @@ -186,36 +197,40 @@ def create_comparisons_markdown( """ # Create colours and func to create the markdown for it colours = { - 'Worse': ['353536', '800000', 'bd0000', 'ff0000'], - 'Better': ['353536', '306300', '51a800', '6fe600'], - 'Good': '6fe600', - 'Bad': 'ff0000', - 'Neutral': '353536', - 'NaN': '52544f', + "Worse": ["353536", "800000", "bd0000", "ff0000"], + "Better": ["353536", "306300", "51a800", "6fe600"], + "Good": "6fe600", + "Bad": "ff0000", + "Neutral": "353536", + "NaN": "52544f", } + def colour(kind, scale=None): c = colours[kind] if scale is None else colours[kind][scale] - return f'![#{c}](https://via.placeholder.com/15/{c}/000000?text=+)' + return f"![#{c}](https://via.placeholder.com/15/{c}/000000?text=+)" # Metrics, whether positive is better and the tolerances between each # Neutral, kind of good/bad, very good/bad etc... 
metric_tolerances = { - 'acc': { 'positive_is_better': True ,'tol': [0.001, 0.01, 0.2] }, - 'auc':{ 'positive_is_better': True ,'tol': [0.001, 0.01, 0.2] }, - 'balacc': { 'positive_is_better': True ,'tol': [0.001, 0.01, 0.2] }, - 'logloss': { 'positive_is_better': False ,'tol': [0.009, 0.01, 0.2] }, - 'mae': { 'positive_is_better': False ,'tol': [0.001, 0.01, 0.2] }, - 'r2': { 'positive_is_better': True ,'tol': [0.001, 0.01, 0.2] }, - 'rmse': { 'positive_is_better': False ,'tol': [0.001, 0.01, 0.2] }, + "acc": {"positive_is_better": True, "tol": [0.001, 0.01, 0.2]}, + "auc": {"positive_is_better": True, "tol": [0.001, 0.01, 0.2]}, + "balacc": {"positive_is_better": True, "tol": [0.001, 0.01, 0.2]}, + "logloss": {"positive_is_better": False, "tol": [0.009, 0.01, 0.2]}, + "mae": {"positive_is_better": False, "tol": [0.001, 0.01, 0.2]}, + "r2": {"positive_is_better": True, "tol": [0.001, 0.01, 0.2]}, + "rmse": {"positive_is_better": False, "tol": [0.001, 0.01, 0.2]}, } + def is_good(score, metric): return ( - score > 0 and metric_tolerances[metric]['positive_is_better'] - or score < 0 and not metric_tolerances[metric]['positive_is_better'] + score > 0 + and metric_tolerances[metric]["positive_is_better"] + or score < 0 + and not metric_tolerances[metric]["positive_is_better"] ) def is_neutral(diff, baseline_score, metric): - tolerance = metric_tolerances[metric]['tol'][0] + tolerance = metric_tolerances[metric]["tol"][0] if baseline_score == 0: baseline_score = 1e-10 prc_diff = diff / baseline_score @@ -223,18 +238,18 @@ def is_neutral(diff, baseline_score, metric): def tolerance_colour(baseline_value, comparison_value, metric): if np.isnan(baseline_value) or np.isnan(comparison_value): - return colour('NaN') + return colour("NaN") if baseline_value == 0: baseline_value = 1e-10 prc_diff = comparison_value / baseline_value - tolerances = metric_tolerances[metric]['tol'] - if metric_tolerances[metric]['positive_is_better']: - diff_color = 'Better' if prc_diff > 0 else 'Worse' + tolerances = metric_tolerances[metric]["tol"] + if metric_tolerances[metric]["positive_is_better"]: + diff_color = "Better" if prc_diff > 0 else "Worse" else: - diff_color = 'Better' if prc_diff < 0 else 'Worse' + diff_color = "Better" if prc_diff < 0 else "Worse" if abs(prc_diff) < tolerances[0]: return colour(diff_color, 0) @@ -245,23 +260,24 @@ def tolerance_colour(baseline_value, comparison_value, metric): else: return colour(diff_color, 3) - legend = { - 'B': 'Baseline', - 'T': 'Target Version', - '**Bold**': 'Training Metric', - '/': 'Missing Value', - '---': 'Missing Task' + "B": "Baseline", + "T": "Target Version", + "**Bold**": "Training Metric", + "/": "Missing Value", + "---": "Missing Task", } - legend.update({ - key: colour(key) - for key in set(colours.keys()) - set(['Worse', 'Better', 'Good', 'Bad']) - }) + legend.update( + { + key: colour(key) + for key in set(colours.keys()) - set(["Worse", "Better", "Good", "Bad"]) + } + ) # Worse and better are handled seperatly - compared = pd.read_csv(compared_means_csv, index_col='task') - baseline = pd.read_csv(baseline_means_csv, index_col='task') - targeted = pd.read_csv(targeted_means_csv, index_col='task') + compared = pd.read_csv(compared_means_csv, index_col="task") + baseline = pd.read_csv(baseline_means_csv, index_col="task") + targeted = pd.read_csv(targeted_means_csv, index_col="task") # Some things to keep track of for the textual summary n_performed_equally = 0 @@ -269,9 +285,9 @@ def tolerance_colour(baseline_value, comparison_value, metric): 
n_performed_worse = 0 n_could_not_compare = 0 - headers = ['task', 'metric'] + METRICS - table_header = '|'.join(headers) - seperator = '|'.join(len(headers) * ['---']) + headers = ["task", "metric"] + METRICS + table_header = "|".join(headers) + seperator = "|".join(len(headers) * ["---"]) lines = [table_header, seperator] @@ -279,13 +295,13 @@ def tolerance_colour(baseline_value, comparison_value, metric): # The chosen metric name and the csv column differ with neg_logloss and # logloss - training_metric = baseline.loc[task]['metric'] + training_metric = baseline.loc[task]["metric"] if training_metric == "neg_logloss": training_metric = "logloss" # The baseline has tasks we can't compare with if task not in compared.index: - line = '|'.join([task, training_metric] + len(METRICS) * ['---']) + line = "|".join([task, training_metric] + len(METRICS) * ["---"]) lines.append(line) # We can compare for a given tasks @@ -299,34 +315,23 @@ def tolerance_colour(baseline_value, comparison_value, metric): # If the metric does not exist for either, do fill it in as # missing - if ( - metric not in baseline.columns - and metric not in compared.columns - ): + if metric not in baseline.columns and metric not in compared.columns: n_could_not_compare += 1 - entry = '/' + entry = "/" # If the metric exists in the baseline but not in the comparison - elif ( - metric in baseline.columns - and not metric in compared.columns - ): + elif metric in baseline.columns and not metric in compared.columns: n_could_not_compare += 1 - entry = '
'.join([ - f' B : {baseline.loc[task][metric]:.3f}', - f' T : /' - ]) + entry = "
".join( + [f" B : {baseline.loc[task][metric]:.3f}", f" T : /"] + ) # If the metric exists in the comparison but not in the baseline - elif ( - metric in compared.columns - and not metric in baseline.columns - ): + elif metric in compared.columns and not metric in baseline.columns: n_could_not_compare += 1 - entry = '
'.join([ - f' B : /', - f' T : {targeted.loc[task][metric]:.3f}' - ]) + entry = "
".join( + [f" B : /", f" T : {targeted.loc[task][metric]:.3f}"] + ) # The metric must exist in both else: @@ -339,37 +344,43 @@ def tolerance_colour(baseline_value, comparison_value, metric): else: n_performed_worse += 1 - diff_colour = tolerance_colour(baseline_score, - compared_score, - metric) - entry = '
'.join([ - f' B : {baseline.loc[task][metric]:.3f}', - f' T : {targeted.loc[task][metric]:.3f}', - f'{diff_colour}: {compared.loc[task][metric]:.3f}' - ]) + diff_colour = tolerance_colour( + baseline_score, compared_score, metric + ) + entry = "
".join( + [ + f" B : {baseline.loc[task][metric]:.3f}", + f" T : {targeted.loc[task][metric]:.3f}", + f"{diff_colour}: {compared.loc[task][metric]:.3f}", + ] + ) # Make the training metric entry bold if metric == training_metric: - entry = '' + entry + '' + entry = "" + entry + "" entries.append(entry) # Create the line - line = '|'.join([task, training_metric] + entries) + line = "|".join([task, training_metric] + entries) lines.append(line) # Create the legend line score_scale = { - 'worse': "".join(colour('Worse', scale) for scale in range(len(colours['Worse']) - 1, 0, -1)), - 'better': "".join(colour('Better', scale) for scale in range(len(colours['Better']))) + "worse": "".join( + colour("Worse", scale) for scale in range(len(colours["Worse"]) - 1, 0, -1) + ), + "better": "".join( + colour("Better", scale) for scale in range(len(colours["Better"])) + ), } score_scale = f'worse {score_scale["worse"] + score_scale["better"]} better' - legend_str = '\t\t\t||\t\t'.join([score_scale] + [ - f'{key} - {text}' for key, text in legend.items() - ]) + legend_str = "\t\t\t||\t\t".join( + [score_scale] + [f"{key} - {text}" for key, text in legend.items()] + ) - lines.append('') + lines.append("") lines.append(legend_str) # Create a textual summary to go at the top @@ -377,7 +388,6 @@ def tolerance_colour(baseline_value, comparison_value, metric): compared_tasks = list(compared.index) non_compared_tasks = list(set(baseline.index) - set(compared_tasks)) - # Populate info about each metric per_metric_info = {} for metric in compared_metrics: @@ -387,36 +397,37 @@ def tolerance_colour(baseline_value, comparison_value, metric): item_colour = "" if is_neutral(compared_average, baseline_average, metric): - item_colour = colour('Neutral') + item_colour = colour("Neutral") elif is_good(compared_average, metric): - item_colour = colour('Good') + item_colour = colour("Good") else: - item_colour = colour('Bad') + item_colour = colour("Bad") per_metric_info[metric] = { - 'average': compared_average, - 'n_compared': n_compared, - 'colour': item_colour + "average": compared_average, + "n_compared": n_compared, + "colour": item_colour, } - summary = '\n'.join([ - f"# Results", - f"Overall the targeted versions performance across {len(compared_tasks)} task(s) and {len(compared_metrics)} metric(s)", - f"", - f"* Equally on {n_performed_equally} comparisons", - f"* Better on {n_performed_better} comparisons", - f"* Worse on {n_performed_worse} comparisons", - f"", - f"There were {len(non_compared_tasks)} task(s) that could not be compared.", - f"", - f"The average change for each metric is:" - f"" - ] + [ + summary = "\n".join( + [ + f"# Results", + f"Overall the targeted versions performance across {len(compared_tasks)} task(s) and {len(compared_metrics)} metric(s)", + f"", + f"* Equally on {n_performed_equally} comparisons", + f"* Better on {n_performed_better} comparisons", + f"* Worse on {n_performed_worse} comparisons", + f"", + f"There were {len(non_compared_tasks)} task(s) that could not be compared.", + f"", + f"The average change for each metric is:" f"", + ] + + [ f"* {metric}: {info['colour']} {info['average']:.4f} across {info['n_compared']} task(s)" for metric, info in per_metric_info.items() ] ) - return '\n'.join([summary] + [""] + lines) + return "\n".join([summary] + [""] + lines) if __name__ == "__main__": @@ -424,30 +435,30 @@ def tolerance_colour(baseline_value, comparison_value, metric): # Generates a framework definition for automlbenchmark so that we can target # auto-sklearn versions that are 
not our own - parser.add_argument('--generate-framework-def', action='store_true') - parser.add_argument('--user-dir', type=str) - parser.add_argument('--owner', type=str) - parser.add_argument('--branch', type=str) - parser.add_argument('--commit', type=str) + parser.add_argument("--generate-framework-def", action="store_true") + parser.add_argument("--user-dir", type=str) + parser.add_argument("--owner", type=str) + parser.add_argument("--branch", type=str) + parser.add_argument("--commit", type=str) # For comparing results generated by automlbenchmark for: # -> baseline results generated [--baseline-csv] # -> targeted results generated [--target-csv] - # by automlbenchmark and the target branch 'results' generated - parser.add_argument('--compare-results', action='store_true') - parser.add_argument('--baseline-csv-classification', type=str) - parser.add_argument('--baseline-csv-regression', type=str) - parser.add_argument('--targeted-csv-classification', type=str) - parser.add_argument('--targeted-csv-regression', type=str) - parser.add_argument('--baseline-means-to', type=str) - parser.add_argument('--targeted-means-to', type=str) - parser.add_argument('--compared-means-to', type=str) + # by automlbenchmark and the target branch 'results' generated + parser.add_argument("--compare-results", action="store_true") + parser.add_argument("--baseline-csv-classification", type=str) + parser.add_argument("--baseline-csv-regression", type=str) + parser.add_argument("--targeted-csv-classification", type=str) + parser.add_argument("--targeted-csv-regression", type=str) + parser.add_argument("--baseline-means-to", type=str) + parser.add_argument("--targeted-means-to", type=str) + parser.add_argument("--compared-means-to", type=str) # For generating markdown that can be posted to github that shows the results - parser.add_argument('--generate-markdown', action='store_true') - parser.add_argument('--compared-means-csv', type=str) - parser.add_argument('--baseline-means-csv', type=str) - parser.add_argument('--targeted-means-csv', type=str) + parser.add_argument("--generate-markdown", action="store_true") + parser.add_argument("--compared-means-csv", type=str) + parser.add_argument("--baseline-means-csv", type=str) + parser.add_argument("--targeted-means-csv", type=str) args = parser.parse_args() @@ -459,11 +470,17 @@ def tolerance_colour(baseline_value, comparison_value, metric): elif args.compare_results: - assert all([ - args.baseline_csv_classification, args.baseline_csv_regression, - args.targeted_csv_classification, args.baseline_csv_regression, - args.baseline_means_to, args.targeted_means_to, args.compared_means_to - ]) + assert all( + [ + args.baseline_csv_classification, + args.baseline_csv_regression, + args.targeted_csv_classification, + args.baseline_csv_regression, + args.baseline_means_to, + args.targeted_means_to, + args.compared_means_to, + ] + ) baseline_means, targeted_means, compared_means = create_comparison( baseline_csv_classification=args.baseline_csv_classification, @@ -480,9 +497,9 @@ def tolerance_colour(baseline_value, comparison_value, metric): df.to_csv(path) elif args.generate_markdown: - assert all([ - args.baseline_means_csv, args.targeted_means_csv, args.compared_means_csv - ]) + assert all( + [args.baseline_means_csv, args.targeted_means_csv, args.compared_means_csv] + ) comparisons_table = create_comparisons_markdown( baseline_means_csv=args.baseline_means_csv, diff --git a/.github/workflows/black_checker.yml b/.github/workflows/black_checker.yml deleted file mode 
100644 index fac1723682..0000000000 --- a/.github/workflows/black_checker.yml +++ /dev/null @@ -1,47 +0,0 @@ -name: black-format-check - -on: - # Manually triggerable in github - workflow_dispatch: - - # When a push occurs on either of these branches - push: - branches: - - master - - development - - # When a push occurs on a PR that targets these branches - pull_request: - branches: - - master - - development - -env: - #If STRICT is set to true, it will fail on black check fail - STRICT: false - -jobs: - - black-format-check: - runs-on: ubuntu-latest - steps: - - - name: Checkout - uses: actions/checkout@v2 - with: - submodules: recursive - - - name: Setup Python 3.7 - uses: actions/setup-python@v2 - with: - python-version: "3.7" - - - name: Install black - run: | - pip install black - - - name: Run Black Check - run: | - black --check --diff --line-length 100 ./autosklearn || ! $STRICT - black --check --diff --line-length 100 ./test || ! $STRICT - black --check --diff --line-length 100 ./examples|| ! $STRICT diff --git a/.github/workflows/isort_checker.yml b/.github/workflows/isort_checker.yml deleted file mode 100644 index 4f1f03f5a8..0000000000 --- a/.github/workflows/isort_checker.yml +++ /dev/null @@ -1,45 +0,0 @@ -name: isort-check - -on: - # Manually triggerable in github - workflow_dispatch: - - # When a push occurs on either of these branches - push: - branches: - - master - - development - - # When a push occurs on a PR that targets these branches - pull_request: - branches: - - master - - development - -env: - #If STRICT is set to true, it will fail on isort check fail - STRICT: false - -jobs: - - isort-format-check: - runs-on: ubuntu-latest - steps: - - - name: Checkout - uses: actions/checkout@v2 - with: - submodules: recursive - - - name: Setup Python 3.7 - uses: actions/setup-python@v2 - with: - python-version: "3.7" - - - name: Install isort - run: | - pip install isort - - - name: Run isort Check - run: | - isort --check-only autosklearn || ! $STRICT diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 91b5bbdf54..4c8a59dc4e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,57 +1,62 @@ +# If you see me, please update my `rev` field using the provided links +# Click the repo and update to latest tags. 
+# If things break on update, raise an issue repos: + + - repo: https://github.com/pycqa/isort + rev: 5.10.1 + hooks: + - id: isort + name: isort imports autosklearn + files: autosklearn/.* + args: [--check] + + - id: isort + name: isort imports test + files: test/.* + args: [--check] + + - repo: https://github.com/ambv/black + rev: 22.1.0 + hooks: + - id: black + name: black formatter autosklearn + files: autosklearn/.* + args: [--check] + + - id: black + name: black formatter test + files: test/.* + args: [--check] + + - id: black + name: black formatter examples + files: examples/.* + args: [--check] + + # This is disabled as most modules fail this + - repo: https://github.com/pycqa/pydocstyle + rev: 6.1.1 + hooks: + - id: pydocstyle + files: DISABLED # autosklearn/.* + always_run: false + additional_dependencies: ["toml"] # Needed to parse pyproject.toml + - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.761 + rev: v0.930 hooks: - id: mypy - args: [--show-error-codes] - name: mypy auto-sklearn-ensembles - files: autosklearn/ensembles - - id: mypy - args: [--show-error-codes] - name: mypy auto-sklearn-metrics - files: autosklearn/metrics - - id: mypy - args: [--show-error-codes] - name: mypy auto-sklearn-data - files: autosklearn/data - - id: mypy - args: [--show-error-codes] - name: mypy auto-sklearn-util - files: autosklearn/util - - id: mypy - args: [--show-error-codes] - name: mypy auto-sklearn-evaluation - files: autosklearn/evaluation - - id: mypy - args: [--show-error-codes] - name: mypy auto-sklearn-datapreprocessing - files: autosklearn/pipeline/components/data_preprocessing/ + name: mypy auto-sklearn + files: autosklearn/.* + - repo: https://gitlab.com/pycqa/flake8 - rev: 3.8.3 + rev: 4.0.1 hooks: - id: flake8 - name: flake8 auto-sklearn + name: flake8 autosklearn files: autosklearn/.* + - id: flake8 - name: flake8 file-order-data - files: autosklearn/data - additional_dependencies: - - flake8-import-order - - id: flake8 - name: flake8 file-order-ensemble - files: autosklearn/ensembles - additional_dependencies: - - flake8-import-order - - id: flake8 - name: flake8 file-order-metrics - files: autosklearn/metrics - additional_dependencies: - - flake8-import-order - - id: flake8 - name: flake8 file-order-util - files: autosklearn/util - additional_dependencies: - - flake8-import-order - - id: flake8 - name: flake8 autosklearn-test + name: flake8 test files: test/.* diff --git a/Makefile b/Makefile index b00a2392ec..495b1c2f24 100644 --- a/Makefile +++ b/Makefile @@ -1,32 +1,122 @@ -# simple makefile to simplify repetitive build env management tasks under posix +# NOTE: Used on linux, limited support outside of Linux +# +# A simple makefile to help with small tasks related to development of autosklearn +# These have been configured to only really run short tasks. Longer form tasks +# are usually completed in github actions. 
+ +.PHONY: help install-dev check format pre-commit clean clean-doc clean-build build doc links examples publish test + +help: + @echo "Makefile autosklearn" + @echo "* install-dev to install all dev requirements and install pre-commit" + @echo "* check to check the source code for issues" + @echo "* format to format the code with black and isort" + @echo "* pre-commit to run the pre-commit check" + @echo "* clean to clean the dist and doc build files" + @echo "* build to build a dist" + @echo "* doc to generate and view the html files" + @echo "* linkcheck to check the documentation links" + @echo "* examples to run and generate the examples" + @echo "* publish to help publish the current branch to pypi" + @echo "* test to run the tests" PYTHON ?= python CYTHON ?= cython PYTEST ?= python -m pytest CTAGS ?= ctags +PIP ?= python -m pip +MAKE ?= make +BLACK ?= black +ISORT ?= isort +PYDOCSTYLE ?= pydocstyle +MYPY ?= mypy +PRECOMMIT ?= pre-commit +FLAKE8 ?= flake8 + +DIR := ${CURDIR} +DIST := ${CURDIR}/dist +DOCDIR := ${DIR}/doc +INDEX_HTML := file://${DOCDIR}/html/build/index.html + +install-dev: + $(PIP) install -e ".[test,examples,docs]" + pre-commit install + +check-black: + $(BLACK) autosklearn examples test --check || : + +check-isort: + $(ISORT) autosklearn test --check || : + +check-pydocstyle: + $(PYDOCSTYLE) autosklearn || : + +check-mypy: + $(MYPY) autosklearn || : + +check-flake8: + $(FLAKE8) autosklearn || : + $(FLAKE8) test || : -all: clean inplace test +# pydocstyle does not have easy ignore rules, instead, we include as they are covered +check: check-black check-isort check-mypy check-flake8 # check-pydocstyle -clean: +pre-commit: + $(PRECOMMIT) run --all-files + +format-black: + $(BLACK) autosklearn/.* + $(BLACK) test/.* + $(BLACK) examples/.* + +format-isort: + $(ISORT) autosklearn + $(ISORT) test + + +format: format-black format-isort + +clean-doc: + $(MAKE) -C ${DOCDIR} clean + +clean-build: $(PYTHON) setup.py clean - rm -rf dist + rm -rf ${DIST} -in: inplace # just a shortcut -inplace: - $(PYTHON) setup.py build_ext -i +# Clean up any builds in ./dist as well as doc +clean: clean-doc clean-build + +# Build a distribution in ./dist +build: + $(PYTHON) setup.py bdist doc: - cd ./doc - make html - cd .. 
+ $(MAKE) -C ${DOCDIR} html-noexamples + @echo + @echo "View docs at:" + @echo ${INDEX_HTML} + +links: + $(MAKE) -C ${DOCDIR} linkcheck -test-code: in - $(PYTEST) -s -v test -test-doc: - $(PYTEST) -s -v doc/*.rst +examples: + $(MAKE) -C ${DOCDIR} html + @echo + @echo "View docs at:" + @echo ${INDEX_HTML} -test-coverage: - rm -rf coverage .coverage - $(PYTEST) -s -v --with-coverage test +# Publish to testpypi +# Will echo the commands to actually publish to be run to publish to actual PyPi +# This is done to prevent accidental publishing but provide the same conveniences +publish: clean-build build + $(PIP) install twine + $(PYTHON) -m twine upload --repository testpypi ${DIST}/* + @echo + @echo "Test with the following line:" + @echo "pip install --index-url https://test.pypi.org/simple/ auto-sklearn" + @echo + @echo "Once you have decided it works, publish to actual pypi with" + @echo "python -m twine upload dist/*" -test: test-code test-sphinxext test-doc +test: + $(PYTEST) test diff --git a/autosklearn/__init__.py b/autosklearn/__init__.py index dae47a1089..2bff637af8 100644 --- a/autosklearn/__init__.py +++ b/autosklearn/__init__.py @@ -1,27 +1,26 @@ # -*- encoding: utf-8 -*- import os -import pkg_resources import sys -from autosklearn.util import dependencies -from autosklearn.__version__ import __version__ # noqa (imported but unused) +import pkg_resources +from autosklearn.__version__ import __version__ # noqa (imported but unused) +from autosklearn.util import dependencies -requirements = pkg_resources.resource_string('autosklearn', 'requirements.txt') -requirements = requirements.decode('utf-8') +requirements = pkg_resources.resource_string("autosklearn", "requirements.txt") +requirements = requirements.decode("utf-8") dependencies.verify_packages(requirements) -if os.name != 'posix': +if os.name != "posix": raise ValueError( - 'Detected unsupported operating system: %s. Please check ' - 'the compability information of auto-sklearn: https://automl.github.io' - '/auto-sklearn/stable/installation.html#windows-osx-compability' % - sys.platform + "Detected unsupported operating system: %s. Please check " + "the compability information of auto-sklearn: https://automl.github.io" + "/auto-sklearn/stable/installation.html#windows-osx-compability" % sys.platform ) if sys.version_info < (3, 6): raise ValueError( - 'Unsupported python version %s found. Auto-sklearn requires Python ' - '3.6 or higher.' % sys.version_info + "Unsupported python version %s found. Auto-sklearn requires Python " + "3.6 or higher." 
% sys.version_info ) diff --git a/autosklearn/automl.py b/autosklearn/automl.py index f8057d130b..cee7c492f3 100644 --- a/autosklearn/automl.py +++ b/autosklearn/automl.py @@ -1,90 +1,102 @@ # -*- encoding: utf-8 -*- +from typing import Any, Dict, List, Mapping, Optional, Tuple, Union, cast + import copy -import distro import io import json -import platform import logging.handlers import multiprocessing import os +import platform import sys +import tempfile import time -from typing import Any, Dict, Mapping, Optional, List, Tuple, Union, cast -import uuid import unittest.mock -import tempfile +import uuid -from ConfigSpace.configuration_space import Configuration -from ConfigSpace.read_and_write import json as cs_json import dask import dask.distributed +import distro +import joblib import numpy as np import numpy.ma as ma import pandas as pd import pkg_resources import scipy.stats -from sklearn.base import BaseEstimator -from sklearn.ensemble import VotingRegressor -from sklearn.model_selection._split import _RepeatedSplits, \ - BaseShuffleSplit, BaseCrossValidator -from smac.runhistory.runhistory import RunInfo, RunValue -from smac.tae import StatusType -from smac.stats.stats import Stats -import joblib import sklearn.utils +from ConfigSpace.configuration_space import Configuration +from ConfigSpace.read_and_write import json as cs_json from scipy.sparse import spmatrix +from sklearn.base import BaseEstimator +from sklearn.dummy import DummyClassifier, DummyRegressor +from sklearn.ensemble import VotingRegressor +from sklearn.metrics._classification import type_of_target +from sklearn.model_selection._split import ( + BaseCrossValidator, + BaseShuffleSplit, + _RepeatedSplits, +) from sklearn.utils import check_random_state from sklearn.utils.validation import check_is_fitted -from sklearn.metrics._classification import type_of_target -from sklearn.dummy import DummyClassifier, DummyRegressor +from smac.runhistory.runhistory import RunInfo, RunValue +from smac.stats.stats import Stats +from smac.tae import StatusType from autosklearn.automl_common.common.utils.backend import Backend, create - -from autosklearn.metrics import Scorer, default_metric_for_task -from autosklearn.data.xy_data_manager import XYDataManager +from autosklearn.constants import ( + BINARY_CLASSIFICATION, + CLASSIFICATION_TASKS, + MULTICLASS_CLASSIFICATION, + MULTILABEL_CLASSIFICATION, + MULTIOUTPUT_REGRESSION, + REGRESSION, + REGRESSION_TASKS, +) from autosklearn.data.validation import ( - convert_if_sparse, - InputValidator, SUPPORTED_FEAT_TYPES, SUPPORTED_TARGET_TYPES, + InputValidator, + convert_if_sparse, ) +from autosklearn.data.xy_data_manager import XYDataManager +from autosklearn.ensemble_builder import EnsembleBuilderManager +from autosklearn.ensembles.singlebest_ensemble import SingleBest from autosklearn.evaluation import ExecuteTaFuncWithQueue, get_cost_of_crash from autosklearn.evaluation.abstract_evaluator import _fit_and_suppress_warnings from autosklearn.evaluation.train_evaluator import TrainEvaluator, _fit_with_budget -from autosklearn.metrics import calculate_metric +from autosklearn.metrics import Scorer, calculate_metric, default_metric_for_task +from autosklearn.pipeline.base import BasePipeline +from autosklearn.pipeline.components.classification import ClassifierChoice +from autosklearn.pipeline.components.data_preprocessing.categorical_encoding import ( + OHEChoice, +) +from autosklearn.pipeline.components.data_preprocessing.minority_coalescense import ( + CoalescenseChoice, +) +from 
autosklearn.pipeline.components.data_preprocessing.rescaling import RescalingChoice +from autosklearn.pipeline.components.feature_preprocessing import ( + FeaturePreprocessorChoice, +) +from autosklearn.pipeline.components.regression import RegressorChoice +from autosklearn.smbo import AutoMLSMBO +from autosklearn.util import RE_PATTERN, pipeline from autosklearn.util.data import ( + DatasetCompressionSpec, + default_dataset_compression_arg, reduce_dataset_size_if_too_large, supported_precision_reductions, validate_dataset_compression_arg, - default_dataset_compression_arg, - DatasetCompressionSpec, ) -from autosklearn.util.stopwatch import StopWatch from autosklearn.util.logging_ import ( + PicklableClientLogger, + get_named_client_logger, setup_logger, start_log_server, - get_named_client_logger, warnings_to, - PicklableClientLogger, ) -from autosklearn.util import pipeline, RE_PATTERN from autosklearn.util.parallel import preload_modules -from autosklearn.ensemble_builder import EnsembleBuilderManager -from autosklearn.ensembles.singlebest_ensemble import SingleBest -from autosklearn.smbo import AutoMLSMBO -from autosklearn.constants import MULTILABEL_CLASSIFICATION, MULTICLASS_CLASSIFICATION, \ - REGRESSION_TASKS, REGRESSION, BINARY_CLASSIFICATION, MULTIOUTPUT_REGRESSION, \ - CLASSIFICATION_TASKS -from autosklearn.pipeline.base import BasePipeline -from autosklearn.pipeline.components.classification import ClassifierChoice -from autosklearn.pipeline.components.regression import RegressorChoice -from autosklearn.pipeline.components.feature_preprocessing import FeaturePreprocessorChoice -from autosklearn.pipeline.components.data_preprocessing.categorical_encoding import OHEChoice -from autosklearn.pipeline.components.data_preprocessing.minority_coalescense import ( - CoalescenseChoice -) -from autosklearn.pipeline.components.data_preprocessing.rescaling import RescalingChoice from autosklearn.util.single_thread_client import SingleThreadedClient +from autosklearn.util.stopwatch import StopWatch def _model_predict( @@ -94,7 +106,7 @@ def _model_predict( batch_size: Optional[int] = None, logger: Optional[PicklableClientLogger] = None, ) -> np.ndarray: - """ Generates the predictions from a model. + """Generates the predictions from a model. This is seperated out into a seperate function to allow for multiprocessing and perform parallel predictions. @@ -149,24 +161,25 @@ def _model_predict( else: predict_func = model.predict - if batch_size is not None and hasattr(model, 'batch_size'): + if batch_size is not None and hasattr(model, "batch_size"): prediction = predict_func(X_, batch_size=batch_size) else: prediction = predict_func(X_) # Check that probability values lie between 0 and 1. if task in CLASSIFICATION_TASKS: - assert (prediction >= 0).all() and (prediction <= 1).all(), \ - f"For {model}, prediction probability not within [0, 1]!" + assert (prediction >= 0).all() and ( + prediction <= 1 + ).all(), f"For {model}, prediction probability not within [0, 1]!" 
- assert prediction.shape[0] == X_.shape[0], \ - f"Prediction shape {model} is {prediction.shape} while X_.shape is {X_.shape}" + assert ( + prediction.shape[0] == X_.shape[0] + ), f"Prediction shape {model} is {prediction.shape} while X_.shape is {X_.shape}" return prediction class AutoML(BaseEstimator): - def __init__( self, time_left_for_this_task, @@ -183,7 +196,7 @@ def __init__( debug_mode=False, include=None, exclude=None, - resampling_strategy='holdout-iterative-fit', + resampling_strategy="holdout-iterative-fit", resampling_strategy_arguments=None, n_jobs=None, dask_client: Optional[dask.distributed.Client] = None, @@ -195,7 +208,7 @@ def __init__( metric=None, scoring_functions=None, get_trials_callback=None, - dataset_compression: Union[bool, Mapping[str, Any]] = True + dataset_compression: Union[bool, Mapping[str, Any]] = True, ): super(AutoML, self).__init__() self.configuration_space = None @@ -205,8 +218,9 @@ def __init__( # self._tmp_dir = tmp_dir self._time_for_task = time_left_for_this_task self._per_run_time_limit = per_run_time_limit - self._initial_configurations_via_metalearning = \ + self._initial_configurations_via_metalearning = ( initial_configurations_via_metalearning + ) self._ensemble_size = ensemble_size self._ensemble_nbest = ensemble_nbest self._max_models_on_disc = max_models_on_disc @@ -217,9 +231,14 @@ def __init__( self._include = include self._exclude = exclude self._resampling_strategy = resampling_strategy - self._scoring_functions = scoring_functions if scoring_functions is not None else [] - self._resampling_strategy_arguments = resampling_strategy_arguments \ - if resampling_strategy_arguments is not None else {} + self._scoring_functions = ( + scoring_functions if scoring_functions is not None else [] + ) + self._resampling_strategy_arguments = ( + resampling_strategy_arguments + if resampling_strategy_arguments is not None + else {} + ) self._n_jobs = n_jobs self._dask_client = dask_client @@ -227,15 +246,24 @@ def __init__( self._disable_evaluator_output = disable_evaluator_output # Check arguments prior to doing anything! if not isinstance(self._disable_evaluator_output, (bool, List)): - raise ValueError('disable_evaluator_output must be of type bool ' - 'or list.') + raise ValueError( + "disable_evaluator_output must be of type bool " "or list." + ) if isinstance(self._disable_evaluator_output, List): - allowed_elements = ['model', 'cv_model', 'y_optimization', 'y_test', 'y_valid'] + allowed_elements = [ + "model", + "cv_model", + "y_optimization", + "y_test", + "y_valid", + ] for element in self._disable_evaluator_output: if element not in allowed_elements: - raise ValueError("List member '%s' for argument " - "'disable_evaluator_output' must be one " - "of " + str(allowed_elements)) + raise ValueError( + "List member '%s' for argument " + "'disable_evaluator_output' must be one " + "of " + str(allowed_elements) + ) self._get_smac_object_callback = get_smac_object_callback self._get_trials_callback = get_trials_callback self._smac_scenario_args = smac_scenario_args @@ -280,17 +308,21 @@ def __init__( # examples. 
Nevertheless, multi-process runs # have spawn as requirement to reduce the # possibility of a deadlock - self._multiprocessing_context = 'forkserver' + self._multiprocessing_context = "forkserver" if self._n_jobs == 1 and self._dask_client is None: - self._multiprocessing_context = 'fork' + self._multiprocessing_context = "fork" self._dask_client = SingleThreadedClient() if not isinstance(self._time_for_task, int): - raise ValueError("time_left_for_this_task not of type integer, " - "but %s" % str(type(self._time_for_task))) + raise ValueError( + "time_left_for_this_task not of type integer, " + "but %s" % str(type(self._time_for_task)) + ) if not isinstance(self._per_run_time_limit, int): - raise ValueError("per_run_time_limit not of type integer, but %s" % - str(type(self._per_run_time_limit))) + raise ValueError( + "per_run_time_limit not of type integer, but %s" + % str(type(self._per_run_time_limit)) + ) # By default try to use the TCP logging port or get a new port self._logger_port = logging.handlers.DEFAULT_TCP_LOGGING_PORT @@ -331,7 +363,7 @@ def _create_dask_client(self): def _close_dask_client(self): if ( - hasattr(self, '_is_dask_client_internally_created') + hasattr(self, "_is_dask_client_internally_created") and self._is_dask_client_internally_created and self._dask_client ): @@ -343,13 +375,13 @@ def _close_dask_client(self): del self._is_dask_client_internally_created def _get_logger(self, name): - logger_name = 'AutoML(%d):%s' % (self._seed, name) + logger_name = "AutoML(%d):%s" % (self._seed, name) # Setup the configuration for the logger # This is gonna be honored by the server # Which is created below setup_logger( - filename='%s.log' % str(logger_name), + filename="%s.log" % str(logger_name), logging_config=self.logging_config, output_dir=self._backend.temporary_directory, ) @@ -363,17 +395,17 @@ def _get_logger(self, name): context = multiprocessing.get_context(self._multiprocessing_context) preload_modules(context) self.stop_logging_server = context.Event() - port = context.Value('l') # be safe by using a long + port = context.Value("l") # be safe by using a long port.value = -1 self.logging_server = context.Process( target=start_log_server, kwargs=dict( - host='localhost', + host="localhost", logname=logger_name, event=self.stop_logging_server, port=port, - filename='%s.log' % str(logger_name), + filename="%s.log" % str(logger_name), logging_config=self.logging_config, output_dir=self._backend.temporary_directory, ), @@ -392,12 +424,12 @@ def _get_logger(self, name): return get_named_client_logger( name=logger_name, - host='localhost', + host="localhost", port=self._logger_port, ) def _clean_logger(self): - if not hasattr(self, 'stop_logging_server') or self.stop_logging_server is None: + if not hasattr(self, "stop_logging_server") or self.stop_logging_server is None: return # Clean up the logger @@ -423,20 +455,19 @@ def _stop_task(watcher, task_name): watcher.stop_task(task_name) @staticmethod - def _print_load_time(basename, time_left_for_this_task, - time_for_load_data, logger): + def _print_load_time(basename, time_left_for_this_task, time_for_load_data, logger): - time_left_after_reading = max( - 0, time_left_for_this_task - time_for_load_data) - logger.info('Remaining time after reading %s %5.2f sec' % - (basename, time_left_after_reading)) + time_left_after_reading = max(0, time_left_for_this_task - time_for_load_data) + logger.info( + "Remaining time after reading %s %5.2f sec" + % (basename, time_left_after_reading) + ) return time_for_load_data def 
_do_dummy_prediction(self, datamanager: XYDataManager, num_run: int) -> int: # When using partial-cv it makes no sense to do dummy predictions - if self._resampling_strategy in ['partial-cv', - 'partial-cv-iterative-fit']: + if self._resampling_strategy in ["partial-cv", "partial-cv-iterative-fit"]: return num_run self._logger.info("Starting to create dummy predictions.") @@ -451,53 +482,47 @@ def _do_dummy_prediction(self, datamanager: XYDataManager, num_run: int) -> int: # already be generated here! stats = Stats(scenario_mock) stats.start_timing() - ta = ExecuteTaFuncWithQueue(backend=self._backend, - autosklearn_seed=self._seed, - resampling_strategy=self._resampling_strategy, - initial_num_run=num_run, - stats=stats, - metric=self._metric, - memory_limit=memory_limit, - disable_file_output=self._disable_evaluator_output, - abort_on_first_run_crash=False, - cost_for_crash=get_cost_of_crash(self._metric), - port=self._logger_port, - pynisher_context=self._multiprocessing_context, - **self._resampling_strategy_arguments) - - status, cost, runtime, additional_info = ta.run(num_run, cutoff=self._time_for_task) + ta = ExecuteTaFuncWithQueue( + backend=self._backend, + autosklearn_seed=self._seed, + resampling_strategy=self._resampling_strategy, + initial_num_run=num_run, + stats=stats, + metric=self._metric, + memory_limit=memory_limit, + disable_file_output=self._disable_evaluator_output, + abort_on_first_run_crash=False, + cost_for_crash=get_cost_of_crash(self._metric), + port=self._logger_port, + pynisher_context=self._multiprocessing_context, + **self._resampling_strategy_arguments, + ) + + status, cost, runtime, additional_info = ta.run( + num_run, cutoff=self._time_for_task + ) if status == StatusType.SUCCESS: self._logger.info("Finished creating dummy predictions.") + + # Fail if dummy prediction fails. else: - if additional_info.get('exitcode') == -6: - self._logger.error( - "Dummy prediction failed with run state %s. " - "The error suggests that the provided memory limits were too tight. Please " - "increase the 'memory_limit' and try again. If this does not solve your " - "problem, please open an issue and paste the additional output. " - "Additional output: %s.", - str(status), str(additional_info), + if additional_info.get("exitcode") == -6: + msg = ( + f"Dummy prediction failed with run state {status}." + " The error suggests that the provided memory limits are too tight." + " Please increase the 'memory_limit' and try again. If this does" + " not solve your problem, please open an issue and paste the" + f" additional output. Additional output: {additional_info}" ) - # Fail if dummy prediction fails. - raise ValueError( - "Dummy prediction failed with run state %s. " - "The error suggests that the provided memory limits were too tight. Please " - "increase the 'memory_limit' and try again. If this does not solve your " - "problem, please open an issue and paste the additional output. " - "Additional output: %s." % - (str(status), str(additional_info)), - ) - else: - self._logger.error( - "Dummy prediction failed with run state %s and additional output: %s.", - str(status), str(additional_info), - ) - # Fail if dummy prediction fails. - raise ValueError( - "Dummy prediction failed with run state %s and additional output: %s." 
- % (str(status), str(additional_info)) + msg = ( + f" Dummy prediction failed with run state {status} and" + f" additional output: {additional_info}.", ) + + self._logger.error(msg) + raise ValueError(msg) + return num_run @classmethod @@ -620,8 +645,9 @@ def fit( if task is None: y_task = type_of_target(y) if not self._supports_task_type(y_task): - raise ValueError(f"{self.__class__.__name__} does not support" - f" task {y_task}") + raise ValueError( + f"{self.__class__.__name__} does not support" f" task {y_task}" + ) self._task = self._task_type_id(y_task) else: self._task = task @@ -675,10 +701,9 @@ def fit( memory_allocation = self._dataset_compression["memory_allocation"] # Remove precision reduction if we can't perform it - if ( - X.dtype not in supported_precision_reductions - and "precision" in cast(List[str], methods) # Removable with TypedDict - ): + if X.dtype not in supported_precision_reductions and "precision" in cast( + List[str], methods + ): # Removable with TypedDict methods = [method for method in methods if method != "precision"] with warnings_to(self._logger): @@ -689,13 +714,15 @@ def fit( is_classification=is_classification, random_state=self._seed, operations=methods, - memory_allocation=memory_allocation + memory_allocation=memory_allocation, ) # Check the re-sampling strategy try: self._check_resampling_strategy( - X=X, y=y, task=self._task, + X=X, + y=y, + task=self._task, ) except Exception as e: self._fit_cleanup() @@ -710,16 +737,16 @@ def fit( # It can be provided in the constructor, or automatically # defined in the estimator fit call if self._metric is None: - raise ValueError('No metric given.') + raise ValueError("No metric given.") if not isinstance(self._metric, Scorer): - raise ValueError('Metric must be instance of ' - 'autosklearn.metrics.Scorer.') + raise ValueError( + "Metric must be instance of " "autosklearn.metrics.Scorer." 
+ ) # If no dask client was provided, we create one, so that we can # start a ensemble process in parallel to smbo optimize - if ( - self._dask_client is None and - (self._ensemble_size > 0 or self._n_jobs is not None and self._n_jobs > 1) + if self._dask_client is None and ( + self._ensemble_size > 0 or self._n_jobs is not None and self._n_jobs > 1 ): self._create_dask_client() else: @@ -732,78 +759,95 @@ def fit( self._feat_type = self.InputValidator.feature_validator.feat_type # Produce debug information to the logfile - self._logger.debug('Starting to print environment information') - self._logger.debug(' Python version: %s', sys.version.split('\n')) + self._logger.debug("Starting to print environment information") + self._logger.debug(" Python version: %s", sys.version.split("\n")) try: - self._logger.debug(f'\tDistribution: {distro.id()}-{distro.version()}-{distro.name()}') + self._logger.debug( + f"\tDistribution: {distro.id()}-{distro.version()}-{distro.name()}" + ) except AttributeError: pass - self._logger.debug(' System: %s', platform.system()) - self._logger.debug(' Machine: %s', platform.machine()) - self._logger.debug(' Platform: %s', platform.platform()) + self._logger.debug(" System: %s", platform.system()) + self._logger.debug(" Machine: %s", platform.machine()) + self._logger.debug(" Platform: %s", platform.platform()) # UNAME appears to leak sensible information # self._logger.debug(' uname: %s', platform.uname()) - self._logger.debug(' Version: %s', platform.version()) - self._logger.debug(' Mac version: %s', platform.mac_ver()) - requirements = pkg_resources.resource_string('autosklearn', 'requirements.txt') - requirements = requirements.decode('utf-8') - requirements = [requirement for requirement in requirements.split('\n')] + self._logger.debug(" Version: %s", platform.version()) + self._logger.debug(" Mac version: %s", platform.mac_ver()) + requirements = pkg_resources.resource_string("autosklearn", "requirements.txt") + requirements = requirements.decode("utf-8") + requirements = [requirement for requirement in requirements.split("\n")] for requirement in requirements: if not requirement: continue match = RE_PATTERN.match(requirement) if match: - name = match.group('name') + name = match.group("name") module_dist = pkg_resources.get_distribution(name) - self._logger.debug(' %s', module_dist) + self._logger.debug(" %s", module_dist) else: - raise ValueError('Unable to read requirement: %s' % requirement) - self._logger.debug('Done printing environment information') - self._logger.debug('Starting to print arguments to auto-sklearn') - self._logger.debug(' tmp_folder: %s', self._backend.context._temporary_directory) - self._logger.debug(' time_left_for_this_task: %f', self._time_for_task) - self._logger.debug(' per_run_time_limit: %f', self._per_run_time_limit) + raise ValueError("Unable to read requirement: %s" % requirement) + self._logger.debug("Done printing environment information") + self._logger.debug("Starting to print arguments to auto-sklearn") + self._logger.debug( + " tmp_folder: %s", self._backend.context._temporary_directory + ) + self._logger.debug(" time_left_for_this_task: %f", self._time_for_task) + self._logger.debug(" per_run_time_limit: %f", self._per_run_time_limit) self._logger.debug( - ' initial_configurations_via_metalearning: %d', + " initial_configurations_via_metalearning: %d", self._initial_configurations_via_metalearning, ) - self._logger.debug(' ensemble_size: %d', self._ensemble_size) - self._logger.debug(' ensemble_nbest: %f', 
self._ensemble_nbest) - self._logger.debug(' max_models_on_disc: %s', str(self._max_models_on_disc)) - self._logger.debug(' seed: %d', self._seed) - self._logger.debug(' memory_limit: %s', str(self._memory_limit)) - self._logger.debug(' metadata_directory: %s', self._metadata_directory) - self._logger.debug(' debug_mode: %s', self._debug_mode) - self._logger.debug(' include: %s', str(self._include)) - self._logger.debug(' exclude: %s', str(self._exclude)) - self._logger.debug(' resampling_strategy: %s', str(self._resampling_strategy)) - self._logger.debug(' resampling_strategy_arguments: %s', - str(self._resampling_strategy_arguments)) - self._logger.debug(' n_jobs: %s', str(self._n_jobs)) - self._logger.debug(' multiprocessing_context: %s', str(self._multiprocessing_context)) - self._logger.debug(' dask_client: %s', str(self._dask_client)) - self._logger.debug(' precision: %s', str(self.precision)) - self._logger.debug(' disable_evaluator_output: %s', str(self._disable_evaluator_output)) - self._logger.debug(' get_smac_objective_callback: %s', str(self._get_smac_object_callback)) - self._logger.debug(' smac_scenario_args: %s', str(self._smac_scenario_args)) - self._logger.debug(' logging_config: %s', str(self.logging_config)) - self._logger.debug(' metric: %s', str(self._metric)) - self._logger.debug('Done printing arguments to auto-sklearn') - self._logger.debug('Starting to print available components') + self._logger.debug(" ensemble_size: %d", self._ensemble_size) + self._logger.debug(" ensemble_nbest: %f", self._ensemble_nbest) + self._logger.debug(" max_models_on_disc: %s", str(self._max_models_on_disc)) + self._logger.debug(" seed: %d", self._seed) + self._logger.debug(" memory_limit: %s", str(self._memory_limit)) + self._logger.debug(" metadata_directory: %s", self._metadata_directory) + self._logger.debug(" debug_mode: %s", self._debug_mode) + self._logger.debug(" include: %s", str(self._include)) + self._logger.debug(" exclude: %s", str(self._exclude)) + self._logger.debug(" resampling_strategy: %s", str(self._resampling_strategy)) + self._logger.debug( + " resampling_strategy_arguments: %s", + str(self._resampling_strategy_arguments), + ) + self._logger.debug(" n_jobs: %s", str(self._n_jobs)) + self._logger.debug( + " multiprocessing_context: %s", str(self._multiprocessing_context) + ) + self._logger.debug(" dask_client: %s", str(self._dask_client)) + self._logger.debug(" precision: %s", str(self.precision)) + self._logger.debug( + " disable_evaluator_output: %s", str(self._disable_evaluator_output) + ) + self._logger.debug( + " get_smac_objective_callback: %s", str(self._get_smac_object_callback) + ) + self._logger.debug(" smac_scenario_args: %s", str(self._smac_scenario_args)) + self._logger.debug(" logging_config: %s", str(self.logging_config)) + self._logger.debug(" metric: %s", str(self._metric)) + self._logger.debug("Done printing arguments to auto-sklearn") + self._logger.debug("Starting to print available components") for choice in ( - ClassifierChoice, RegressorChoice, FeaturePreprocessorChoice, - OHEChoice, RescalingChoice, CoalescenseChoice, + ClassifierChoice, + RegressorChoice, + FeaturePreprocessorChoice, + OHEChoice, + RescalingChoice, + CoalescenseChoice, ): self._logger.debug( - '%s: %s', + "%s: %s", choice.__name__, choice.get_components(), ) - self._logger.debug('Done printing available components') + self._logger.debug("Done printing available components") datamanager = XYDataManager( - X, y, + X, + y, X_test=X_test, y_test=y_test, task=self._task, @@ -812,7 
+856,7 @@ def fit( ) self._backend._make_internals_directory() - self._label_num = datamanager.info['label_num'] + self._label_num = datamanager.info["label_num"] # == Pickle the data manager to speed up loading self._backend.save_datamanager(datamanager) @@ -824,7 +868,8 @@ def fit( self._dataset_name, self._time_for_task, time_for_load_data, - self._logger) + self._logger, + ) # = Create a searchspace # Do this before One Hot Encoding to make sure that it creates a @@ -852,7 +897,7 @@ def fit( # Do this before calculating the meta-features to make sure that the # dummy predictions are actually included in the ensemble even if # calculating the meta-features takes very long - ensemble_task_name = 'runEnsemble' + ensemble_task_name = "runEnsemble" self._stopwatch.start_task(ensemble_task_name) elapsed_time = self._stopwatch.wall_elapsed(self._dataset_name) time_left_for_ensembles = max(0, self._time_for_task - elapsed_time) @@ -861,15 +906,19 @@ def fit( # Fit only raises error when ensemble_size is not zero but # time_left_for_ensembles is zero. if self._ensemble_size > 0: - raise ValueError("Not starting ensemble builder because there " - "is no time left. Try increasing the value " - "of time_left_for_this_task.") + raise ValueError( + "Not starting ensemble builder because there " + "is no time left. Try increasing the value " + "of time_left_for_this_task." + ) elif self._ensemble_size <= 0: - self._logger.info('Not starting ensemble builder because ' - 'ensemble size is <= 0.') + self._logger.info( + "Not starting ensemble builder because " "ensemble size is <= 0." + ) else: self._logger.info( - 'Start Ensemble with %5.2fsec time left' % time_left_for_ensembles) + "Start Ensemble with %5.2fsec time left" % time_left_for_ensembles + ) proc_ensemble = EnsembleBuilderManager( start_time=time.time(), @@ -900,26 +949,26 @@ def fit( pass # => RUN SMAC - smac_task_name = 'runSMAC' + smac_task_name = "runSMAC" self._stopwatch.start_task(smac_task_name) elapsed_time = self._stopwatch.wall_elapsed(self._dataset_name) time_left_for_smac = max(0, self._time_for_task - elapsed_time) if self._logger: - self._logger.info( - 'Start SMAC with %5.2fsec time left' % time_left_for_smac) + self._logger.info("Start SMAC with %5.2fsec time left" % time_left_for_smac) if time_left_for_smac <= 0: - self._logger.warning("Not starting SMAC because there is no time " - "left.") + self._logger.warning("Not starting SMAC because there is no time " "left.") _proc_smac = None self._budget_type = None else: - if self._per_run_time_limit is None or \ - self._per_run_time_limit > time_left_for_smac: + if ( + self._per_run_time_limit is None + or self._per_run_time_limit > time_left_for_smac + ): self._logger.warning( - 'Time limit for a single run is higher than total time ' - 'limit. Capping the limit for a single run to the total ' - 'time given to SMAC (%f)' % time_left_for_smac + "Time limit for a single run is higher than total time " + "limit. 
Capping the limit for a single run to the total " + "time given to SMAC (%f)" % time_left_for_smac ) per_run_time_limit = time_left_for_smac else: @@ -928,7 +977,7 @@ def fit( # Make sure that at least 2 models are created for the ensemble process num_models = time_left_for_smac // per_run_time_limit if num_models < 2: - per_run_time_limit = time_left_for_smac//2 + per_run_time_limit = time_left_for_smac // 2 self._logger.warning( "Capping the per_run_time_limit to {} to have " "time for a least 2 models in each process.".format( @@ -964,19 +1013,24 @@ def fit( port=self._logger_port, pynisher_context=self._multiprocessing_context, ensemble_callback=proc_ensemble, - trials_callback=self._get_trials_callback + trials_callback=self._get_trials_callback, ) try: - self.runhistory_, self.trajectory_, self._budget_type = \ - _proc_smac.run_smbo() + ( + self.runhistory_, + self.trajectory_, + self._budget_type, + ) = _proc_smac.run_smbo() trajectory_filename = os.path.join( self._backend.get_smac_output_directory_for_run(self._seed), - 'trajectory.json') - saveable_trajectory = \ - [list(entry[:2]) + [entry[2].get_dictionary()] + list(entry[3:]) - for entry in self.trajectory_] - with open(trajectory_filename, 'w') as fh: + "trajectory.json", + ) + saveable_trajectory = [ + list(entry[:2]) + [entry[2].get_dictionary()] + list(entry[3:]) + for entry in self.trajectory_ + ] + with open(trajectory_filename, "w") as fh: json.dump(saveable_trajectory, fh) except Exception as e: self._logger.exception(e) @@ -989,9 +1043,11 @@ def fit( self.ensemble_performance_history = list(proc_ensemble.history) if len(proc_ensemble.futures) > 0: - # Now we need to wait for the future to return as it cannot be cancelled while it - # is running: https://stackoverflow.com/a/49203129 - self._logger.info("Ensemble script still running, waiting for it to finish.") + # Now we need to wait for the future to return as it cannot be cancelled + # while it is running: https://stackoverflow.com/a/49203129 + self._logger.info( + "Ensemble script still running, waiting for it to finish." 
+ ) result = proc_ensemble.futures.pop().result() if result: ensemble_history, _, _, _, _ = result @@ -1001,7 +1057,10 @@ def fit( # save the ensemble performance history file if len(self.ensemble_performance_history) > 0: pd.DataFrame(self.ensemble_performance_history).to_json( - os.path.join(self._backend.internals_directory, 'ensemble_history.json')) + os.path.join( + self._backend.internals_directory, "ensemble_history.json" + ) + ) if load_models: self._logger.info("Loading models...") @@ -1046,40 +1105,58 @@ def _check_resampling_strategy( """ is_split_object = isinstance( self._resampling_strategy, - (BaseCrossValidator, _RepeatedSplits, BaseShuffleSplit) + (BaseCrossValidator, _RepeatedSplits, BaseShuffleSplit), ) - if self._resampling_strategy not in [ - 'holdout', - 'holdout-iterative-fit', - 'cv', - 'cv-iterative-fit', - 'partial-cv', - 'partial-cv-iterative-fit', - ] and not is_split_object: - raise ValueError('Illegal resampling strategy: %s' % self._resampling_strategy) + if ( + self._resampling_strategy + not in [ + "holdout", + "holdout-iterative-fit", + "cv", + "cv-iterative-fit", + "partial-cv", + "partial-cv-iterative-fit", + ] + and not is_split_object + ): + raise ValueError( + "Illegal resampling strategy: %s" % self._resampling_strategy + ) elif is_split_object: TrainEvaluator.check_splitter_resampling_strategy( - X=X, y=y, task=task, - groups=self._resampling_strategy_arguments.get('groups', None), + X=X, + y=y, + task=task, + groups=self._resampling_strategy_arguments.get("groups", None), resampling_strategy=self._resampling_strategy, ) - elif self._resampling_strategy in [ - 'partial-cv', - 'partial-cv-iterative-fit', - ] and self._ensemble_size != 0: - raise ValueError("Resampling strategy %s cannot be used " - "together with ensembles." % self._resampling_strategy) - - elif self._resampling_strategy in [ - 'partial-cv', - 'cv', - 'cv-iterative-fit', - 'partial-cv-iterative-fit', - ] and 'folds' not in self._resampling_strategy_arguments: - self._resampling_strategy_arguments['folds'] = 5 + elif ( + self._resampling_strategy + in [ + "partial-cv", + "partial-cv-iterative-fit", + ] + and self._ensemble_size != 0 + ): + raise ValueError( + "Resampling strategy %s cannot be used " + "together with ensembles." % self._resampling_strategy + ) + + elif ( + self._resampling_strategy + in [ + "partial-cv", + "cv", + "cv-iterative-fit", + "partial-cv-iterative-fit", + ] + and "folds" not in self._resampling_strategy_arguments + ): + self._resampling_strategy_arguments["folds"] = 5 return @@ -1089,8 +1166,10 @@ def refit(self, X, y): # Make sure input data is valid if self.InputValidator is None or not self.InputValidator._is_fitted: - raise ValueError("refit() is only supported after calling fit. Kindly call first " - "the estimator fit() method.") + raise ValueError( + "refit() is only supported after calling fit. Kindly call first " + "the estimator fit() method." 
+ ) X, y = self.InputValidator.transform(X, y) if self.models_ is None or len(self.models_) == 0 or self.ensemble_ is None: @@ -1142,7 +1221,7 @@ def fit_pipeline( X: SUPPORTED_FEAT_TYPES, y: Union[SUPPORTED_TARGET_TYPES, spmatrix], is_classification: bool, - config: Union[Configuration, Dict[str, Union[str, float, int]]], + config: Union[Configuration, Dict[str, Union[str, float, int]]], task: Optional[int] = None, dataset_name: Optional[str] = None, X_test: Optional[SUPPORTED_FEAT_TYPES] = None, @@ -1150,7 +1229,7 @@ def fit_pipeline( feat_type: Optional[List[str]] = None, **kwargs: Dict, ) -> Tuple[Optional[BasePipeline], RunInfo, RunValue]: - """ Fits and individual pipeline configuration and returns + """Fits and individual pipeline configuration and returns the result to the user. The Estimator constraints are honored, for example the resampling @@ -1170,8 +1249,8 @@ def fit_pipeline( y_test: array-like If provided, the testing performance will be tracked on this labels config: Union[Configuration, Dict[str, Union[str, float, int]]] - A configuration object used to define the pipeline steps. If a dictionary is passed, - a configuration is created based on this dictionary. + A configuration object used to define the pipeline steps. + If a dict is passed, a configuration is created based on this dict. dataset_name: Optional[str] A string to tag and identify the Auto-Sklearn run is_classification: bool @@ -1202,8 +1281,9 @@ def fit_pipeline( if task is None: y_task = type_of_target(y) if not self._supports_task_type(y_task): - raise ValueError(f"{self.__class__.__name__} does not support" - f" task {y_task}") + raise ValueError( + f"{self.__class__.__name__} does not support" f" task {y_task}" + ) self._task = self._task_type_id(y_task) else: self._task = task @@ -1217,12 +1297,16 @@ def fit_pipeline( # dataset if self.configuration_space is None: self.configuration_space = self.fit( - X=X, y=y, - dataset_name=dataset_name if dataset_name is not None else self._dataset_name, + X=X, + y=y, + dataset_name=dataset_name + if dataset_name is not None + else self._dataset_name, X_test=X_test, y_test=y_test, feat_type=feat_type, - only_return_configuration_space=True) + only_return_configuration_space=True, + ) # We do not want to overwrite existing runs self.num_run += 1 @@ -1231,25 +1315,25 @@ def fit_pipeline( config.config_id = self.num_run # Prepare missing components to the TAE function call - if 'include' not in kwargs: - kwargs['include'] = self._include - if 'exclude' not in kwargs: - kwargs['exclude'] = self._exclude - if 'memory_limit' not in kwargs: - kwargs['memory_limit'] = self._memory_limit - if 'resampling_strategy' not in kwargs: - kwargs['resampling_strategy'] = self._resampling_strategy - if 'metric' not in kwargs: - kwargs['metric'] = self._metric - if 'disable_file_output' not in kwargs: - kwargs['disable_file_output'] = self._disable_evaluator_output - if 'pynisher_context' not in kwargs: - kwargs['pynisher_context'] = self._multiprocessing_context - if 'stats' not in kwargs: + if "include" not in kwargs: + kwargs["include"] = self._include + if "exclude" not in kwargs: + kwargs["exclude"] = self._exclude + if "memory_limit" not in kwargs: + kwargs["memory_limit"] = self._memory_limit + if "resampling_strategy" not in kwargs: + kwargs["resampling_strategy"] = self._resampling_strategy + if "metric" not in kwargs: + kwargs["metric"] = self._metric + if "disable_file_output" not in kwargs: + kwargs["disable_file_output"] = self._disable_evaluator_output + if 
"pynisher_context" not in kwargs: + kwargs["pynisher_context"] = self._multiprocessing_context + if "stats" not in kwargs: scenario_mock = unittest.mock.Mock() scenario_mock.wallclock_limit = self._time_for_task - kwargs['stats'] = Stats(scenario_mock) - kwargs['stats'].start_timing() + kwargs["stats"] = Stats(scenario_mock) + kwargs["stats"].start_timing() # Fit a pipeline, which will be stored on disk # which we can later load via the backend @@ -1257,10 +1341,10 @@ def fit_pipeline( backend=self._backend, autosklearn_seed=self._seed, abort_on_first_run_crash=False, - cost_for_crash=get_cost_of_crash(kwargs['metric']), + cost_for_crash=get_cost_of_crash(kwargs["metric"]), port=self._logger_port, **kwargs, - **self._resampling_strategy_arguments + **self._resampling_strategy_arguments, ) run_info, run_value = ta.run_wrapper( @@ -1269,16 +1353,16 @@ def fit_pipeline( instance=None, instance_specific=None, seed=self._seed, - cutoff=kwargs.pop('cutoff', self._per_run_time_limit), + cutoff=kwargs.pop("cutoff", self._per_run_time_limit), capped=False, ) ) pipeline = None - if kwargs['disable_file_output'] or kwargs['resampling_strategy'] == 'test': + if kwargs["disable_file_output"] or kwargs["resampling_strategy"] == "test": self._logger.warning("File output is disabled. No pipeline can returned") elif run_value.status == StatusType.SUCCESS: - if kwargs['resampling_strategy'] in ('cv', 'cv-iterative-fit'): + if kwargs["resampling_strategy"] in ("cv", "cv-iterative-fit"): load_function = self._backend.load_cv_model_by_seed_and_id_and_budget else: load_function = self._backend.load_model_by_seed_and_id_and_budget @@ -1309,13 +1393,14 @@ def predict(self, X, batch_size=None, n_jobs=1): processes. """ if ( - self._resampling_strategy not in ( - 'holdout', 'holdout-iterative-fit', 'cv', 'cv-iterative-fit') + self._resampling_strategy + not in ("holdout", "holdout-iterative-fit", "cv", "cv-iterative-fit") and not self._can_predict ): raise NotImplementedError( - 'Predict is currently not implemented for resampling ' - 'strategy %s, please call refit().' % self._resampling_strategy) + "Predict is currently not implemented for resampling " + "strategy %s, please call refit()." % self._resampling_strategy + ) if self.models_ is None or len(self.models_) == 0 or self.ensemble_ is None: self._load_models() @@ -1324,13 +1409,17 @@ def predict(self, X, batch_size=None, n_jobs=1): # In such cases, raise error because predict and predict_proba cannot # be called. if self.ensemble_ is None: - raise ValueError("Predict and predict_proba can only be called " - "if 'ensemble_size != 0'") + raise ValueError( + "Predict and predict_proba can only be called " + "if 'ensemble_size != 0'" + ) # Make sure that input is valid if self.InputValidator is None or not self.InputValidator._is_fitted: - raise ValueError("predict() can only be called after performing fit(). Kindly call " - "the estimator fit() method first.") + raise ValueError( + "predict() can only be called after performing fit(). Kindly call " + "the estimator fit() method first." + ) X = self.InputValidator.feature_validator.transform(X) # Parallelize predictions across models with n_jobs processes. 
@@ -1352,24 +1441,26 @@ def predict(self, X, batch_size=None, n_jobs=1): check_is_fitted(list(self.cv_models_.values())[0]) models = self.cv_models_ except sklearn.exceptions.NotFittedError: - raise ValueError('Found no fitted models!') + raise ValueError("Found no fitted models!") all_predictions = joblib.Parallel(n_jobs=n_jobs)( joblib.delayed(_model_predict)( - model=models[identifier], - X=X, - task=self._task, - batch_size=batch_size + model=models[identifier], X=X, task=self._task, batch_size=batch_size ) for identifier in self.ensemble_.get_selected_model_identifiers() ) if len(all_predictions) == 0: - raise ValueError('Something went wrong generating the predictions. ' - 'The ensemble should consist of the following ' - 'models: %s, the following models were loaded: ' - '%s' % (str(list(self.ensemble_indices_.keys())), - str(list(self.models_.keys())))) + raise ValueError( + "Something went wrong generating the predictions. " + "The ensemble should consist of the following " + "models: %s, the following models were loaded: " + "%s" + % ( + str(list(self.ensemble_indices_.keys())), + str(list(self.models_.keys())), + ) + ) predictions = self.ensemble_.predict(all_predictions) @@ -1381,23 +1472,33 @@ def predict(self, X, batch_size=None, n_jobs=1): return predictions - def fit_ensemble(self, y, task=None, precision=32, - dataset_name=None, ensemble_nbest=None, - ensemble_size=None): + def fit_ensemble( + self, + y, + task=None, + precision=32, + dataset_name=None, + ensemble_nbest=None, + ensemble_size=None, + ): # AutoSklearn does not handle sparse y for now y = convert_if_sparse(y) - if self._resampling_strategy in ['partial-cv', 'partial-cv-iterative-fit']: - raise ValueError('Cannot call fit_ensemble with resampling ' - 'strategy %s.' % self._resampling_strategy) + if self._resampling_strategy in ["partial-cv", "partial-cv-iterative-fit"]: + raise ValueError( + "Cannot call fit_ensemble with resampling " + "strategy %s." % self._resampling_strategy + ) if self._logger is None: self._logger = self._get_logger(dataset_name) # Make sure that input is valid if self.InputValidator is None or not self.InputValidator._is_fitted: - raise ValueError("fit_ensemble() can only be called after fit. Please call the " - "estimator fit() method prior to fit_ensemble().") + raise ValueError( + "fit_ensemble() can only be called after fit. Please call the " + "estimator fit() method prior to fit_ensemble()." + ) y = self.InputValidator.target_validator.transform(y) # Create a client if needed @@ -1432,8 +1533,10 @@ def fit_ensemble(self, y, task=None, precision=32, future = manager.futures.pop() result = future.result() if result is None: - raise ValueError("Error building the ensemble - please check the log file and command " - "line output for error messages.") + raise ValueError( + "Error building the ensemble - please check the log file and command " + "line output for error messages." 
+ ) self.ensemble_performance_history, _, _, _, _ = result self._load_models() @@ -1450,29 +1553,34 @@ def _load_models(self): if self.ensemble_: identifiers = self.ensemble_.get_selected_model_identifiers() self.models_ = self._backend.load_models_by_identifiers(identifiers) - if self._resampling_strategy in ('cv', 'cv-iterative-fit'): - self.cv_models_ = self._backend.load_cv_models_by_identifiers(identifiers) + if self._resampling_strategy in ("cv", "cv-iterative-fit"): + self.cv_models_ = self._backend.load_cv_models_by_identifiers( + identifiers + ) else: self.cv_models_ = None + if len(self.models_) == 0 and self._resampling_strategy not in [ + "partial-cv", + "partial-cv-iterative-fit", + ]: + raise ValueError("No models fitted!") if ( - len(self.models_) == 0 and - self._resampling_strategy not in ['partial-cv', 'partial-cv-iterative-fit'] - ): - raise ValueError('No models fitted!') - if ( - self._resampling_strategy in ['cv', 'cv-iterative-fit'] + self._resampling_strategy in ["cv", "cv-iterative-fit"] and len(self.cv_models_) == 0 ): - raise ValueError('No models fitted!') + raise ValueError("No models fitted!") - elif self._disable_evaluator_output is False or \ - (isinstance(self._disable_evaluator_output, List) and - 'model' not in self._disable_evaluator_output): + elif self._disable_evaluator_output is False or ( + isinstance(self._disable_evaluator_output, List) + and "model" not in self._disable_evaluator_output + ): model_names = self._backend.list_all_models(self._seed) - if len(model_names) == 0 and self._resampling_strategy not in \ - ['partial-cv', 'partial-cv-iterative-fit']: - raise ValueError('No models fitted!') + if len(model_names) == 0 and self._resampling_strategy not in [ + "partial-cv", + "partial-cv-iterative-fit", + ]: + raise ValueError("No models fitted!") self.models_ = [] @@ -1522,8 +1630,10 @@ def score(self, X, y): # Make sure that input is valid if self.InputValidator is None or not self.InputValidator._is_fitted: - raise ValueError("score() is only supported after calling fit. Kindly call first " - "the estimator fit() method.") + raise ValueError( + "score() is only supported after calling fit. Kindly call first " + "the estimator fit() method." 
+ ) y = self.InputValidator.target_validator.transform(y) # Encode the prediction using the input validator @@ -1534,10 +1644,12 @@ def score(self, X, y): # same representation domain prediction = self.InputValidator.target_validator.transform(prediction) - return calculate_metric(solution=y, - prediction=prediction, - task_type=self._task, - metric=self._metric, ) + return calculate_metric( + solution=y, + prediction=prediction, + task_type=self._task, + metric=self._metric, + ) def _get_runhistory_models_performance(self): metric = self._metric @@ -1549,20 +1661,24 @@ def _get_runhistory_models_performance(self): continue # Alternatively, it is possible to also obtain the start time with # ``run_value.starttime`` - endtime = pd.Timestamp(time.strftime('%Y-%m-%d %H:%M:%S', - time.localtime(run_value.endtime))) + endtime = pd.Timestamp( + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(run_value.endtime)) + ) val_score = metric._optimum - (metric._sign * run_value.cost) - train_score = metric._optimum - (metric._sign * run_value.additional_info['train_loss']) + train_score = metric._optimum - ( + metric._sign * run_value.additional_info["train_loss"] + ) scores = { - 'Timestamp': endtime, - 'single_best_optimization_score': val_score, - 'single_best_train_score': train_score, + "Timestamp": endtime, + "single_best_optimization_score": val_score, + "single_best_train_score": train_score, } # Append test-scores, if data for test_loss are available. # This is the case, if X_test and y_test where provided. - if 'test_loss' in run_value.additional_info: - test_score = metric._optimum - (metric._sign * - run_value.additional_info['test_loss']) + if "test_loss" in run_value.additional_info: + test_score = metric._optimum - ( + metric._sign * run_value.additional_info["test_loss"] + ) scores["single_best_test_score"] = test_score performance_list.append(scores) @@ -1571,13 +1687,17 @@ def _get_runhistory_models_performance(self): @property def performance_over_time_(self): individual_performance_frame = self._get_runhistory_models_performance() - best_values = pd.Series({'single_best_optimization_score': -np.inf, - 'single_best_test_score': -np.inf, - 'single_best_train_score': -np.inf}) + best_values = pd.Series( + { + "single_best_optimization_score": -np.inf, + "single_best_test_score": -np.inf, + "single_best_train_score": -np.inf, + } + ) for idx in individual_performance_frame.index: if ( - individual_performance_frame.loc[idx, 'single_best_optimization_score'] - > best_values['single_best_optimization_score'] + individual_performance_frame.loc[idx, "single_best_optimization_score"] + > best_values["single_best_optimization_score"] ): best_values = individual_performance_frame.loc[idx] individual_performance_frame.loc[idx] = best_values @@ -1586,21 +1706,27 @@ def performance_over_time_(self): if self._ensemble_size != 0: ensemble_performance_frame = pd.DataFrame(self.ensemble_performance_history) - best_values = pd.Series({'ensemble_optimization_score': -np.inf, - 'ensemble_test_score': -np.inf}) + best_values = pd.Series( + {"ensemble_optimization_score": -np.inf, "ensemble_test_score": -np.inf} + ) for idx in ensemble_performance_frame.index: if ( - ensemble_performance_frame.loc[idx, 'ensemble_optimization_score'] - > best_values['ensemble_optimization_score'] + ensemble_performance_frame.loc[idx, "ensemble_optimization_score"] + > best_values["ensemble_optimization_score"] ): best_values = ensemble_performance_frame.loc[idx] ensemble_performance_frame.loc[idx] = best_values - 
performance_over_time = pd.merge( - ensemble_performance_frame, - individual_performance_frame, - on="Timestamp", how='outer' - ).sort_values('Timestamp').fillna(method='ffill') + performance_over_time = ( + pd.merge( + ensemble_performance_frame, + individual_performance_frame, + on="Timestamp", + how="outer", + ) + .sort_values("Timestamp") + .fillna(method="ffill") + ) return performance_over_time @@ -1623,8 +1749,8 @@ def cv_results_(self): # TODO: add those arguments # TODO remove this restriction! - if self._resampling_strategy in ['partial-cv', 'partial-cv-iterative-fit']: - raise ValueError('Cannot call cv_results when using partial-cv!') + if self._resampling_strategy in ["partial-cv", "partial-cv-iterative-fit"]: + raise ValueError("Cannot call cv_results when using partial-cv!") parameter_dictionaries = dict() masks = dict() @@ -1659,19 +1785,19 @@ def cv_results_(self): s = run_value.status if s == StatusType.SUCCESS: - status.append('Success') + status.append("Success") elif s == StatusType.DONOTADVANCE: - status.append('Success (but do not advance to higher budget)') + status.append("Success (but do not advance to higher budget)") elif s == StatusType.TIMEOUT: - status.append('Timeout') + status.append("Timeout") elif s == StatusType.CRASHED: - status.append('Crash') + status.append("Crash") elif s == StatusType.ABORT: - status.append('Abort') + status.append("Abort") elif s == StatusType.MEMOUT: - status.append('Memout') - # TODO remove StatusType.RUNNING at some point in the future when the new SMAC 0.13.2 - # is the new minimum required version! + status.append("Memout") + # TODO remove StatusType.RUNNING at some point in the future when the new + # SMAC 0.13.2 is the new minimum required version! elif s in (StatusType.STOP, StatusType.RUNNING): continue else: @@ -1679,7 +1805,9 @@ def cv_results_(self): param_dict = config.get_dictionary() params.append(param_dict) - mean_test_score.append(self._metric._optimum - (self._metric._sign * run_value.cost)) + mean_test_score.append( + self._metric._optimum - (self._metric._sign * run_value.cost) + ) mean_fit_time.append(run_value.time) budgets.append(run_key.budget) @@ -1705,69 +1833,79 @@ def cv_results_(self): metric_dict[metric.name].append(metric_value) metric_mask[metric.name].append(mask_value) - results['mean_test_score'] = np.array(mean_test_score) + results["mean_test_score"] = np.array(mean_test_score) for name in metric_name: - masked_array = ma.MaskedArray(metric_dict[name], - metric_mask[name]) - results['metric_%s' % name] = masked_array + masked_array = ma.MaskedArray(metric_dict[name], metric_mask[name]) + results["metric_%s" % name] = masked_array - results['mean_fit_time'] = np.array(mean_fit_time) - results['params'] = params - rank_order = -1 * self._metric._sign * results['mean_test_score'] - results['rank_test_scores'] = scipy.stats.rankdata(rank_order, method='min') - results['status'] = status - results['budgets'] = budgets + results["mean_fit_time"] = np.array(mean_fit_time) + results["params"] = params + rank_order = -1 * self._metric._sign * results["mean_test_score"] + results["rank_test_scores"] = scipy.stats.rankdata(rank_order, method="min") + results["status"] = status + results["budgets"] = budgets for hp_name in hp_names: - masked_array = ma.MaskedArray(parameter_dictionaries[hp_name], - masks[hp_name]) - results['param_%s' % hp_name] = masked_array + masked_array = ma.MaskedArray( + parameter_dictionaries[hp_name], masks[hp_name] + ) + results["param_%s" % hp_name] = masked_array return 
results def sprint_statistics(self): cv_results = self.cv_results_ sio = io.StringIO() - sio.write('auto-sklearn results:\n') - sio.write(' Dataset name: %s\n' % self._dataset_name) - sio.write(' Metric: %s\n' % self._metric) - idx_success = np.where(np.array( - [status in ['Success', 'Success (but do not advance to higher budget)'] - for status in cv_results['status']] - ))[0] + sio.write("auto-sklearn results:\n") + sio.write(" Dataset name: %s\n" % self._dataset_name) + sio.write(" Metric: %s\n" % self._metric) + idx_success = np.where( + np.array( + [ + status + in ["Success", "Success (but do not advance to higher budget)"] + for status in cv_results["status"] + ] + ) + )[0] if len(idx_success) > 0: if not self._metric._optimum: - idx_best_run = np.argmin(cv_results['mean_test_score'][idx_success]) + idx_best_run = np.argmin(cv_results["mean_test_score"][idx_success]) else: - idx_best_run = np.argmax(cv_results['mean_test_score'][idx_success]) - best_score = cv_results['mean_test_score'][idx_success][idx_best_run] - sio.write(' Best validation score: %f\n' % best_score) - num_runs = len(cv_results['status']) - sio.write(' Number of target algorithm runs: %d\n' % num_runs) - num_success = sum([ - s in ['Success', 'Success (but do not advance to higher budget)'] - for s in cv_results['status'] - ]) - sio.write(' Number of successful target algorithm runs: %d\n' % num_success) - num_crash = sum([s == 'Crash' for s in cv_results['status']]) - sio.write(' Number of crashed target algorithm runs: %d\n' % num_crash) - num_timeout = sum([s == 'Timeout' for s in cv_results['status']]) - sio.write(' Number of target algorithms that exceeded the time ' - 'limit: %d\n' % num_timeout) - num_memout = sum([s == 'Memout' for s in cv_results['status']]) - sio.write(' Number of target algorithms that exceeded the memory ' - 'limit: %d\n' % num_memout) + idx_best_run = np.argmax(cv_results["mean_test_score"][idx_success]) + best_score = cv_results["mean_test_score"][idx_success][idx_best_run] + sio.write(" Best validation score: %f\n" % best_score) + num_runs = len(cv_results["status"]) + sio.write(" Number of target algorithm runs: %d\n" % num_runs) + num_success = sum( + [ + s in ["Success", "Success (but do not advance to higher budget)"] + for s in cv_results["status"] + ] + ) + sio.write(" Number of successful target algorithm runs: %d\n" % num_success) + num_crash = sum([s == "Crash" for s in cv_results["status"]]) + sio.write(" Number of crashed target algorithm runs: %d\n" % num_crash) + num_timeout = sum([s == "Timeout" for s in cv_results["status"]]) + sio.write( + " Number of target algorithms that exceeded the time " + "limit: %d\n" % num_timeout + ) + num_memout = sum([s == "Memout" for s in cv_results["status"]]) + sio.write( + " Number of target algorithms that exceeded the memory " + "limit: %d\n" % num_memout + ) return sio.getvalue() def get_models_with_weights(self): - if self.models_ is None or len(self.models_) == 0 or \ - self.ensemble_ is None: + if self.models_ is None or len(self.models_) == 0 or self.ensemble_ is None: self._load_models() return self.ensemble_.get_models_with_weights(self.models_) def show_models(self) -> Dict[int, Any]: - """ Returns a dictionary containing dictionaries of ensemble models. + """Returns a dictionary containing dictionaries of ensemble models. Each model in the ensemble can be accessed by giving its ``model_id`` as key. 
@@ -1821,11 +1959,12 @@ def show_models(self) -> Dict[int, Any]: 'rank': 2, 'cost': 0.4550418898836528, 'ensemble_weight': 0.3, - 'data_preprocessor': , + 'data_preprocessor': , 'feature_preprocessor': , 'regressor': , - 'sklearn_regressor': ARDRegression(alpha_1=0.0003701926442639788,...) - }... + 'sklearn_regressor': ARDRegression(alpha_1=0.027,...) + }, + ... } Returns @@ -1833,8 +1972,7 @@ def show_models(self) -> Dict[int, Any]: Dict(int, Any) : dictionary of length = number of models in the ensemble A dictionary of models in the ensemble, where ``model_id`` is the key. - """ - + """ # noqa: E501 ensemble_dict = {} def has_key(rv, key): @@ -1842,26 +1980,25 @@ def has_key(rv, key): table_dict = {} for rkey, rval in self.runhistory_.data.items(): - if has_key(rval, 'num_run'): - model_id = rval.additional_info['num_run'] - table_dict[model_id] = { - 'model_id': model_id, - 'cost': rval.cost - } + if has_key(rval, "num_run"): + model_id = rval.additional_info["num_run"] + table_dict[model_id] = {"model_id": model_id, "cost": rval.cost} # Checking if the dictionary is empty if not table_dict: - raise RuntimeError('No model found. Try increasing \'time_left_for_this_task\'.') + raise RuntimeError( + "No model found. Try increasing 'time_left_for_this_task'." + ) for i, weight in enumerate(self.ensemble_.weights_): (_, model_id, _) = self.ensemble_.identifiers_[i] - table_dict[model_id]['ensemble_weight'] = weight + table_dict[model_id]["ensemble_weight"] = weight - table = pd.DataFrame.from_dict(table_dict, orient='index') - table.sort_values(by='cost', inplace=True) + table = pd.DataFrame.from_dict(table_dict, orient="index") + table.sort_values(by="cost", inplace=True) - # Checking which resampling strategy is chosen and selecting the appropriate models - is_cv = (self._resampling_strategy == "cv") + # Check which resampling strategy is chosen and selecting the appropriate models + is_cv = self._resampling_strategy == "cv" models = self.cv_models_ if is_cv else self.models_ rank = 1 # Initializing rank for the first model @@ -1869,23 +2006,23 @@ def has_key(rv, key): model_dict = {} # Declaring model dictionary # Inserting model_id, rank, cost and ensemble weight - model_dict['model_id'] = table.loc[model_id]['model_id'].astype(int) - model_dict['rank'] = rank - model_dict['cost'] = table.loc[model_id]['cost'] - model_dict['ensemble_weight'] = table.loc[model_id]['ensemble_weight'] + model_dict["model_id"] = table.loc[model_id]["model_id"].astype(int) + model_dict["rank"] = rank + model_dict["cost"] = table.loc[model_id]["cost"] + model_dict["ensemble_weight"] = table.loc[model_id]["ensemble_weight"] rank += 1 # Incrementing rank by 1 for the next model # The steps in the models pipeline are as follows: # 'data_preprocessor': DataPreprocessor, # 'balancing': Balancing, # 'feature_preprocessor': FeaturePreprocessorChoice, - # 'classifier'/'regressor': ClassifierChoice/RegressorChoice (autosklearn wrapped model) + # 'classifier'/'regressor': ClassifierChoice/RegressorChoice (wrapped model) # For 'cv' (cross validation) strategy if is_cv: # Voting model created by cross validation cv_voting_ensemble = model - model_dict['voting_model'] = cv_voting_ensemble + model_dict["voting_model"] = cv_voting_ensemble # List of models, each trained on one cv fold cv_models = [] @@ -1894,9 +2031,11 @@ def has_key(rv, key): # Adding sklearn model to the model dictionary model_type, autosklearn_wrapped_model = cv_model.steps[-1] - estimator[f'sklearn_{model_type}'] = 
autosklearn_wrapped_model.choice.estimator + estimator[ + f"sklearn_{model_type}" + ] = autosklearn_wrapped_model.choice.estimator cv_models.append(estimator) - model_dict['estimators'] = cv_models + model_dict["estimators"] = cv_models # For any other strategy else: @@ -1905,7 +2044,9 @@ def has_key(rv, key): # Adding sklearn model to the model dictionary model_type, autosklearn_wrapped_model = model.steps[-1] - model_dict[f'sklearn_{model_type}'] = autosklearn_wrapped_model.choice.estimator + model_dict[ + f"sklearn_{model_type}" + ] = autosklearn_wrapped_model.choice.estimator # Insterting model_dict in the ensemble dictionary ensemble_dict[model_id] = model_dict @@ -1920,21 +2061,20 @@ def _create_search_space( include: Optional[Dict[str, List[str]]] = None, exclude: Optional[Dict[str, List[str]]] = None, ): - task_name = 'CreateConfigSpace' + task_name = "CreateConfigSpace" self._stopwatch.start_task(task_name) - configspace_path = os.path.join(tmp_dir, 'space.json') + configspace_path = os.path.join(tmp_dir, "space.json") configuration_space = pipeline.get_configuration_space( datamanager.info, include=include, exclude=exclude, ) configuration_space = self.configuration_space_created_hook( - datamanager, configuration_space) + datamanager, configuration_space + ) backend.write_txt_file( - configspace_path, - cs_json.write(configuration_space), - 'Configuration space' + configspace_path, cs_json.write(configuration_space), "Configuration space" ) self._stopwatch.stop_task(task_name) @@ -1960,9 +2100,9 @@ def __del__(self): class AutoMLClassifier(AutoML): _task_mapping = { - 'multilabel-indicator': MULTILABEL_CLASSIFICATION, - 'multiclass': MULTICLASS_CLASSIFICATION, - 'binary': BINARY_CLASSIFICATION, + "multilabel-indicator": MULTILABEL_CLASSIFICATION, + "multiclass": MULTICLASS_CLASSIFICATION, + "binary": BINARY_CLASSIFICATION, } @classmethod @@ -1985,7 +2125,8 @@ def fit( load_models: bool = True, ): return super().fit( - X, y, + X, + y, X_test=X_test, y_test=y_test, feat_type=feat_type, @@ -1999,7 +2140,7 @@ def fit_pipeline( self, X: SUPPORTED_FEAT_TYPES, y: Union[SUPPORTED_TARGET_TYPES, spmatrix], - config: Union[Configuration, Dict[str, Union[str, float, int]]], + config: Union[Configuration, Dict[str, Union[str, float, int]]], dataset_name: Optional[str] = None, X_test: Optional[SUPPORTED_FEAT_TYPES] = None, y_test: Optional[Union[SUPPORTED_TARGET_TYPES, spmatrix]] = None, @@ -2007,8 +2148,10 @@ def fit_pipeline( **kwargs, ) -> Tuple[Optional[BasePipeline], RunInfo, RunValue]: return super().fit_pipeline( - X=X, y=y, - X_test=X_test, y_test=y_test, + X=X, + y=y, + X_test=X_test, + y_test=y_test, dataset_name=dataset_name, config=config, is_classification=True, @@ -2017,12 +2160,15 @@ def fit_pipeline( ) def predict(self, X, batch_size=None, n_jobs=1): - predicted_probabilities = super().predict(X, batch_size=batch_size, - n_jobs=n_jobs) + predicted_probabilities = super().predict( + X, batch_size=batch_size, n_jobs=n_jobs + ) if self.InputValidator is None or not self.InputValidator._is_fitted: - raise ValueError("predict() is only supported after calling fit. Kindly call first " - "the estimator fit() method.") + raise ValueError( + "predict() is only supported after calling fit. Kindly call first " + "the estimator fit() method." 
+ ) if self.InputValidator.target_validator.is_single_column_target(): predicted_indexes = np.argmax(predicted_probabilities, axis=1) else: @@ -2037,9 +2183,9 @@ def predict_proba(self, X, batch_size=None, n_jobs=1): class AutoMLRegressor(AutoML): _task_mapping = { - 'continuous-multioutput': MULTIOUTPUT_REGRESSION, - 'continuous': REGRESSION, - 'multiclass': REGRESSION, + "continuous-multioutput": MULTIOUTPUT_REGRESSION, + "continuous": REGRESSION, + "multiclass": REGRESSION, } @classmethod @@ -2062,7 +2208,8 @@ def fit( load_models: bool = True, ): return super().fit( - X, y, + X, + y, X_test=X_test, y_test=y_test, feat_type=feat_type, @@ -2076,7 +2223,7 @@ def fit_pipeline( self, X: SUPPORTED_FEAT_TYPES, y: Union[SUPPORTED_TARGET_TYPES, spmatrix], - config: Union[Configuration, Dict[str, Union[str, float, int]]], + config: Union[Configuration, Dict[str, Union[str, float, int]]], dataset_name: Optional[str] = None, X_test: Optional[SUPPORTED_FEAT_TYPES] = None, y_test: Optional[Union[SUPPORTED_TARGET_TYPES, spmatrix]] = None, @@ -2084,8 +2231,10 @@ def fit_pipeline( **kwargs: Dict, ) -> Tuple[Optional[BasePipeline], RunInfo, RunValue]: return super().fit_pipeline( - X=X, y=y, - X_test=X_test, y_test=y_test, + X=X, + y=y, + X_test=X_test, + y_test=y_test, config=config, feat_type=feat_type, dataset_name=dataset_name, diff --git a/autosklearn/constants.py b/autosklearn/constants.py index 60a025999e..2db372925c 100644 --- a/autosklearn/constants.py +++ b/autosklearn/constants.py @@ -7,21 +7,26 @@ MULTIOUTPUT_REGRESSION = 5 REGRESSION_TASKS = [REGRESSION, MULTIOUTPUT_REGRESSION] -CLASSIFICATION_TASKS = [BINARY_CLASSIFICATION, MULTICLASS_CLASSIFICATION, - MULTILABEL_CLASSIFICATION] +CLASSIFICATION_TASKS = [ + BINARY_CLASSIFICATION, + MULTICLASS_CLASSIFICATION, + MULTILABEL_CLASSIFICATION, +] TASK_TYPES = REGRESSION_TASKS + CLASSIFICATION_TASKS -TASK_TYPES_TO_STRING = \ - {BINARY_CLASSIFICATION: 'binary.classification', - MULTICLASS_CLASSIFICATION: 'multiclass.classification', - MULTILABEL_CLASSIFICATION: 'multilabel.classification', - REGRESSION: 'regression', - MULTIOUTPUT_REGRESSION: 'multioutput.regression'} +TASK_TYPES_TO_STRING = { + BINARY_CLASSIFICATION: "binary.classification", + MULTICLASS_CLASSIFICATION: "multiclass.classification", + MULTILABEL_CLASSIFICATION: "multilabel.classification", + REGRESSION: "regression", + MULTIOUTPUT_REGRESSION: "multioutput.regression", +} -STRING_TO_TASK_TYPES = \ - {'binary.classification': BINARY_CLASSIFICATION, - 'multiclass.classification': MULTICLASS_CLASSIFICATION, - 'multilabel.classification': MULTILABEL_CLASSIFICATION, - 'regression': REGRESSION, - 'multioutput.regression': MULTIOUTPUT_REGRESSION} +STRING_TO_TASK_TYPES = { + "binary.classification": BINARY_CLASSIFICATION, + "multiclass.classification": MULTICLASS_CLASSIFICATION, + "multilabel.classification": MULTILABEL_CLASSIFICATION, + "regression": REGRESSION, + "multioutput.regression": MULTIOUTPUT_REGRESSION, +} diff --git a/autosklearn/data/abstract_data_manager.py b/autosklearn/data/abstract_data_manager.py index 739e707334..0837d59ad0 100644 --- a/autosklearn/data/abstract_data_manager.py +++ b/autosklearn/data/abstract_data_manager.py @@ -2,14 +2,14 @@ from typing import Any, Dict, Union import numpy as np - import scipy.sparse -from autosklearn.pipeline.components.data_preprocessing.feature_type \ - import FeatTypeSplit +from autosklearn.pipeline.components.data_preprocessing.feature_type import ( + FeatTypeSplit, +) -class AbstractDataManager(): +class AbstractDataManager: 
__metaclass__ = abc.ABCMeta def __init__(self, name: str): @@ -47,22 +47,26 @@ def encoder(self, value: FeatTypeSplit) -> FeatTypeSplit: self._encoder = value def __repr__(self) -> str: - return 'DataManager : ' + self.name + return "DataManager : " + self.name def __str__(self) -> str: - val = 'DataManager : ' + self.name + '\ninfo:\n' + val = "DataManager : " + self.name + "\ninfo:\n" for item in self.info: - val = val + '\t' + item + ' = ' + str(self.info[item]) + '\n' - val = val + 'data:\n' + val = val + "\t" + item + " = " + str(self.info[item]) + "\n" + val = val + "data:\n" for subset in self.data: - val = val + '\t%s = %s %s %s\n' % (subset, type(self.data[subset]), - str(self.data[subset].shape), - str(self.data[subset].dtype)) + val = val + "\t%s = %s %s %s\n" % ( + subset, + type(self.data[subset]), + str(self.data[subset].shape), + str(self.data[subset].dtype), + ) if isinstance(self.data[subset], scipy.sparse.spmatrix): - val = val + '\tdensity: %f\n' % \ - (float(len(self.data[subset].data)) / - self.data[subset].shape[0] / - self.data[subset].shape[1]) - val = val + 'feat_type:\t' + str(self.feat_type) + '\n' + val = val + "\tdensity: %f\n" % ( + float(len(self.data[subset].data)) + / self.data[subset].shape[0] + / self.data[subset].shape[1] + ) + val = val + "feat_type:\t" + str(self.feat_type) + "\n" return val diff --git a/autosklearn/data/feature_validator.py b/autosklearn/data/feature_validator.py index 1a21249775..0b7ae8e8b1 100644 --- a/autosklearn/data/feature_validator.py +++ b/autosklearn/data/feature_validator.py @@ -1,19 +1,16 @@ -import logging from typing import Dict, List, Optional, Tuple, Union, cast -import numpy as np +import logging +import numpy as np import pandas as pd from pandas.api.types import is_numeric_dtype, is_sparse - from scipy.sparse import csr_matrix, spmatrix - from sklearn.base import BaseEstimator from sklearn.exceptions import NotFittedError from autosklearn.util.logging_ import PickableLoggerAdapter - SUPPORTED_FEAT_TYPES = Union[List, pd.DataFrame, np.ndarray, spmatrix] @@ -26,34 +23,37 @@ class FeatureValidator(BaseEstimator): Attributes ---------- - feat_type: Optional[List[str]] - In case the dataset is not a pandas DataFrame: - + If provided, this list indicates which columns should be treated as categorical - it is internally transformed into a dictionary that indicates a mapping from - column index to categorical/numerical - + If not provided, by default all columns are treated as numerical - If the input dataset is of type pandas dataframe, this argument - must be none, as the column type will be inferred from the pandas dtypes. - - data_type: - Class name of the data type provided during fit. + feat_type: Optional[List[str]] = None + In case the dataset is not a pandas DataFrame: + + If provided, this list indicates which columns should be treated as + categorical it is internally transformed into a dictionary that + indicates a mapping from column index to categorical/numerical. + + If not provided, by default all columns are treated as numerical + + If the input dataset is of type pandas dataframe, this argument + must be none, as the column type will be inferred from the pandas dtypes. + + data_type: + Class name of the data type provided during fit. 
""" - def __init__(self, - feat_type: Optional[List[str]] = None, - logger: Optional[PickableLoggerAdapter] = None, - ) -> None: + + def __init__( + self, + feat_type: Optional[List[str]] = None, + logger: Optional[PickableLoggerAdapter] = None, + ) -> None: # If a dataframe was provided, we populate # this attribute with a mapping from column to {numerical | categorical} - self.feat_type: Optional[ - Dict[Union[str, int], str] - ] = None + self.feat_type: Optional[Dict[Union[str, int], str]] = None if feat_type is not None: if isinstance(feat_type, dict): self.feat_type = feat_type elif not isinstance(feat_type, List): - raise ValueError("Auto-Sklearn expects a list of categorical/" - "numerical feature types, yet a" - " {} was provided".format(type(feat_type))) + raise ValueError( + "Auto-Sklearn expects a list of categorical/" + "numerical feature types, yet a" + " {} was provided".format(type(feat_type)) + ) else: # Convert to a dictionary which will be passed to the ColumnTransformer @@ -72,7 +72,7 @@ def fit( self, X_train: SUPPORTED_FEAT_TYPES, X_test: Optional[SUPPORTED_FEAT_TYPES] = None, - ) -> 'FeatureValidator': + ) -> "FeatureValidator": """ Validates input data to Auto-Sklearn. The supported data types are List, numpy arrays and pandas DataFrames. @@ -86,7 +86,6 @@ def fit( X_test: Optional[SUPPORTED_FEAT_TYPES] A hold out set of data used for checking """ - # If a list was provided, it will be converted to pandas if isinstance(X_train, List): X_train, X_test = self.list_to_dataframe(X_train, X_test) @@ -96,47 +95,56 @@ def fit( # Handle categorical feature identification for the pipeline if hasattr(X_train, "iloc"): if self.feat_type is not None: - raise ValueError("When providing a DataFrame to Auto-Sklearn, we extract " - "the feature types from the DataFrame.dtypes. That is, " - "providing the option feat_type to the fit method is not " - "supported when using a Dataframe. Please make sure that the " - "type of each column in your DataFrame is properly set. " - "More details about having the correct data type in your " - "DataFrame can be seen in " - "https://pandas.pydata.org/pandas-docs/stable/reference" - "/api/pandas.DataFrame.astype.html") + raise ValueError( + "When providing a DataFrame to Auto-Sklearn, we extract " + "the feature types from the DataFrame.dtypes. That is, " + "providing the option feat_type to the fit method is not " + "supported when using a Dataframe. Please make sure that the " + "type of each column in your DataFrame is properly set. " + "More details about having the correct data type in your " + "DataFrame can be seen in " + "https://pandas.pydata.org/pandas-docs/stable/reference" + "/api/pandas.DataFrame.astype.html" + ) else: self.feat_type = self.get_feat_type_from_columns(X_train) else: # Numpy array was provided if self.feat_type is None: # Assume numerical columns if a numpy array has no feature types - self.feat_type = {i: 'numerical' for i in range(np.shape(X_train)[1])} + self.feat_type = {i: "numerical" for i in range(np.shape(X_train)[1])} else: # Check The feat type provided if len(self.feat_type) != np.shape(X_train)[1]: - raise ValueError('Array feat_type does not have same number of ' - 'variables as X has features. %d vs %d.' % - (len(self.feat_type), np.shape(X_train)[1])) + raise ValueError( + "Array feat_type does not have same number of " + "variables as X has features. %d vs %d." 
+ % (len(self.feat_type), np.shape(X_train)[1]) + ) if not all([isinstance(f, str) for f in self.feat_type.values()]): - raise ValueError("feat_type must only contain strings: {}".format( - list(self.feat_type.values()), - )) + raise ValueError( + "feat_type must only contain strings: {}".format( + list(self.feat_type.values()), + ) + ) for ft in self.feat_type.values(): - if ft.lower() not in ['categorical', 'numerical', 'string']: - raise ValueError('Only `Categorical`, `Numerical` and `String` are ' - 'valid feature types') + if ft.lower() not in ["categorical", "numerical", "string"]: + raise ValueError( + "Only `Categorical`, `Numerical` and `String` are " + "valid feature types" + ) if X_test is not None: self._check_data(X_test) if np.shape(X_train)[1] != np.shape(X_test)[1]: - raise ValueError("The feature dimensionality of the train and test " - "data does not match train({}) != test({})".format( - np.shape(X_train)[1], - np.shape(X_test)[1] - )) + raise ValueError( + "The feature dimensionality of the train and test " + "data does not match train({}) != test({})".format( + np.shape(X_train)[1], np.shape(X_test)[1] + ) + ) self._is_fitted = True @@ -162,7 +170,9 @@ def transform( The transformed array """ if not self._is_fitted: - raise NotFittedError("Cannot call transform on a validator that is not fitted") + raise NotFittedError( + "Cannot call transform on a validator that is not fitted" + ) # If a list was provided, it will be converted to pandas if isinstance(X, List): @@ -177,9 +187,11 @@ def transform( # Not all sparse format support index sorting if isinstance(X_transformed, spmatrix): if not isinstance(X_transformed, csr_matrix): - self.logger.warning(f"Sparse data provided is of type {type(X_transformed)} " - "yet Auto-Sklearn only support csr_matrix. Auto-sklearn " - "will convert the provided data to the csr_matrix format.") + self.logger.warning( + f"Sparse data provided is of type {type(X_transformed)} " + "yet Auto-Sklearn only support csr_matrix. Auto-sklearn " + "will convert the provided data to the csr_matrix format." 
+ ) X_transformed = X_transformed.tocsr(copy=False) X_transformed.sort_indices() @@ -195,45 +207,42 @@ def _check_data( Parameters ---------- - X: SUPPORTED_FEAT_TYPES - A set of features that are going to be validated (type and dimensionality - checks) and a encoder fitted in the case the data needs encoding + X: SUPPORTED_FEAT_TYPES + A set of features that are going to be validated (type and dimensionality) + and a encoder fitted in the case the data needs encoding """ - # We consider columns that are all nan in a pandas frame as category - if hasattr(X, 'columns'): + if hasattr(X, "columns"): for column in cast(pd.DataFrame, X).columns: if X[column].isna().all(): - X[column] = X[column].astype('category') + X[column] = X[column].astype("category") - if not isinstance(X, (np.ndarray, pd.DataFrame)) and not isinstance(X, spmatrix): - raise ValueError("Auto-sklearn only supports Numpy arrays, Pandas DataFrames," - " scipy sparse and Python Lists, yet, the provided input is" - " of type {}".format( - type(X) - )) + if not isinstance(X, (np.ndarray, pd.DataFrame)) and not isinstance( + X, spmatrix + ): + raise ValueError( + "Auto-sklearn only supports Numpy arrays, Pandas DataFrames," + " scipy sparse and Python Lists, yet, the provided input is" + " of type {}".format(type(X)) + ) if self.data_type is None: self.data_type = type(X) if self.data_type != type(X): - self.logger.warning("Auto-sklearn previously received features of type %s " - "yet the current features have type %s. Changing the dtype " - "of inputs to an estimator might cause problems" % ( - str(self.data_type), - str(type(X)), - ), - ) + self.logger.warning( + f"Auto-sklearn previously received features of type {self.data_type} " + f"yet the current features have type {type(X)}. Changing the dtype " + "of inputs to an estimator might cause problems" + ) # Do not support category/string numpy data. Only numbers if hasattr(X, "dtype"): if not np.issubdtype(X.dtype.type, np.number): # type: ignore[union-attr] raise ValueError( - "When providing a numpy array to Auto-sklearn, the only valid " - "dtypes are numerical ones. The provided data type {} is not supported." - "".format( - X.dtype.type, # type: ignore[union-attr] - ) + "When providing a numpy array to Auto-sklearn, the only valid" + f" dtypes are numerical ones. The provided data type {X.dtype.type}" + " is not supported." ) # Then for Pandas, we do not support Nan in categorical columns @@ -247,12 +256,14 @@ def _check_data( # To support list, we need to support object inference. # In extreme cases, the train column might be all integer, # and the test column might be float. - self.logger.warning("Changing the dtype of the features after fit() is " - "not recommended. Fit() method was called with " - "{} whereas the new features have {} as type".format( - self.dtypes, - dtypes, - )) + self.logger.warning( + "Changing the dtype of the features after fit() is " + "not recommended. 
Fit() method was called with "
+                    "{} whereas the new features have {} as type".format(
+                        self.dtypes,
+                        dtypes,
+                    )
+                )
         else:
             self.dtypes = dtypes
 
@@ -266,53 +277,52 @@ def get_feat_type_from_columns(
 
         Parameters
         ----------
-        X: pd.DataFrame
-            A set of features that are going to be validated (type and dimensionality
-            checks) and a encoder fitted in the case the data needs encoding
+        X: pd.DataFrame
+            A set of features that are going to be validated (type and dimensionality
+            checks) and an encoder fitted in the case the data needs encoding
+
         Returns
         -------
-        feat_type:
-            dictionary with column to feature type mapping
+        feat_type:
+            dictionary with column to feature type mapping
         """
-        # Also, register the feature types for the estimator
         feat_type = {}
 
         # Make sure each column is a valid type
         for i, column in enumerate(X.columns):
             if is_sparse(X[column]):
-                raise ValueError("Auto-sklearn does not yet support sparse pandas Series."
-                                 f" Please convert {column} to a dense format.")
-            elif X[column].dtype.name in ['category', 'bool']:
-                feat_type[column] = 'categorical'
+                raise ValueError(
+                    "Auto-sklearn does not yet support sparse pandas Series."
+                    f" Please convert {column} to a dense format."
+                )
+            elif X[column].dtype.name in ["category", "bool"]:
+                feat_type[column] = "categorical"
             elif X[column].dtype.name == "string":
-                feat_type[column] = 'string'
+                feat_type[column] = "string"
             # Move away from np.issubdtype as it causes
             # TypeError: data type not understood in certain pandas types
             elif not is_numeric_dtype(X[column]):
-                if X[column].dtype.name == 'object':
+                if X[column].dtype.name == "object":
                     raise ValueError(
-                        "Input Column {} has invalid type object. "
+                        f"Input Column {column} has invalid type object. "
                         "Cast it to a valid dtype before using it in Auto-Sklearn. "
                         "Valid types are numerical, categorical or boolean. "
                         "You can cast it to a valid dtype using "
                        "pandas.Series.astype ."
                         "If working with string objects, the following "
                         "tutorial illustrates how to work with text data: "
-                        "https://scikit-learn.org/stable/tutorial/text_analytics/working_with_text_data.html".format(  # noqa: E501
-                            column,
-                        )
+                        "https://scikit-learn.org/stable/tutorial/text_analytics/working_with_text_data.html"  # noqa: E501
                     )
                 elif pd.core.dtypes.common.is_datetime_or_timedelta_dtype(
                     X[column].dtype
                 ):
                     raise ValueError(
-                        "Auto-sklearn does not support time and/or date datatype as given "
-                        "in column {}. Please convert the time information to a numerical value "
-                        "first. One example on how to do this can be found on "
-                        "https://stats.stackexchange.com/questions/311494/".format(
-                            column,
-                        )
+                        "Auto-sklearn does not support time and/or date datatype as "
+                        f"given in column {column}. Please convert the time "
+                        "information to a numerical value first. One example on how to"
+                        " do this can be found on "
+                        "https://stats.stackexchange.com/questions/311494/"
                     )
                 else:
                     raise ValueError(
@@ -325,7 +335,7 @@ def get_feat_type_from_columns(
                         )
                     )
             else:
-                feat_type[column] = 'numerical'
+                feat_type[column] = "numerical"
         return feat_type
 
     def list_to_dataframe(
@@ -334,31 +344,32 @@ def list_to_dataframe(
         X_test: Optional[SUPPORTED_FEAT_TYPES] = None,
     ) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
         """
-        Converts a list to a pandas DataFrame. In this process, column types are inferred.
+        Converts a list to a DataFrame. In this process, column types are inferred.
If test data is provided, we proactively match it to train data Parameters ---------- - X_train: SUPPORTED_FEAT_TYPES - A set of features that are going to be validated (type and dimensionality - checks) and a encoder fitted in the case the data needs encoding - X_test: Optional[SUPPORTED_FEAT_TYPES] - A hold out set of data used for checking + X_train: SUPPORTED_FEAT_TYPES + A set of features that are going to be validated (type and dimensionality + checks) and a encoder fitted in the case the data needs encoding + + X_test: Optional[SUPPORTED_FEAT_TYPES] + A hold out set of data used for checking + Returns ------- - pd.DataFrame: - transformed train data from list to pandas DataFrame - pd.DataFrame: - transformed test data from list to pandas DataFrame + Union[pd.DataFrame, pd.DataFrame]: + transformed (train, test) data from list to pandas DataFrame """ - # If a list was provided, it will be converted to pandas X_train = pd.DataFrame(data=X_train).convert_dtypes() # Store the dtypes and use in case of re-fit if len(self.dtypes) == 0: - self.dtypes = {col: X_train[col].dtype.name.lower() for col in X_train.columns} + self.dtypes = { + col: X_train[col].dtype.name.lower() for col in X_train.columns + } else: for col in X_train.columns: # Try to convert to the original dtype used to fit the validator @@ -367,25 +378,31 @@ def list_to_dataframe( try: X_train[col] = X_train[col].astype(self.dtypes[col]) except Exception as e: - self.logger.warning(f"Failed to format column {col} as {self.dtypes[col]}: {e}") + self.logger.warning( + f"Failed to format column {col} as {self.dtypes[col]}: {e}" + ) self.dtypes[col] = X_train[col].dtype.name.lower() - self.logger.warning("The provided feature types to autosklearn are of type list." - "Features have been interpreted as: {}".format( - [(col, t) for col, t in zip(X_train.columns, X_train.dtypes)] - )) + self.logger.warning( + "The provided feature types to autosklearn are of type list." + "Features have been interpreted as: {}".format( + [(col, t) for col, t in zip(X_train.columns, X_train.dtypes)] + ) + ) if X_test is not None: if not isinstance(X_test, List): - self.logger.warning("Train features are a list while the provided test data" - "is {}. X_test will be casted as DataFrame.".format( - type(X_test) - )) + self.logger.warning( + "Train features are a list while the provided test data" + "is {}. 
X_test will be casted as DataFrame.".format(type(X_test)) + ) X_test = pd.DataFrame(data=X_test) for col in X_test.columns: try: X_test[col] = X_test[col].astype(self.dtypes[col]) except Exception as e: - self.logger.warning(f"Failed to format column {col} as {self.dtypes[col]}: {e}") + self.logger.warning( + f"Failed to format column {col} as {self.dtypes[col]}: {e}" + ) self.dtypes[col] = X_test[col].dtype.name.lower() return X_train, X_test diff --git a/autosklearn/data/target_validator.py b/autosklearn/data/target_validator.py index 9f6d2e74b5..030a40b9b0 100644 --- a/autosklearn/data/target_validator.py +++ b/autosklearn/data/target_validator.py @@ -1,16 +1,14 @@ +from typing import List, Optional, Type, Union, cast + import logging import warnings -from typing import List, Optional, Type, Union, cast import numpy as np - import pandas as pd +import sklearn.utils from pandas.api.types import is_numeric_dtype from pandas.core.dtypes.base import ExtensionDtype - from scipy.sparse import spmatrix - -import sklearn.utils from sklearn.base import BaseEstimator from sklearn.exceptions import NotFittedError from sklearn.preprocessing import OrdinalEncoder @@ -18,29 +16,34 @@ from autosklearn.util.logging_ import PickableLoggerAdapter - SUPPORTED_TARGET_TYPES = Union[List, pd.Series, pd.DataFrame, np.ndarray, spmatrix] class TargetValidator(BaseEstimator): - """ - A class to pre-process targets. It validates the data provided during fit (to make sure - it matches Sklearn expectation) as well as encoding the targets in case of classification + """A class to pre-process targets. + + It validates the data provided during fit to make sure it matches Sklearn + expectation as well as encoding the targets in case of classification + Attributes ---------- - is_classification: bool - A bool that indicates if the validator should operate in classification mode. - During classification, the targets are encoded. - encoder: Optional[BaseEstimator] - Host a encoder object if the data requires transformation (for example, - if provided a categorical column in a pandas DataFrame) - enc_columns: List[str] - List of columns that where encoded + is_classification: bool + A bool that indicates if the validator should operate in classification mode. + During classification, the targets are encoded. + + encoder: Optional[BaseEstimator] + Host a encoder object if the data requires transformation (for example, if + provided a categorical column in a pandas DataFrame). + + enc_columns: List[str] + List of columns that where encoded """ - def __init__(self, - is_classification: bool = False, - logger: Optional[PickableLoggerAdapter] = None, - ) -> None: + + def __init__( + self, + is_classification: bool = False, + logger: Optional[PickableLoggerAdapter] = None, + ) -> None: self.is_classification = is_classification self.data_type = None # type: Optional[type] @@ -66,7 +69,7 @@ def fit( self, y_train: Union[List, np.ndarray, pd.Series, pd.DataFrame], y_test: Optional[Union[List, np.ndarray, pd.Series, pd.DataFrame]] = None, - ) -> 'TargetValidator': + ) -> "TargetValidator": """ Validates and fit a categorical encoder (if needed) to the targets The supported data types are List, numpy arrays and pandas DataFrames. 
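For orientation before the next hunks of target_validator.py, a minimal usage sketch of the fit/transform/inverse_transform round trip that this class implements (toy labels; the import path and the exact encoded values shown in the comments are assumptions for illustration, not taken from this diff):

import pandas as pd

from autosklearn.data.target_validator import TargetValidator

# Classification mode: fit() validates the labels and fits an OrdinalEncoder on them
y_train = pd.Series(["cat", "dog", "dog", "cat"])
validator = TargetValidator(is_classification=True)
validator.fit(y_train)

encoded = validator.transform(y_train)           # numeric codes, e.g. array([0., 1., 1., 0.])
restored = validator.inverse_transform(encoded)  # back to the original string labels
print(validator.classes_)                        # unique classes seen during fitting, e.g. ['cat' 'dog']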
@@ -87,31 +90,40 @@ def fit( self._check_data(y_test) if len(shape) != len(np.shape(y_test)) or ( - len(shape) > 1 and (shape[1] != np.shape(y_test)[1])): - raise ValueError("The dimensionality of the train and test" - " targets do not match" - f" train {np.shape(y_train)}" - f" != test {np.shape(y_test)}") + len(shape) > 1 and (shape[1] != np.shape(y_test)[1]) + ): + raise ValueError( + "The dimensionality of the train and test" + " targets do not match" + f" train {np.shape(y_train)}" + f" != test {np.shape(y_test)}" + ) if isinstance(y_train, pd.DataFrame): if not isinstance(y_test, pd.DataFrame): y_test = pd.DataFrame(y_test) if y_train.columns.tolist() != y_test.columns.tolist(): - raise ValueError("Train and test targets must both have the" - f" same columns, yet y={y_train.columns}" - f" and y_test={y_test.columns}") + raise ValueError( + "Train and test targets must both have the" + f" same columns, yet y={y_train.columns}" + f" and y_test={y_test.columns}" + ) if list(y_train.dtypes) != list(y_test.dtypes): - raise ValueError("Train and test targets must both have the same dtypes") + raise ValueError( + "Train and test targets must both have the same dtypes" + ) if self.out_dimensionality is None: self.out_dimensionality = 1 if len(shape) == 1 else shape[1] else: _n_outputs = 1 if len(shape) == 1 else shape[1] if self.out_dimensionality != _n_outputs: - raise ValueError('Number of outputs changed from %d to %d!' % - (self.out_dimensionality, _n_outputs)) + raise ValueError( + "Number of outputs changed from %d to %d!" + % (self.out_dimensionality, _n_outputs) + ) # Fit on the training data self._fit(y_train, y_test) @@ -124,7 +136,7 @@ def _fit( self, y_train: Union[List, np.ndarray, pd.Series, pd.DataFrame], y_test: Optional[Union[List, np.ndarray, pd.Series, pd.DataFrame]] = None, - ) -> 'TargetValidator': + ) -> "TargetValidator": """ If dealing with classification, this utility encodes the targets. @@ -139,7 +151,7 @@ def _fit( y_test: Optional[SUPPORTED_TARGET_TYPES] A holdout set of labels """ - if not self.is_classification or self.type_of_target == 'multilabel-indicator': + if not self.is_classification or self.type_of_target == "multilabel-indicator": # Only fit an encoder for classification tasks # Also, encoding multilabel indicator data makes the data multiclass # Let the user employ a MultiLabelBinarizer if needed @@ -150,16 +162,17 @@ def _fit( shape = np.shape(y_train) ndim = len(shape) if ndim > 1 and shape[1] != 1: - # We should not reach this if statement as we check for type of targets before - raise ValueError("Multi-dimensional classification is not yet" - " supported. Encoding multidimensional data" - " converts multiple columns to a 1 dimensional encoding." - f" Data involved = {shape}/{self.type_of_target}") + # We should not reach this if statement, we check for type of targets before + raise ValueError( + "Multi-dimensional classification is not yet" + " supported. Encoding multidimensional data" + " converts multiple columns to a 1 dimensional encoding." 
+ f" Data involved = {shape}/{self.type_of_target}" + ) # Creat the encoder self.encoder = OrdinalEncoder( - handle_unknown='use_encoded_value', - unknown_value=-1 + handle_unknown="use_encoded_value", unknown_value=-1 ) # Clear typing to just numpy arrays and pandas @@ -172,10 +185,12 @@ def _fit( # inverse_transform to try corretly restore it's dtype if isinstance(y, pd.Series): if isinstance(y.dtype, ExtensionDtype): - warnings.warn("Fitting transformer with a pandas series which" - f" has the dtype {y.dtype}. Inverse transform" - " may not be able preserve dtype when converting" - " to np.ndarray") + warnings.warn( + "Fitting transformer with a pandas series which" + f" has the dtype {y.dtype}. Inverse transform" + " may not be able preserve dtype when converting" + " to np.ndarray" + ) if is_numeric_dtype(y.dtype): self.dtype = y.dtype elif isinstance(y, pd.DataFrame): @@ -187,7 +202,7 @@ def _fit( # Merge y_test and y_train for encoding if y_test is not None: - if isinstance(y, (pd.Series, pd.DataFrame)): + if isinstance(y, (pd.Series, pd.DataFrame)): if isinstance(y, pd.Series): y_test = pd.Series(y_test) else: @@ -257,7 +272,7 @@ def transform( y_transformed = sklearn.utils.check_array( y_transformed, force_all_finite=True, - accept_sparse='csr', + accept_sparse="csr", ensure_2d=False, ) @@ -271,7 +286,7 @@ def inverse_transform( self, y: Union[List, pd.Series, pd.DataFrame, np.ndarray], ) -> np.ndarray: - """ Revert any encoding transformation done on a target array. + """Revert any encoding transformation done on a target array. Parameters ---------- @@ -318,9 +333,7 @@ def inverse_transform( return y_inv def is_single_column_target(self) -> bool: - """ - Output is encoded with a single column encoding - """ + """Output is encoded with a single column encoding""" return self.out_dimensionality == 1 def _check_data( @@ -332,38 +345,41 @@ def _check_data( Parameters ---------- - y: Union[np.ndarray, pd.DataFrame, pd.Series] - A set of features whose dimensionality and data type is going to be checked + y: Union[np.ndarray, pd.DataFrame, pd.Series] + A set of features whose dimensionality and data type is going to be checked """ - if not isinstance( - y, (np.ndarray, pd.DataFrame, List, pd.Series)) and not isinstance(y, spmatrix): - raise ValueError("Auto-sklearn only supports Numpy arrays, Pandas DataFrames," - " pd.Series, sparse data and Python Lists as targets, yet, " - "the provided input is of type {}".format( - type(y) - )) + y, (np.ndarray, pd.DataFrame, List, pd.Series) + ) and not isinstance(y, spmatrix): + raise ValueError( + "Auto-sklearn only supports Numpy arrays, Pandas DataFrames," + " pd.Series, sparse data and Python Lists as targets, yet, " + "the provided input is of type {}".format(type(y)) + ) if isinstance(y, spmatrix) and not np.issubdtype(y.dtype.type, np.number): - raise ValueError("When providing a sparse matrix as targets, the only supported " - "values are numerical. Please consider using a dense" - " instead." - ) + raise ValueError( + "When providing a sparse matrix as targets, the only supported " + "values are numerical. Please consider using a dense" + " instead." + ) if self.data_type is None: self.data_type = type(y) if self.data_type != type(y): - self.logger.warning("Auto-sklearn previously received targets of type %s " - "yet the current features have type %s. 
Changing the dtype " - "of inputs to an estimator might cause problems" % ( - str(self.data_type), - str(type(y)), - ), - ) + self.logger.warning( + "Auto-sklearn previously received targets of type %s " + "yet the current features have type %s. Changing the dtype " + "of inputs to an estimator might cause problems" + % ( + str(self.data_type), + str(type(y)), + ), + ) # No Nan is supported has_nan_values = False - if hasattr(y, 'iloc'): + if hasattr(y, "iloc"): has_nan_values = cast(pd.DataFrame, y).isnull().values.any() if isinstance(y, spmatrix): @@ -374,34 +390,39 @@ def _check_data( # but NaN, are not equal to themselves: has_nan_values = not np.array_equal(y, y) if has_nan_values: - raise ValueError("Target values cannot contain missing/NaN values. " - "This is not supported by scikit-learn. " - ) + raise ValueError( + "Target values cannot contain missing/NaN values. " + "This is not supported by scikit-learn. " + ) # Pandas Series is not supported for multi-label indicator # This format checks are done by type of target try: self.type_of_target = type_of_target(y) except Exception as e: - raise ValueError("The provided data could not be interpreted by Sklearn. " - "While determining the type of the targets via type_of_target " - "run into exception: {}.".format(e)) - - supported_output_types = ('binary', - 'continuous', - 'continuous-multioutput', - 'multiclass', - 'multilabel-indicator', - # Notice unknown/multiclass-multioutput are not supported - # This can only happen during testing only as estimators - # should filter out unsupported types. - ) + raise ValueError( + "The provided data could not be interpreted by Sklearn. " + "While determining the type of the targets via type_of_target " + "run into exception: {}.".format(e) + ) + + supported_output_types = ( + "binary", + "continuous", + "continuous-multioutput", + "multiclass", + "multilabel-indicator", + # Notice unknown/multiclass-multioutput are not supported + # This can only happen during testing only as estimators + # should filter out unsupported types. + ) if self.type_of_target not in supported_output_types: - raise ValueError("Provided targets are not supported by Auto-Sklearn. " - "Provided type is {} whereas supported types are {}.".format( - self.type_of_target, - supported_output_types - )) + raise ValueError( + "Provided targets are not supported by Auto-Sklearn. " + "Provided type is {} whereas supported types are {}.".format( + self.type_of_target, supported_output_types + ) + ) @property def classes_(self) -> np.ndarray: @@ -410,10 +431,11 @@ def classes_(self) -> np.ndarray: which consist of a ndarray of shape (n_classes,) where n_classes are the number of classes seen while fitting a encoder to the targets. 
+ Returns ------- - classes_: np.ndarray - The unique classes seen during encoding of a classifier + classes_: np.ndarray + The unique classes seen during encoding of a classifier """ if self.encoder is None: return np.array([]) diff --git a/autosklearn/data/validation.py b/autosklearn/data/validation.py index d06082258d..89aaca85c0 100644 --- a/autosklearn/data/validation.py +++ b/autosklearn/data/validation.py @@ -1,23 +1,21 @@ # -*- encoding: utf-8 -*- -import logging from typing import List, Optional, Tuple, Union -import numpy as np +import logging +import numpy as np import pandas as pd - from scipy.sparse import spmatrix - from sklearn.base import BaseEstimator from sklearn.exceptions import NotFittedError -from autosklearn.data.feature_validator import FeatureValidator, SUPPORTED_FEAT_TYPES +from autosklearn.data.feature_validator import SUPPORTED_FEAT_TYPES, FeatureValidator from autosklearn.data.target_validator import SUPPORTED_TARGET_TYPES, TargetValidator from autosklearn.util.logging_ import get_named_client_logger def convert_if_sparse( - y: SUPPORTED_TARGET_TYPES + y: SUPPORTED_TARGET_TYPES, ) -> Union[np.ndarray, List, pd.DataFrame, pd.Series]: """If the labels `y` are sparse, it will convert it to its dense representation @@ -51,26 +49,32 @@ class InputValidator(BaseEstimator): This class also perform checks for data integrity and flags the user via informative errors. + Attributes ---------- - feat_type: Optional[List[str]] - In case the dataset is not a pandas DataFrame: - + If provided, this list indicates which columns should be treated as categorical - it is internally transformed into a dictionary that indicates a mapping from - column index to categorical/numerical - + If not provided, by default all columns are treated as numerical - If the input dataset is of type pandas dataframe, this argument - must be none, as the column type will be inferred from the pandas dtypes. - is_classification: bool - For classification task, this flag indicates that the target data - should be encoded - feature_validator: FeatureValidator - A FeatureValidator instance used to validate and encode feature columns to match - sklearn expectations on the data - target_validator: TargetValidator - A TargetValidator instance used to validate and encode (in case of classification) - the target values + feat_type: Optional[List[str]] = None + In case the dataset is not a pandas DataFrame: + + If provided, this list indicates which columns should be treated as + categorical it is internally transformed into a dictionary that + indicates a mapping from column index to categorical/numerical. + + If not provided, by default all columns are treated as numerical + + If the input dataset is of type pandas dataframe, this argument + must be none, as the column type will be inferred from the pandas dtypes. 
+ + is_classification: bool + For classification task, this flag indicates that the target data + should be encoded + + feature_validator: FeatureValidator + A FeatureValidator instance used to validate and encode feature columns to match + sklearn expectations on the data + + target_validator: TargetValidator + A TargetValidator instance used for classification to validate and encode the + target values """ + def __init__( self, feat_type: Optional[List[str]] = None, @@ -82,16 +86,18 @@ def __init__( self.logger_port = logger_port if self.logger_port is not None: self.logger = get_named_client_logger( - name='Validation', + name="Validation", port=self.logger_port, ) else: - self.logger = logging.getLogger('Validation') - - self.feature_validator = FeatureValidator(feat_type=self.feat_type, - logger=self.logger) - self.target_validator = TargetValidator(is_classification=self.is_classification, - logger=self.logger) + self.logger = logging.getLogger("Validation") + + self.feature_validator = FeatureValidator( + feat_type=self.feat_type, logger=self.logger + ) + self.target_validator = TargetValidator( + is_classification=self.is_classification, logger=self.logger + ) self._is_fitted = False def fit( @@ -106,45 +112,55 @@ def fit( a encoder for targets in the case of classification. Specifically: For features: - + Valid data types are enforced (List, np.ndarray, pd.DataFrame, pd.Series, scipy - sparse) as well as dimensionality checks - + If the provided data is a pandas DataFrame with categorical/boolean/int columns, - such columns will be encoded using an Ordinal Encoder + Valid data types are enforced (List, np.ndarray, pd.DataFrame, pd.Series, scipy + sparse) as well as dimensionality checks + + If the provided data is a pandas DataFrame with categorical/boolean/int columns, + such columns will be encoded using an Ordinal Encoder + For targets: - + Checks for dimensionality as well as missing values are performed. - + If performing a classification task, the data is going to be encoded + * Checks for dimensionality as well as missing values are performed. + * If performing a classification task, the data is going to be encoded Parameters ---------- - X_train: SUPPORTED_FEAT_TYPES - A set of features that are going to be validated (type and dimensionality - checks). If this data contains categorical columns, an encoder is going to - be instantiated and trained with this data. - y_train: SUPPORTED_TARGET_TYPES - A set of targets that are going to be encoded if the task is for classification - X_test: Optional[SUPPORTED_FEAT_TYPES] - A hold out set of features used for checking - y_test: SUPPORTED_TARGET_TYPES - A hold out set of targets used for checking. Additionally, if the current task - is a classification task, this y_test categories are also going to be used to - fit a pre-processing encoding (to prevent errors on unseen classes). + X_train: SUPPORTED_FEAT_TYPES + A set of features that are going to be validated (type and dimensionality + checks). If this data contains categorical columns, an encoder is going to + be instantiated and trained with this data. + + y_train: SUPPORTED_TARGET_TYPES + A set of targets to encoded if the task is for classification. + + X_test: Optional[SUPPORTED_FEAT_TYPES] + A hold out set of features used for checking + + y_test: SUPPORTED_TARGET_TYPES + A hold out set of targets used for checking. 
Additionally, if the current + task is a classification task, this y_test categories are also going to be + used to fit a pre-processing encoding (to prevent errors on unseen classes). + Returns ------- - self + self """ # Check that the data is valid if np.shape(X_train)[0] != np.shape(y_train)[0]: - raise ValueError("Inconsistent number of train datapoints for features and targets," - " {} for features and {} for targets".format( - np.shape(X_train)[0], - np.shape(y_train)[0], - )) + raise ValueError( + "Inconsistent number of train datapoints for features and targets," + " {} for features and {} for targets".format( + np.shape(X_train)[0], + np.shape(y_train)[0], + ) + ) if X_test is not None and np.shape(X_test)[0] != np.shape(y_test)[0]: - raise ValueError("Inconsistent number of test datapoints for features and targets," - " {} for features and {} for targets".format( - np.shape(X_test)[0], - np.shape(y_test)[0], - )) + raise ValueError( + "Inconsistent number of test datapoints for features and targets," + " {} for features and {} for targets".format( + np.shape(X_test)[0], + np.shape(y_test)[0], + ) + ) self.feature_validator.fit(X_train, X_test) self.target_validator.fit(y_train, y_test) @@ -175,7 +191,9 @@ def transform( The transformed targets array """ if not self._is_fitted: - raise NotFittedError("Cannot call transform on a validator that is not fitted") + raise NotFittedError( + "Cannot call transform on a validator that is not fitted" + ) X_transformed = self.feature_validator.transform(X) if y is not None: diff --git a/autosklearn/data/xy_data_manager.py b/autosklearn/data/xy_data_manager.py index 4c539157ee..d8cd467214 100644 --- a/autosklearn/data/xy_data_manager.py +++ b/autosklearn/data/xy_data_manager.py @@ -2,9 +2,7 @@ from typing import Dict, Optional, Union, cast import numpy as np - import pandas as pd - from scipy import sparse from autosklearn.constants import ( @@ -15,14 +13,10 @@ REGRESSION, ) from autosklearn.data.abstract_data_manager import AbstractDataManager -from autosklearn.data.validation import ( - SUPPORTED_FEAT_TYPES, - SUPPORTED_TARGET_TYPES, -) +from autosklearn.data.validation import SUPPORTED_FEAT_TYPES, SUPPORTED_TARGET_TYPES class XYDataManager(AbstractDataManager): - def __init__( self, X: SUPPORTED_FEAT_TYPES, @@ -31,52 +25,59 @@ def __init__( y_test: Optional[SUPPORTED_TARGET_TYPES], task: int, feat_type: Dict[Union[str, int], str], - dataset_name: str + dataset_name: str, ): super(XYDataManager, self).__init__(dataset_name) - self.info['task'] = task + self.info["task"] = task if sparse.issparse(X): - self.info['is_sparse'] = 1 - self.info['has_missing'] = np.all(np.isfinite(cast(sparse.csr_matrix, X).data)) + self.info["is_sparse"] = 1 + self.info["has_missing"] = np.all( + np.isfinite(cast(sparse.csr_matrix, X).data) + ) else: - self.info['is_sparse'] = 0 - if hasattr(X, 'iloc'): - self.info['has_missing'] = cast(pd.DataFrame, X).isnull().values.any() + self.info["is_sparse"] = 0 + if hasattr(X, "iloc"): + self.info["has_missing"] = cast(pd.DataFrame, X).isnull().values.any() else: - self.info['has_missing'] = np.all(np.isfinite(X)) + self.info["has_missing"] = np.all(np.isfinite(X)) label_num = { REGRESSION: 1, BINARY_CLASSIFICATION: 2, MULTIOUTPUT_REGRESSION: np.shape(y)[-1], MULTICLASS_CLASSIFICATION: len(np.unique(y)), - MULTILABEL_CLASSIFICATION: np.shape(y)[-1] + MULTILABEL_CLASSIFICATION: np.shape(y)[-1], } - self.info['label_num'] = label_num[task] + self.info["label_num"] = label_num[task] - self.data['X_train'] = X - 
self.data['Y_train'] = y + self.data["X_train"] = X + self.data["Y_train"] = y if X_test is not None: - self.data['X_test'] = X_test + self.data["X_test"] = X_test if y_test is not None: - self.data['Y_test'] = y_test + self.data["Y_test"] = y_test if isinstance(feat_type, dict): self.feat_type = feat_type else: - raise ValueError("Unsupported feat_type provided. We expect the user to " - "provide a Dict[str, str] mapping from column to categorical/ " - "numerical.") + raise ValueError( + "Unsupported feat_type provided. We expect the user to " + "provide a Dict[str, str] mapping from column to categorical/ " + "numerical." + ) # TODO: try to guess task type! if len(np.shape(y)) > 2: - raise ValueError('y must not have more than two dimensions, ' - 'but has %d.' % len(np.shape(y))) + raise ValueError( + "y must not have more than two dimensions, " + "but has %d." % len(np.shape(y)) + ) if np.shape(X)[0] != np.shape(y)[0]: - raise ValueError('X and y must have the same number of ' - 'datapoints, but have %d and %d.' % (np.shape(X)[0], - np.shape(y)[0])) + raise ValueError( + "X and y must have the same number of " + "datapoints, but have %d and %d." % (np.shape(X)[0], np.shape(y)[0]) + ) diff --git a/autosklearn/ensemble_builder.py b/autosklearn/ensemble_builder.py index e337726b0e..3707ce84c9 100644 --- a/autosklearn/ensemble_builder.py +++ b/autosklearn/ensemble_builder.py @@ -1,21 +1,21 @@ # -*- encoding: utf-8 -*- +from typing import List, Optional, Tuple, Union + import glob import gzip -import math -import numbers import logging.handlers +import math import multiprocessing +import numbers import os import pickle import re import shutil import time import traceback -from typing import List, Optional, Tuple, Union import zlib import dask.distributed - import numpy as np import pandas as pd import pynisher @@ -24,12 +24,13 @@ from smac.runhistory.runhistory import RunInfo, RunValue from smac.tae.base import StatusType +from autosklearn.automl_common.common.ensemble_building.abstract_ensemble import ( # noqa: E501 + AbstractEnsemble, +) from autosklearn.automl_common.common.utils.backend import Backend -from autosklearn.automl_common.common.ensemble_building.abstract_ensemble import AbstractEnsemble - from autosklearn.constants import BINARY_CLASSIFICATION -from autosklearn.metrics import calculate_score, calculate_loss, Scorer from autosklearn.ensembles.ensemble_selection import EnsembleSelection +from autosklearn.metrics import Scorer, calculate_loss, calculate_score from autosklearn.util.logging_ import get_named_client_logger from autosklearn.util.parallel import preload_modules @@ -37,7 +38,7 @@ Y_VALID = 1 Y_TEST = 2 -MODEL_FN_RE = r'_([0-9]*)_([0-9]*)_([0-9]{1,3}\.[0-9]*)\.npy' +MODEL_FN_RE = r"_([0-9]*)_([0-9]*)_([0-9]{1,3}\.[0-9]*)\.npy" class EnsembleBuilderManager(IncorporateRunResultCallback): @@ -59,62 +60,78 @@ def __init__( ensemble_memory_limit: Optional[int], random_state: Union[int, np.random.RandomState], logger_port: int = logging.handlers.DEFAULT_TCP_LOGGING_PORT, - pynisher_context: str = 'fork', + pynisher_context: str = "fork", ): - """ SMAC callback to handle ensemble building + """SMAC callback to handle ensemble building Parameters ---------- start_time: int - the time when this job was started, to account for any latency in job allocation + the time when this job was started, to account for any latency in job + allocation. + time_left_for_ensemble: int - How much time is left for the task. 
Job should finish within this allocated time + How much time is left for the task. Job should finish within this + allocated time + backend: util.backend.Backend backend to write and read files + dataset_name: str name of dataset + task_type: int type of ML task + metric: str name of metric to compute the loss of the given predictions + ensemble_size: int - maximal size of ensemble (passed to autosklearn.ensemble.ensemble_selection) + maximal size of ensemble + ensemble_nbest: int/float if int: consider only the n best prediction if float: consider only this fraction of the best models Both wrt to validation predictions If performance_range_threshold > 0, might return less models + max_models_on_disc: int Defines the maximum number of models that are kept in the disc. - If int, it must be greater or equal than 1, and dictates the max number of - models to keep. - If float, it will be interpreted as the max megabytes allowed of disc space. That - is, if the number of ensemble candidates require more disc space than this float - value, the worst models will be deleted to keep within this budget. - Models and predictions of the worst-performing models will be deleted then. - If None, the feature is disabled. - It defines an upper bound on the models that can be used in the ensemble. + + If int, it must be greater or equal than 1, and dictates the max + number of models to keep. + + If float, it will be interpreted as the max megabytes allowed of + disc space. That is, if the number of ensemble candidates require more + disc space than this float value, the worst models will be deleted to + keep within this budget. Models and predictions of the worst-performing + models will be deleted then. + + If None, the feature is disabled. It defines an upper bound on the + models that can be used in the ensemble. + seed: int random seed + max_iterations: int maximal number of iterations to run this script (default None --> deactivated) + precision: [16,32,64,128] precision of floats to read the predictions + memory_limit: Optional[int] memory limit in mb. If ``None``, no memory limit is enforced. + read_at_most: int read at most n new prediction files in each iteration + logger_port: int port that receives logging records + pynisher_context: str The multiprocessing context for pynisher. One of spawn/fork/forkserver. - Returns - ------- - List[Tuple[int, float, float, float]]: - A list with the performance history of this ensemble, of the form - [[pandas_timestamp, train_performance, val_performance, test_performance], ...] 
""" self.start_time = start_time self.time_left_for_ensembles = time_left_for_ensembles @@ -148,26 +165,31 @@ def __init__( def __call__( self, - smbo: 'SMBO', + smbo: "SMBO", run_info: RunInfo, result: RunValue, time_left: float, ): + """ + Returns + ------- + List[Tuple[int, float, float, float]]: + A list with the performance history of this ensemble, of the form + [(pandas_timestamp, train_performance, val_performance, test_performance)] + """ if result.status in (StatusType.STOP, StatusType.ABORT) or smbo._stop: return self.build_ensemble(smbo.tae_runner.client) def build_ensemble( - self, - dask_client: dask.distributed.Client, - unit_test: bool = False + self, dask_client: dask.distributed.Client, unit_test: bool = False ) -> None: # The second criteria is elapsed time elapsed_time = time.time() - self.start_time logger = get_named_client_logger( - name='EnsembleBuilder', + name="EnsembleBuilder", port=self.logger_port, ) @@ -181,10 +203,8 @@ def build_ensemble( return if self.max_iterations is not None and self.max_iterations <= self.iteration: logger.info( - "Terminate ensemble building because of max iterations: {} of {}".format( - self.max_iterations, - self.iteration - ) + "Terminate ensemble building because of max iterations:" + f" {self.max_iterations} of {self.iteration}" ) return @@ -193,11 +213,13 @@ def build_ensemble( result = self.futures.pop().result() if result: ensemble_history, self.ensemble_nbest, _, _, _ = result - logger.debug("iteration={} @ elapsed_time={} has history={}".format( - self.iteration, - elapsed_time, - ensemble_history, - )) + logger.debug( + "iteration={} @ elapsed_time={} has history={}".format( + self.iteration, + elapsed_time, + ensemble_history, + ) + ) self.history.extend(ensemble_history) # Only submit new jobs if the previous ensemble job finished @@ -215,28 +237,30 @@ def build_ensemble( # see it in the dask diagnostic dashboard # Notice that the forked ensemble_builder_process will # wait for the below function to be done - self.futures.append(dask_client.submit( - fit_and_return_ensemble, - backend=self.backend, - dataset_name=self.dataset_name, - task_type=self.task, - metric=self.metric, - ensemble_size=self.ensemble_size, - ensemble_nbest=self.ensemble_nbest, - max_models_on_disc=self.max_models_on_disc, - seed=self.seed, - precision=self.precision, - memory_limit=self.ensemble_memory_limit, - read_at_most=self.read_at_most, - random_state=self.random_state, - end_at=self.start_time + self.time_left_for_ensembles, - iteration=self.iteration, - return_predictions=False, - priority=100, - pynisher_context=self.pynisher_context, - logger_port=self.logger_port, - unit_test=unit_test, - )) + self.futures.append( + dask_client.submit( + fit_and_return_ensemble, + backend=self.backend, + dataset_name=self.dataset_name, + task_type=self.task, + metric=self.metric, + ensemble_size=self.ensemble_size, + ensemble_nbest=self.ensemble_nbest, + max_models_on_disc=self.max_models_on_disc, + seed=self.seed, + precision=self.precision, + memory_limit=self.ensemble_memory_limit, + read_at_most=self.read_at_most, + random_state=self.random_state, + end_at=self.start_time + self.time_left_for_ensembles, + iteration=self.iteration, + return_predictions=False, + priority=100, + pynisher_context=self.pynisher_context, + logger_port=self.logger_port, + unit_test=unit_test, + ) + ) logger.info( "{}/{} Started Ensemble builder job at {} for iteration {}.".format( @@ -276,11 +300,11 @@ def fit_and_return_ensemble( memory_limit: Optional[int] = None, 
random_state: Optional[Union[int, np.random.RandomState]] = None, ) -> Tuple[ - List[Tuple[int, float, float, float]], - int, - Optional[np.ndarray], - Optional[np.ndarray], - Optional[np.ndarray], + List[Tuple[int, float, float, float]], + int, + Optional[np.ndarray], + Optional[np.ndarray], + Optional[np.ndarray], ]: """ @@ -291,60 +315,79 @@ def fit_and_return_ensemble( ---------- backend: util.backend.Backend backend to write and read files + dataset_name: str name of dataset + metric: str name of metric to compute the loss of the given predictions + task_type: int type of ML task + ensemble_size: int maximal size of ensemble (passed to autosklearn.ensemble.ensemble_selection) + ensemble_nbest: int/float if int: consider only the n best prediction if float: consider only this fraction of the best models Both wrt to validation predictions If performance_range_threshold > 0, might return less models + max_models_on_disc: int Defines the maximum number of models that are kept in the disc. + If int, it must be greater or equal than 1, and dictates the max number of models to keep. - If float, it will be interpreted as the max megabytes allowed of disc space. That - is, if the number of ensemble candidates require more disc space than this float - value, the worst models will be deleted to keep within this budget. - Models and predictions of the worst-performing models will be deleted then. + + If float, it will be interpreted as the max megabytes allowed of disc space. + That is, if the number of ensemble candidates require more disc space than + this float value, the worst models will be deleted to keep within this + budget. Models and predictions of the worst-performing models will be + deleted then. + If None, the feature is disabled. It defines an upper bound on the models that can be used in the ensemble. + seed: int random seed + precision: [16,32,64,128] precision of floats to read the predictions + read_at_most: int read at most n new prediction files in each iteration + end_at: float - At what time the job must finish. Needs to be the endtime and not the time left - because we do not know when dask schedules the job. + At what time the job must finish. Needs to be the endtime and not the + time left because we do not know when dask schedules the job. + iteration: int The current iteration + pynisher_context: str Context to use for multiprocessing, can be either fork, spawn or forkserver. + logger_port: int = DEFAULT_TCP_LOGGING_PORT The port where the logging server is listening to. + unit_test: bool = False - Turn on unit testing mode. This currently makes fit_ensemble raise a MemoryError. - Having this is very bad coding style, but I did not find a way to make - unittest.mock work through the pynisher with all spawn contexts. If you know a - better solution, please let us know by opening an issue. + Turn on unit testing mode. This currently makes fit_ensemble raise a + MemoryError. Having this is very bad coding style, but I did not find a way + to make unittest.mock work through the pynisher with all spawn contexts. + If you know a better solution, please let us know by opening an issue. + memory_limit: Optional[int] = None memory limit in mb. If ``None``, no memory limit is enforced. + random_state: Optional[int | RandomState] = None A random state used for the ensemble selection process. 
Returns ------- - List[Tuple[int, float, float, float]] - A list with the performance history of this ensemble, of the form - [[pandas_timestamp, train_performance, val_performance, test_performance], ...] - + List[Tuple[int, float, float, float]] + A list with the performance history of this ensemble, of the form + [(pandas_timestamp, train_performance, val_performance, test_performance)] """ result = EnsembleBuilder( backend=backend, @@ -390,58 +433,58 @@ def __init__( unit_test: bool = False, ): """ - Constructor - - Parameters - ---------- - backend: util.backend.Backend - backend to write and read files - dataset_name: str - name of dataset - task_type: int - type of ML task - metric: str - name of metric to compute the loss of the given predictions - ensemble_size: int = 10 - maximal size of ensemble (passed to autosklearn.ensemble.ensemble_selection) - ensemble_nbest: int | float = 100 - if int: consider only the n best prediction - if float: consider only this fraction of the best models - Both with respect to the validation predictions - If performance_range_threshold > 0, might return less models - max_models_on_disc: int = 100 - Defines the maximum number of models that are kept in the disc. - If int, it must be greater or equal than 1, and dictates the max number of - models to keep. - If float, it will be interpreted as the max megabytes allowed of disc space. That - is, if the number of ensemble candidates require more disc space than this float - value, the worst models will be deleted to keep within this budget. - Models and predictions of the worst-performing models will be deleted then. - If None, the feature is disabled. - It defines an upper bound on the models that can be used in the ensemble. - performance_range_threshold: float = 0 - Keep only models that are better than: - dummy + (best - dummy)*performance_range_threshold - E.g dummy=2, best=4, thresh=0.5 --> only consider models with loss > 3 - Will at most return the minimum between ensemble_nbest models, - and max_models_on_disc. Might return less - seed: int = 1 - random seed that is used as part of the filename - precision: int in [16,32,64,128] = 32 - precision of floats to read the predictions - memory_limit: Optional[int] = 1024 - memory limit in mb. If ``None``, no memory limit is enforced. - read_at_most: int = 5 - read at most n new prediction files in each iteration - logger_port: int = DEFAULT_TCP_LOGGING_PORT - port that receives logging records - random_state: Optional[int | RandomState] = None - An int or RandomState object used for generating the ensemble. - unit_test: bool = False - Turn on unit testing mode. This currently makes fit_ensemble raise a MemoryError. - Having this is very bad coding style, but I did not find a way to make - unittest.mock work through the pynisher with all spawn contexts. If you know a - better solution, please let us know by opening an issue. 
+ Constructor + + Parameters + ---------- + backend: util.backend.Backend + backend to write and read files + dataset_name: str + name of dataset + task_type: int + type of ML task + metric: str + name of metric to compute the loss of the given predictions + ensemble_size: int = 10 + maximal size of ensemble (passed to autosklearn.ensemble.ensemble_selection) + ensemble_nbest: int | float = 100 + if int: consider only the n best prediction + if float: consider only this fraction of the best models + Both with respect to the validation predictions + If performance_range_threshold > 0, might return less models + max_models_on_disc: int = 100 + Defines the maximum number of models that are kept in the disc. + If int, it must be greater or equal than 1, and dictates the max number of + models to keep. + If float, it will be interpreted as the max megabytes allowed of disc space. + That is, if the number of ensemble candidates require more disc space than + this float value, the worst models are deleted to keep within this budget. + Models and predictions of the worst-performing models will be deleted then. + If None, the feature is disabled. + It defines an upper bound on the models that can be used in the ensemble. + performance_range_threshold: float = 0 + Keep only models that are better than: + dummy + (best - dummy)*performance_range_threshold + E.g dummy=2, best=4, thresh=0.5 --> only consider models with loss > 3 + Will at most return the minimum between ensemble_nbest models, + and max_models_on_disc. Might return less + seed: int = 1 + random seed that is used as part of the filename + precision: int in [16,32,64,128] = 32 + precision of floats to read the predictions + memory_limit: Optional[int] = 1024 + memory limit in mb. If ``None``, no memory limit is enforced. + read_at_most: int = 5 + read at most n new prediction files in each iteration + logger_port: int = DEFAULT_TCP_LOGGING_PORT + port that receives logging records + random_state: Optional[int | RandomState] = None + An int or RandomState object used for generating the ensemble. + unit_test: bool = False + Turn on unit testing mode. This currently makes fit_ensemble raise + a MemoryError. Having this is very bad coding style, but I did not find a + way to make unittest.mock work through the pynisher with all spawn contexts. + If you know a better solution, please let us know by opening an issue. """ super(EnsembleBuilder, self).__init__() @@ -454,13 +497,15 @@ def __init__( self.performance_range_threshold = performance_range_threshold if isinstance(ensemble_nbest, numbers.Integral) and ensemble_nbest < 1: - raise ValueError("Integer ensemble_nbest has to be larger 1: %s" % - ensemble_nbest) + raise ValueError( + "Integer ensemble_nbest has to be larger 1: %s" % ensemble_nbest + ) elif not isinstance(ensemble_nbest, numbers.Integral): if ensemble_nbest < 0 or ensemble_nbest > 1: raise ValueError( - "Float ensemble_nbest best has to be >= 0 and <= 1: %s" % - ensemble_nbest) + "Float ensemble_nbest best has to be >= 0 and <= 1: %s" + % ensemble_nbest + ) self.ensemble_nbest = ensemble_nbest @@ -469,9 +514,7 @@ def __init__( # max number of ensemble models. 
max_resident_models keeps the # maximum number of models in disc if max_models_on_disc is not None and max_models_on_disc < 0: - raise ValueError( - "max_models_on_disc has to be a positive number or None" - ) + raise ValueError("max_models_on_disc has to be a positive number or None") self.max_models_on_disc = max_models_on_disc self.max_resident_models = None @@ -485,13 +528,15 @@ def __init__( # Setup the logger self.logger_port = logger_port self.logger = get_named_client_logger( - name='EnsembleBuilder', + name="EnsembleBuilder", port=self.logger_port, ) if ensemble_nbest == 1: - self.logger.debug("Behaviour depends on int/float: %s, %s (ensemble_nbest, type)" % - (ensemble_nbest, type(ensemble_nbest))) + self.logger.debug( + "Behaviour depends on int/float: %s, %s (ensemble_nbest, type)" + % (ensemble_nbest, type(ensemble_nbest)) + ) self.start_time = 0 self.model_fn_re = re.compile(MODEL_FN_RE) @@ -528,8 +573,7 @@ def __init__( # we save the state of this dictionary to memory # and read it if available self.ensemble_memory_file = os.path.join( - self.backend.internals_directory, - 'ensemble_read_preds.pkl' + self.backend.internals_directory, "ensemble_read_preds.pkl" ) if os.path.exists(self.ensemble_memory_file): try: @@ -537,15 +581,12 @@ def __init__( self.read_preds, self.last_hash = pickle.load(memory) except Exception as e: self.logger.warning( - "Could not load the previous iterations of ensemble_builder predictions." - "This might impact the quality of the run. Exception={} {}".format( - e, - traceback.format_exc(), - ) + "Could not load the previous iterations of ensemble_builder" + " predictions. This might impact the quality of the run." + f" Exception={e} {traceback.format_exc()}" ) self.ensemble_loss_file = os.path.join( - self.backend.internals_directory, - 'ensemble_read_losses.pkl' + self.backend.internals_directory, "ensemble_read_losses.pkl" ) if os.path.exists(self.ensemble_loss_file): try: @@ -560,17 +601,17 @@ def __init__( ) ) - # hidden feature which can be activated via an environment variable. This keeps all - # models and predictions which have ever been a candidate. This is necessary to post-hoc - # compute the whole ensemble building trajectory. + # hidden feature which can be activated via an environment variable. + # This keeps all models and predictions which have ever been a candidate. + # This is necessary to post-hoc compute the whole ensemble building trajectory. 
self._has_been_candidate = set() self.validation_performance_ = np.inf # Track the ensemble performance datamanager = self.backend.load_datamanager() - self.y_valid = datamanager.data.get('Y_valid') - self.y_test = datamanager.data.get('Y_test') + self.y_valid = datamanager.data.get("Y_valid") + self.y_test = datamanager.data.get("Y_test") del datamanager self.ensemble_history = [] @@ -585,12 +626,12 @@ def run( ): if time_left is None and end_at is None: - raise ValueError('Must provide either time_left or end_at.') + raise ValueError("Must provide either time_left or end_at.") elif time_left is not None and end_at is not None: - raise ValueError('Cannot provide both time_left and end_at.') + raise ValueError("Cannot provide both time_left and end_at.") self.logger = get_named_client_logger( - name='EnsembleBuilder', + name="EnsembleBuilder", port=self.logger_port, ) @@ -624,37 +665,44 @@ def run( # if ensemble script died because of memory error, # reduce nbest to reduce memory consumption and try it again - # ATTENTION: main will start from scratch; # all data structures are empty again + # ATTENTION: main will start from scratch; + # all data structures are empty again try: os.remove(self.ensemble_memory_file) except: # noqa E722 pass - if isinstance(self.ensemble_nbest, numbers.Integral) and self.ensemble_nbest <= 1: + if ( + isinstance(self.ensemble_nbest, numbers.Integral) + and self.ensemble_nbest <= 1 + ): if self.read_at_most == 1: self.logger.error( - "Memory Exception -- Unable to further reduce the number of ensemble " - "members and can no further limit the number of ensemble members " - "loaded per iteration -- please restart Auto-sklearn with a higher " - "value for the argument `memory_limit` (current limit is %s MB). " - "The ensemble builder will keep running to delete files from disk in " - "case this was enabled.", self.memory_limit + "Memory Exception -- Unable to further reduce the number" + " of ensemble members and can no further limit the number" + " of ensemble members loaded per iteration, please restart" + " Auto-sklearn with a higher value for the argument" + f" `memory_limit` (current limit is {self.memory_limit}MB)." + " The ensemble builder will keep running to delete files" + " from disk in case this was enabled.", ) self.ensemble_nbest = 0 else: self.read_at_most = 1 self.logger.warning( - "Memory Exception -- Unable to further reduce the number of ensemble " - "members -- Now reducing the number of predictions per call to read " - "at most to 1." + "Memory Exception -- Unable to further reduce the number of" + " ensemble members. Now reducing the number of predictions" + " per call to read at most to 1." ) else: if isinstance(self.ensemble_nbest, numbers.Integral): self.ensemble_nbest = max(1, int(self.ensemble_nbest / 2)) else: self.ensemble_nbest = self.ensemble_nbest / 2 - self.logger.warning("Memory Exception -- restart with " - "less ensemble_nbest: %d" % self.ensemble_nbest) + self.logger.warning( + "Memory Exception -- restart with " + "less ensemble_nbest: %d" % self.ensemble_nbest + ) return [], self.ensemble_nbest, None, None, None else: return safe_ensemble_script.result @@ -667,7 +715,7 @@ def main(self, time_left, iteration, return_predictions): # the logger configuration. 
So we have to set it up # accordingly self.logger = get_named_client_logger( - name='EnsembleBuilder', + name="EnsembleBuilder", port=self.logger_port, ) @@ -676,7 +724,7 @@ def main(self, time_left, iteration, return_predictions): used_time = time.time() - self.start_time self.logger.debug( - 'Starting iteration %d, time left: %f', + "Starting iteration %d, time left: %f", iteration, time_left - used_time, ) @@ -684,7 +732,13 @@ def main(self, time_left, iteration, return_predictions): # populates self.read_preds and self.read_losses if not self.compute_loss_per_model(): if return_predictions: - return self.ensemble_history, self.ensemble_nbest, train_pred, valid_pred, test_pred + return ( + self.ensemble_history, + self.ensemble_nbest, + train_pred, + valid_pred, + test_pred, + ) else: return self.ensemble_history, self.ensemble_nbest, None, None, None @@ -693,22 +747,40 @@ def main(self, time_left, iteration, return_predictions): candidate_models = self.get_n_best_preds() if not candidate_models: # no candidates yet if return_predictions: - return self.ensemble_history, self.ensemble_nbest, train_pred, valid_pred, test_pred + return ( + self.ensemble_history, + self.ensemble_nbest, + train_pred, + valid_pred, + test_pred, + ) else: return self.ensemble_history, self.ensemble_nbest, None, None, None # populates predictions in self.read_preds # reduces selected models if file reading failed - n_sel_valid, n_sel_test = self. \ - get_valid_test_preds(selected_keys=candidate_models) + n_sel_valid, n_sel_test = self.get_valid_test_preds( + selected_keys=candidate_models + ) # If valid/test predictions loaded, then reduce candidate models to this set - if len(n_sel_test) != 0 and len(n_sel_valid) != 0 \ - and len(set(n_sel_valid).intersection(set(n_sel_test))) == 0: + if ( + len(n_sel_test) != 0 + and len(n_sel_valid) != 0 + and len(set(n_sel_valid).intersection(set(n_sel_test))) == 0 + ): # Both n_sel_* have entries, but there is no overlap, this is critical - self.logger.error("n_sel_valid and n_sel_test are not empty, but do not overlap") + self.logger.error( + "n_sel_valid and n_sel_test are not empty, but do not overlap" + ) if return_predictions: - return self.ensemble_history, self.ensemble_nbest, train_pred, valid_pred, test_pred + return ( + self.ensemble_history, + self.ensemble_nbest, + train_pred, + valid_pred, + test_pred, + ) else: return self.ensemble_history, self.ensemble_nbest, None, None, None @@ -716,24 +788,31 @@ def main(self, time_left, iteration, return_predictions): # then ensure candidate_models AND n_sel_test are sorted the same candidate_models_set = set(candidate_models) if candidate_models_set.intersection(n_sel_valid).intersection(n_sel_test): - candidate_models = sorted(list(candidate_models_set.intersection( - n_sel_valid).intersection(n_sel_test))) + candidate_models = sorted( + list( + candidate_models_set.intersection(n_sel_valid).intersection( + n_sel_test + ) + ) + ) n_sel_test = candidate_models n_sel_valid = candidate_models elif candidate_models_set.intersection(n_sel_valid): - candidate_models = sorted(list(candidate_models_set.intersection( - n_sel_valid))) + candidate_models = sorted( + list(candidate_models_set.intersection(n_sel_valid)) + ) n_sel_valid = candidate_models elif candidate_models_set.intersection(n_sel_test): - candidate_models = sorted(list(candidate_models_set.intersection( - n_sel_test))) + candidate_models = sorted( + list(candidate_models_set.intersection(n_sel_test)) + ) n_sel_test = candidate_models else: # This has to be the 
case n_sel_test = [] n_sel_valid = [] - if os.environ.get('ENSEMBLE_KEEP_ALL_CANDIDATES'): + if os.environ.get("ENSEMBLE_KEEP_ALL_CANDIDATES"): for candidate in candidate_models: self._has_been_candidate.add(candidate) @@ -744,8 +823,9 @@ def main(self, time_left, iteration, return_predictions): if ensemble is not None and self.SAVE2DISC: self.backend.save_ensemble(ensemble, iteration, self.seed) - # Delete files of non-candidate models - can only be done after fitting the ensemble and - # saving it to disc so we do not accidentally delete models in the previous ensemble + # Delete files of non-candidate models - can only be done after fitting the + # ensemble and saving it to disc so we do not accidentally delete models in + # the previous ensemble if self.max_resident_models is not None: self._delete_excess_models(selected_keys=candidate_models) @@ -754,39 +834,47 @@ def main(self, time_left, iteration, return_predictions): pickle.dump(self.read_losses, memory) if ensemble is not None: - train_pred = self.predict(set_="train", - ensemble=ensemble, - selected_keys=candidate_models, - n_preds=len(candidate_models), - index_run=iteration) + train_pred = self.predict( + set_="train", + ensemble=ensemble, + selected_keys=candidate_models, + n_preds=len(candidate_models), + index_run=iteration, + ) # We can't use candidate_models here, as n_sel_* might be empty - valid_pred = self.predict(set_="valid", - ensemble=ensemble, - selected_keys=n_sel_valid, - n_preds=len(candidate_models), - index_run=iteration) + valid_pred = self.predict( + set_="valid", + ensemble=ensemble, + selected_keys=n_sel_valid, + n_preds=len(candidate_models), + index_run=iteration, + ) # TODO if predictions fails, build the model again during the # next iteration! - test_pred = self.predict(set_="test", - ensemble=ensemble, - selected_keys=n_sel_test, - n_preds=len(candidate_models), - index_run=iteration) + test_pred = self.predict( + set_="test", + ensemble=ensemble, + selected_keys=n_sel_test, + n_preds=len(candidate_models), + index_run=iteration, + ) # Add a score to run history to see ensemble progress - self._add_ensemble_trajectory( - train_pred, - valid_pred, - test_pred - ) + self._add_ensemble_trajectory(train_pred, valid_pred, test_pred) - # The loaded predictions and the hash can only be saved after the ensemble has been + # The loaded predictions and hash can only be saved after the ensemble has been # built, because the hash is computed during the construction of the ensemble with open(self.ensemble_memory_file, "wb") as memory: pickle.dump((self.read_preds, self.last_hash), memory) if return_predictions: - return self.ensemble_history, self.ensemble_nbest, train_pred, valid_pred, test_pred + return ( + self.ensemble_history, + self.ensemble_nbest, + train_pred, + valid_pred, + test_pred, + ) else: return self.ensemble_history, self.ensemble_nbest, None, None, None @@ -803,10 +891,14 @@ def get_disk_consumption(self, pred_path): _budget = float(match.group(3)) stored_files_for_run = os.listdir( - self.backend.get_numrun_directory(_seed, _num_run, _budget)) + self.backend.get_numrun_directory(_seed, _num_run, _budget) + ) stored_files_for_run = [ - os.path.join(self.backend.get_numrun_directory(_seed, _num_run, _budget), file_name) - for file_name in stored_files_for_run] + os.path.join( + self.backend.get_numrun_directory(_seed, _num_run, _budget), file_name + ) + for file_name in stored_files_for_run + ] this_model_cost = sum([os.path.getsize(path) for path in stored_files_for_run]) # get the megabytes @@ 
-814,8 +906,8 @@ def get_disk_consumption(self, pred_path): def compute_loss_per_model(self): """ - Compute the loss of the predictions on ensemble building data set; - populates self.read_preds and self.read_losses + Compute the loss of the predictions on ensemble building data set; + populates self.read_preds and self.read_losses """ self.logger.debug("Read ensemble data set predictions") @@ -832,17 +924,21 @@ def compute_loss_per_model(self): pred_path = os.path.join( glob.escape(self.backend.get_runs_directory()), - '%d_*_*' % self.seed, - 'predictions_ensemble_%s_*_*.npy*' % self.seed, + "%d_*_*" % self.seed, + "predictions_ensemble_%s_*_*.npy*" % self.seed, ) y_ens_files = glob.glob(pred_path) - y_ens_files = [y_ens_file for y_ens_file in y_ens_files - if y_ens_file.endswith('.npy') or y_ens_file.endswith('.npy.gz')] + y_ens_files = [ + y_ens_file + for y_ens_file in y_ens_files + if y_ens_file.endswith(".npy") or y_ens_file.endswith(".npy.gz") + ] self.y_ens_files = y_ens_files # no validation predictions so far -- no files if len(self.y_ens_files) == 0: - self.logger.debug("Found no prediction files on ensemble data set:" - " %s" % pred_path) + self.logger.debug( + "Found no prediction files on ensemble data set:" " %s" % pred_path + ) return False # First sort files chronologically @@ -858,15 +954,18 @@ def compute_loss_per_model(self): n_read_files = 0 # Now read file wrt to num_run - for y_ens_fn, match, _seed, _num_run, _budget, mtime in \ - sorted(to_read, key=lambda x: x[5]): + for y_ens_fn, match, _seed, _num_run, _budget, mtime in sorted( + to_read, key=lambda x: x[5] + ): if self.read_at_most and n_read_files >= self.read_at_most: # limit the number of files that will be read # to limit memory consumption break if not y_ens_fn.endswith(".npy") and not y_ens_fn.endswith(".npy.gz"): - self.logger.info('Error loading file (not .npy or .npy.gz): %s', y_ens_fn) + self.logger.info( + "Error loading file (not .npy or .npy.gz): %s", y_ens_fn + ) continue if not self.read_losses.get(y_ens_fn): @@ -884,7 +983,7 @@ def compute_loss_per_model(self): # 1 - loaded and in memory # 2 - loaded but dropped again # 3 - deleted from disk due to space constraints - "loaded": 0 + "loaded": 0, } if not self.read_preds.get(y_ens_fn): self.read_preds[y_ens_fn] = { @@ -900,16 +999,18 @@ def compute_loss_per_model(self): # actually read the predictions and compute their respective loss try: y_ensemble = self._read_np_fn(y_ens_fn) - loss = calculate_loss(solution=self.y_true_ensemble, - prediction=y_ensemble, - task_type=self.task_type, - metric=self.metric, - scoring_functions=None) + loss = calculate_loss( + solution=self.y_true_ensemble, + prediction=y_ensemble, + task_type=self.task_type, + metric=self.metric, + scoring_functions=None, + ) if np.isfinite(self.read_losses[y_ens_fn]["ens_loss"]): self.logger.debug( - 'Changing ensemble loss for file %s from %f to %f ' - 'because file modification time changed? %f - %f', + "Changing ensemble loss for file %s from %f to %f " + "because file modification time changed? %f - %f", y_ens_fn, self.read_losses[y_ens_fn]["ens_loss"], loss, @@ -923,39 +1024,38 @@ def compute_loss_per_model(self): # To save memory, we just compute the loss. 
self.read_losses[y_ens_fn]["mtime_ens"] = os.path.getmtime(y_ens_fn) self.read_losses[y_ens_fn]["loaded"] = 2 - self.read_losses[y_ens_fn]["disc_space_cost_mb"] = self.get_disk_consumption( - y_ens_fn - ) + self.read_losses[y_ens_fn][ + "disc_space_cost_mb" + ] = self.get_disk_consumption(y_ens_fn) n_read_files += 1 except Exception: self.logger.warning( - 'Error loading %s: %s', + "Error loading %s: %s", y_ens_fn, traceback.format_exc(), ) self.read_losses[y_ens_fn]["ens_loss"] = np.inf self.logger.debug( - 'Done reading %d new prediction files. Loaded %d predictions in ' - 'total.', + "Done reading %d new prediction files. Loaded %d predictions in " "total.", n_read_files, - np.sum([pred["loaded"] > 0 for pred in self.read_losses.values()]) + np.sum([pred["loaded"] > 0 for pred in self.read_losses.values()]), ) return True def get_n_best_preds(self): """ - get best n predictions (i.e., keys of self.read_losses) - according to the loss on the "ensemble set" - n: self.ensemble_nbest - - Side effects: - ->Define the n-best models to use in ensemble - ->Only the best models are loaded - ->Any model that is not best is candidate to deletion - if max models in disc is exceeded. + get best n predictions (i.e., keys of self.read_losses) + according to the loss on the "ensemble set" + n: self.ensemble_nbest + + Side effects: + ->Define the n-best models to use in ensemble + ->Only the best models are loaded + ->Any model that is not best is candidate to deletion + if max models in disc is exceeded. """ sorted_keys = self._get_list_of_sorted_preds() @@ -982,31 +1082,39 @@ def get_n_best_preds(self): # no model left; try to use dummy loss (num_run==0) # log warning when there are other models but not better than dummy model if num_keys > num_dummy: - self.logger.warning("No models better than random - using Dummy loss!" - "Number of models besides current dummy model: %d. " - "Number of dummy models: %d", - num_keys - 1, - num_dummy) + self.logger.warning( + "No models better than random - using Dummy loss!" + "Number of models besides current dummy model: %d. " + "Number of dummy models: %d", + num_keys - 1, + num_dummy, + ) sorted_keys = [ - (k, v["ens_loss"], v["num_run"]) for k, v in self.read_losses.items() + (k, v["ens_loss"], v["num_run"]) + for k, v in self.read_losses.items() if v["seed"] == self.seed and v["num_run"] == 1 ] # reload predictions if losses changed over time and a model is # considered to be in the top models again! if not isinstance(self.ensemble_nbest, numbers.Integral): # Transform to number of models to keep. 
Keep at least one - keep_nbest = max(1, min(len(sorted_keys), - int(len(sorted_keys) * self.ensemble_nbest))) + keep_nbest = max( + 1, min(len(sorted_keys), int(len(sorted_keys) * self.ensemble_nbest)) + ) self.logger.debug( "Library pruning: using only top %f percent of the models for ensemble " "(%d out of %d)", - self.ensemble_nbest * 100, keep_nbest, len(sorted_keys) + self.ensemble_nbest * 100, + keep_nbest, + len(sorted_keys), ) else: # Keep only at most ensemble_nbest keep_nbest = min(self.ensemble_nbest, len(sorted_keys)) - self.logger.debug("Library Pruning: using for ensemble only " - " %d (out of %d) models" % (keep_nbest, len(sorted_keys))) + self.logger.debug( + "Library Pruning: using for ensemble only " + " %d (out of %d) models" % (keep_nbest, len(sorted_keys)) + ) # If max_models_on_disc is None, do nothing # One can only read at most max_models_on_disc models @@ -1016,21 +1124,28 @@ def get_n_best_preds(self): [ v["ens_loss"], v["disc_space_cost_mb"], - ] for v in self.read_losses.values() if v["disc_space_cost_mb"] is not None + ] + for v in self.read_losses.values() + if v["disc_space_cost_mb"] is not None ] max_consumption = max(c[1] for c in consumption) # We are pessimistic with the consumption limit indicated by # max_models_on_disc by 1 model. Such model is assumed to spend # max_consumption megabytes - if (sum(c[1] for c in consumption) + max_consumption) > self.max_models_on_disc: + if ( + sum(c[1] for c in consumption) + max_consumption + ) > self.max_models_on_disc: # just leave the best -- smaller is better! # This list is in descending order, to preserve the best models - sorted_cum_consumption = np.cumsum([ - c[1] for c in list(sorted(consumption)) - ]) + max_consumption - max_models = np.argmax(sorted_cum_consumption > self.max_models_on_disc) + sorted_cum_consumption = ( + np.cumsum([c[1] for c in list(sorted(consumption))]) + + max_consumption + ) + max_models = np.argmax( + sorted_cum_consumption > self.max_models_on_disc + ) # Make sure that at least 1 model survives self.max_resident_models = max(1, max_models) @@ -1040,7 +1155,7 @@ def get_n_best_preds(self): self.max_models_on_disc, (sum(c[1] for c in consumption) + max_consumption), max_consumption, - self.max_resident_models + self.max_resident_models, ) ) else: @@ -1048,11 +1163,15 @@ def get_n_best_preds(self): else: self.max_resident_models = self.max_models_on_disc - if self.max_resident_models is not None and keep_nbest > self.max_resident_models: + if ( + self.max_resident_models is not None + and keep_nbest > self.max_resident_models + ): self.logger.debug( "Restricting the number of models to %d instead of %d due to argument " "max_models_on_disc", - self.max_resident_models, keep_nbest, + self.max_resident_models, + keep_nbest, ) keep_nbest = self.max_resident_models @@ -1069,9 +1188,12 @@ def get_n_best_preds(self): # but always keep at least one model current_loss = sorted_keys[i][1] if current_loss >= worst_loss: - self.logger.debug("Dynamic Performance range: " - "Further reduce from %d to %d models", - keep_nbest, max(1, i)) + self.logger.debug( + "Dynamic Performance range: " + "Further reduce from %d to %d models", + keep_nbest, + max(1, i), + ) keep_nbest = max(1, i) break ensemble_n_best = keep_nbest @@ -1085,38 +1207,33 @@ def get_n_best_preds(self): self.read_preds[k][Y_ENSEMBLE] = None self.read_preds[k][Y_VALID] = None self.read_preds[k][Y_TEST] = None - if self.read_losses[k]['loaded'] == 1: + if self.read_losses[k]["loaded"] == 1: self.logger.debug( - 'Dropping model %s 
(%d,%d) with loss %f.', + "Dropping model %s (%d,%d) with loss %f.", k, - self.read_losses[k]['seed'], - self.read_losses[k]['num_run'], - self.read_losses[k]['ens_loss'], + self.read_losses[k]["seed"], + self.read_losses[k]["num_run"], + self.read_losses[k]["ens_loss"], ) - self.read_losses[k]['loaded'] = 2 + self.read_losses[k]["loaded"] = 2 # Load the predictions for the winning for k in sorted_keys[:ensemble_n_best]: if ( - ( - k not in self.read_preds or - self.read_preds[k][Y_ENSEMBLE] is None - ) - and self.read_losses[k]['loaded'] != 3 - ): + k not in self.read_preds or self.read_preds[k][Y_ENSEMBLE] is None + ) and self.read_losses[k]["loaded"] != 3: self.read_preds[k][Y_ENSEMBLE] = self._read_np_fn(k) # No need to load valid and test here because they are loaded # only if the model ends up in the ensemble - self.read_losses[k]['loaded'] = 1 + self.read_losses[k]["loaded"] = 1 # return keys of self.read_losses with lowest losses return sorted_keys[:ensemble_n_best] - def get_valid_test_preds(self, selected_keys: List[str]) -> Tuple[List[str], List[str]]: - """ - get valid and test predictions from disc - and store them in self.read_preds - + def get_valid_test_preds( + self, selected_keys: List[str] + ) -> Tuple[List[str], List[str]]: + """Get valid and test predictions from disc and store them in self.read_preds Parameters --------- selected_keys: list @@ -1135,35 +1252,47 @@ def get_valid_test_preds(self, selected_keys: List[str]) -> Tuple[List[str], Lis valid_fn = glob.glob( os.path.join( glob.escape(self.backend.get_runs_directory()), - '%d_%d_%s' % ( + "%d_%d_%s" + % ( self.read_losses[k]["seed"], self.read_losses[k]["num_run"], self.read_losses[k]["budget"], ), - 'predictions_valid_%d_%d_%s.npy*' % ( + "predictions_valid_%d_%d_%s.npy*" + % ( self.read_losses[k]["seed"], self.read_losses[k]["num_run"], self.read_losses[k]["budget"], - ) + ), ) ) - valid_fn = [vfn for vfn in valid_fn if vfn.endswith('.npy') or vfn.endswith('.npy.gz')] + valid_fn = [ + vfn + for vfn in valid_fn + if vfn.endswith(".npy") or vfn.endswith(".npy.gz") + ] test_fn = glob.glob( os.path.join( glob.escape(self.backend.get_runs_directory()), - '%d_%d_%s' % ( + "%d_%d_%s" + % ( self.read_losses[k]["seed"], self.read_losses[k]["num_run"], self.read_losses[k]["budget"], ), - 'predictions_test_%d_%d_%s.npy*' % ( + "predictions_test_%d_%d_%s.npy*" + % ( self.read_losses[k]["seed"], self.read_losses[k]["num_run"], - self.read_losses[k]["budget"] - ) + self.read_losses[k]["budget"], + ), ) ) - test_fn = [tfn for tfn in test_fn if tfn.endswith('.npy') or tfn.endswith('.npy.gz')] + test_fn = [ + tfn + for tfn in test_fn + if tfn.endswith(".npy") or tfn.endswith(".npy.gz") + ] if len(valid_fn) == 0: # self.logger.debug("Not found validation prediction file " @@ -1185,8 +1314,9 @@ def get_valid_test_preds(self, selected_keys: List[str]) -> Tuple[List[str], Lis success_keys_valid.append(k) self.read_losses[k]["mtime_valid"] = os.path.getmtime(valid_fn) except Exception: - self.logger.warning('Error loading %s: %s', - valid_fn, traceback.format_exc()) + self.logger.warning( + "Error loading %s: %s", valid_fn, traceback.format_exc() + ) if len(test_fn) == 0: # self.logger.debug("Not found test prediction file (although " @@ -1208,26 +1338,24 @@ def get_valid_test_preds(self, selected_keys: List[str]) -> Tuple[List[str], Lis success_keys_test.append(k) self.read_losses[k]["mtime_test"] = os.path.getmtime(test_fn) except Exception: - self.logger.warning('Error loading %s: %s', - test_fn, traceback.format_exc()) + 
self.logger.warning( + "Error loading %s: %s", test_fn, traceback.format_exc() + ) return success_keys_valid, success_keys_test def fit_ensemble(self, selected_keys: list): """ - fit ensemble - - Parameters - --------- - selected_keys: list - list of selected keys of self.read_losses + Parameters + --------- + selected_keys: list + list of selected keys of self.read_losses - Returns - ------- - ensemble: EnsembleSelection - trained Ensemble + Returns + ------- + ensemble: EnsembleSelection + trained Ensemble """ - if self.unit_test: raise MemoryError() @@ -1238,13 +1366,16 @@ def fit_ensemble(self, selected_keys: list): self.read_losses[k]["num_run"], self.read_losses[k]["budget"], ) - for k in selected_keys] + for k in selected_keys + ] # check hash if ensemble training data changed - current_hash = "".join([ - str(zlib.adler32(predictions_train[i].data.tobytes())) - for i in range(len(predictions_train)) - ]) + current_hash = "".join( + [ + str(zlib.adler32(predictions_train[i].data.tobytes())) + for i in range(len(predictions_train)) + ] + ) if self.last_hash == current_hash: self.logger.debug( "No new model predictions selected -- skip ensemble building " @@ -1268,8 +1399,7 @@ def fit_ensemble(self, selected_keys: list): len(predictions_train), ) start_time = time.time() - ensemble.fit(predictions_train, self.y_true_ensemble, - include_num_runs) + ensemble.fit(predictions_train, self.y_true_ensemble, include_num_runs) end_time = time.time() self.logger.debug( "Fitting the ensemble took %.2f seconds.", @@ -1282,10 +1412,10 @@ def fit_ensemble(self, selected_keys: list): ) except ValueError: - self.logger.error('Caught ValueError: %s', traceback.format_exc()) + self.logger.error("Caught ValueError: %s", traceback.format_exc()) return None except IndexError: - self.logger.error('Caught IndexError: %s' + traceback.format_exc()) + self.logger.error("Caught IndexError: %s" + traceback.format_exc()) return None finally: # Explicitly free memory @@ -1293,37 +1423,39 @@ def fit_ensemble(self, selected_keys: list): return ensemble - def predict(self, set_: str, - ensemble: AbstractEnsemble, - selected_keys: list, - n_preds: int, - index_run: int): - """ - save preditions on ensemble, validation and test data on disc - - Parameters - ---------- - set_: ["valid","test"] - data split name - ensemble: EnsembleSelection - trained Ensemble - selected_keys: list - list of selected keys of self.read_losses - n_preds: int - number of prediction models used for ensemble building - same number of predictions on valid and test are necessary - index_run: int - n-th time that ensemble predictions are written to disc - - Return - ------ - y: np.ndarray + def predict( + self, + set_: str, + ensemble: AbstractEnsemble, + selected_keys: list, + n_preds: int, + index_run: int, + ): + """Save preditions on ensemble, validation and test data on disc + + Parameters + ---------- + set_: ["valid","test"] + data split name + ensemble: EnsembleSelection + trained Ensemble + selected_keys: list + list of selected keys of self.read_losses + n_preds: int + number of prediction models used for ensemble building + same number of predictions on valid and test are necessary + index_run: int + n-th time that ensemble predictions are written to disc + + Return + ------ + y: np.ndarray """ self.logger.debug("Predicting the %s set with the ensemble!", set_) - if set_ == 'valid': + if set_ == "valid": pred_set = Y_VALID - elif set_ == 'test': + elif set_ == "test": pred_set = Y_TEST else: pred_set = Y_ENSEMBLE @@ -1364,79 +1496,82 
@@ def _add_ensemble_trajectory(self, train_pred, valid_pred, test_pred): train_pred = np.vstack( ((1 - train_pred).reshape((1, -1)), train_pred.reshape((1, -1))) ).transpose() - if valid_pred is not None and (len(valid_pred.shape) == 1 or valid_pred.shape[1] == 1): + if valid_pred is not None and ( + len(valid_pred.shape) == 1 or valid_pred.shape[1] == 1 + ): valid_pred = np.vstack( ((1 - valid_pred).reshape((1, -1)), valid_pred.reshape((1, -1))) ).transpose() - if test_pred is not None and (len(test_pred.shape) == 1 or test_pred.shape[1] == 1): + if test_pred is not None and ( + len(test_pred.shape) == 1 or test_pred.shape[1] == 1 + ): test_pred = np.vstack( ((1 - test_pred).reshape((1, -1)), test_pred.reshape((1, -1))) ).transpose() performance_stamp = { - 'Timestamp': pd.Timestamp.now(), - 'ensemble_optimization_score': calculate_score( + "Timestamp": pd.Timestamp.now(), + "ensemble_optimization_score": calculate_score( solution=self.y_true_ensemble, prediction=train_pred, task_type=self.task_type, metric=self.metric, - scoring_functions=None - ) + scoring_functions=None, + ), } if valid_pred is not None: # TODO: valid_pred are a legacy from competition manager # and this if never happens. Re-evaluate Y_valid support - performance_stamp['ensemble_val_score'] = calculate_score( + performance_stamp["ensemble_val_score"] = calculate_score( solution=self.y_valid, prediction=valid_pred, task_type=self.task_type, metric=self.metric, - scoring_functions=None + scoring_functions=None, ) # In case test_pred was provided if test_pred is not None: - performance_stamp['ensemble_test_score'] = calculate_score( + performance_stamp["ensemble_test_score"] = calculate_score( solution=self.y_test, prediction=test_pred, task_type=self.task_type, metric=self.metric, - scoring_functions=None + scoring_functions=None, ) self.ensemble_history.append(performance_stamp) def _get_list_of_sorted_preds(self): """ - Returns a list of sorted predictions in descending order - Losses are taken from self.read_losses. + Returns a list of sorted predictions in descending order + Losses are taken from self.read_losses. - Parameters - ---------- - None + Parameters + ---------- + None - Return - ------ - sorted_keys: list + Return + ------ + sorted_keys: list """ # Sort by loss - smaller is better! - sorted_keys = list(sorted( - [ - (k, v["ens_loss"], v["num_run"]) - for k, v in self.read_losses.items() - ], - # Sort by loss as priority 1 and then by num_run on a ascending order - # We want small num_run first - key=lambda x: (x[1], x[2]), - )) + sorted_keys = list( + sorted( + [(k, v["ens_loss"], v["num_run"]) for k, v in self.read_losses.items()], + # Sort by loss as priority 1 and then by num_run on a ascending order + # We want small num_run first + key=lambda x: (x[1], x[2]), + ) + ) return sorted_keys def _delete_excess_models(self, selected_keys: List[str]): """ - Deletes models excess models on disc. self.max_models_on_disc - defines the upper limit on how many models to keep. - Any additional model with a worst loss than the top - self.max_models_on_disc is deleted. + Deletes models excess models on disc. self.max_models_on_disc + defines the upper limit on how many models to keep. + Any additional model with a worst loss than the top + self.max_models_on_disc is deleted. 
""" @@ -1461,8 +1596,8 @@ def _delete_excess_models(self, selected_keys: List[str]): numrun_dir = self.backend.get_numrun_directory(_seed, _num_run, _budget) try: - os.rename(numrun_dir, numrun_dir + '.old') - shutil.rmtree(numrun_dir + '.old') + os.rename(numrun_dir, numrun_dir + ".old") + shutil.rmtree(numrun_dir + ".old") self.logger.info("Deleted files of non-candidate model %s", pred_path) self.read_losses[pred_path]["disc_space_cost_mb"] = None self.read_losses[pred_path]["loaded"] = 3 @@ -1470,7 +1605,9 @@ def _delete_excess_models(self, selected_keys: List[str]): except Exception as e: self.logger.error( "Failed to delete files of non-candidate model %s due" - " to error %s", pred_path, e + " to error %s", + pred_path, + e, ) def _read_np_fn(self, path): @@ -1478,9 +1615,7 @@ def _read_np_fn(self, path): # Support for string precision if isinstance(self.precision, str): precision = int(self.precision) - self.logger.warning("Interpreted str-precision as {}".format( - precision - )) + self.logger.warning("Interpreted str-precision as {}".format(precision)) else: precision = self.precision @@ -1490,7 +1625,7 @@ def _read_np_fn(self, path): open_method = open else: raise ValueError("Unknown filetype %s" % path) - with open_method(path, 'rb') as fp: + with open_method(path, "rb") as fp: if precision == 16: predictions = np.load(fp, allow_pickle=True).astype(dtype=np.float16) elif precision == 32: diff --git a/autosklearn/ensembles/abstract_ensemble.py b/autosklearn/ensembles/abstract_ensemble.py index 752131407f..24d352ab5e 100644 --- a/autosklearn/ensembles/abstract_ensemble.py +++ b/autosklearn/ensembles/abstract_ensemble.py @@ -15,7 +15,7 @@ def fit( base_models_predictions: np.ndarray, true_targets: np.ndarray, model_identifiers: List[Tuple[int, int, float]], - ) -> 'AbstractEnsemble': + ) -> "AbstractEnsemble": """Fit an ensemble given predictions of base models and targets. Ensemble building maximizes performance (in contrast to @@ -23,7 +23,8 @@ def fit( Parameters ---------- - base_models_predictions : array of shape = [n_base_models, n_data_points, n_targets] + base_models_predictions: np.ndarray + shape = (n_base_models, n_data_points, n_targets) n_targets is the number of classes in case of classification, n_targets is 0 or 1 in case of regression @@ -40,12 +41,15 @@ def fit( pass @abstractmethod - def predict(self, base_models_predictions: Union[np.ndarray, List[np.ndarray]]) -> np.ndarray: + def predict( + self, base_models_predictions: Union[np.ndarray, List[np.ndarray]] + ) -> np.ndarray: """Create ensemble predictions from the base model predictions. Parameters ---------- - base_models_predictions : array of shape = [n_base_models, n_data_points, n_targets] + base_models_predictions : np.ndarray + shape = (n_base_models, n_data_points, n_targets) Same as in the fit method. 
Returns diff --git a/autosklearn/ensembles/ensemble_selection.py b/autosklearn/ensembles/ensemble_selection.py index 1546c763c2..3ae216da01 100644 --- a/autosklearn/ensembles/ensemble_selection.py +++ b/autosklearn/ensembles/ensemble_selection.py @@ -1,9 +1,9 @@ +from typing import Any, Dict, List, Optional, Tuple, Union, cast + import random from collections import Counter -from typing import Any, Dict, List, Optional, Tuple, Union, cast import numpy as np - from sklearn.utils import check_random_state from autosklearn.constants import TASK_TYPES @@ -19,10 +19,10 @@ def __init__( task_type: int, metric: Scorer, bagging: bool = False, - mode: str = 'fast', + mode: str = "fast", random_state: Optional[Union[int, np.random.RandomState]] = None, ) -> None: - """ An ensemble of selected algorithms + """An ensemble of selected algorithms Fitting an EnsembleSelection generates an ensemble from the the models generated during the search process. Can be further used for prediction. @@ -79,17 +79,19 @@ def fit( ) -> AbstractEnsemble: self.ensemble_size = int(self.ensemble_size) if self.ensemble_size < 1: - raise ValueError('Ensemble size cannot be less than one!') + raise ValueError("Ensemble size cannot be less than one!") if self.task_type not in TASK_TYPES: - raise ValueError('Unknown task type %s.' % self.task_type) + raise ValueError("Unknown task type %s." % self.task_type) if not isinstance(self.metric, Scorer): - raise ValueError("The provided metric must be an instance of Scorer, " - "nevertheless it is {}({})".format( - self.metric, - type(self.metric), - )) - if self.mode not in ('fast', 'slow'): - raise ValueError('Unknown mode %s' % self.mode) + raise ValueError( + "The provided metric must be an instance of Scorer, " + "nevertheless it is {}({})".format( + self.metric, + type(self.metric), + ) + ) + if self.mode not in ("fast", "slow"): + raise ValueError("Unknown mode %s" % self.mode) if self.bagging: self._bagging(predictions, labels) @@ -104,7 +106,7 @@ def _fit( predictions: List[np.ndarray], labels: np.ndarray, ) -> AbstractEnsemble: - if self.mode == 'fast': + if self.mode == "fast": self._fast(predictions, labels) else: self._slow(predictions, labels) @@ -149,18 +151,17 @@ def _fast( # Memory-efficient averaging! for j, pred in enumerate(predictions): # fant_ensemble_prediction is the prediction of the current ensemble - # and should be ([predictions[selected_prev_iterations] + predictions[j])/(s+1) - # We overwrite the contents of fant_ensemble_prediction - # directly with weighted_ensemble_prediction + new_prediction and then scale for avg - np.add( - weighted_ensemble_prediction, - pred, - out=fant_ensemble_prediction - ) + # and should be + # + # ([predictions[selected_prev_iterations] + predictions[j])/(s+1) + # + # We overwrite the contents of fant_ensemble_prediction directly with + # weighted_ensemble_prediction + new_prediction and then scale for avg + np.add(weighted_ensemble_prediction, pred, out=fant_ensemble_prediction) np.multiply( fant_ensemble_prediction, - (1. 
/ float(s + 1)), - out=fant_ensemble_prediction + (1.0 / float(s + 1)), + out=fant_ensemble_prediction, ) # calculate_loss is versatile and can return a dict of losses @@ -172,8 +173,8 @@ def _fast( prediction=fant_ensemble_prediction, task_type=self.task_type, metric=self.metric, - scoring_functions=None - ) + scoring_functions=None, + ), ) all_best = np.argwhere(losses == np.nanmin(losses)).flatten() @@ -192,11 +193,7 @@ def _fast( self.trajectory_ = trajectory self.train_loss_ = trajectory[-1] - def _slow( - self, - predictions: List[np.ndarray], - labels: np.ndarray - ) -> None: + def _slow(self, predictions: List[np.ndarray], labels: np.ndarray) -> None: """Rich Caruana's ensemble selection method.""" self.num_input_models_ = len(predictions) @@ -223,8 +220,8 @@ def _slow( prediction=ensemble_prediction, task_type=self.task_type, metric=self.metric, - scoring_functions=None - ) + scoring_functions=None, + ), ) ensemble.pop() best = np.nanargmin(losses) @@ -269,7 +266,7 @@ def _bagging( n_bags: int = 20, ) -> np.ndarray: """Rich Caruana's ensemble selection method with bagging.""" - raise ValueError('Bagging might not work with class-based interface!') + raise ValueError("Bagging might not work with class-based interface!") n_models = predictions.shape[0] bag_size = int(n_models * fraction) @@ -308,30 +305,34 @@ def predict(self, predictions: Union[np.ndarray, List[np.ndarray]]) -> np.ndarra # If none of the above applies, then something must have gone wrong. else: - raise ValueError("The dimensions of ensemble predictions" - " and ensemble weights do not match!") + raise ValueError( + "The dimensions of ensemble predictions" + " and ensemble weights do not match!" + ) del tmp_predictions return average def __str__(self) -> str: - trajectory_str = ' '.join([ - f'{id}: {perf:.5f}' - for id, perf in enumerate(self.trajectory_) - ]) - identifiers_str = ' '.join([ - f'{identifier}' - for idx, identifier in enumerate(self.identifiers_) - if self.weights_[idx] > 0 - ]) - return ("Ensemble Selection:\n" - f"\tTrajectory: {trajectory_str}\n" - f"\tMembers: {self.indices_}\n" - f"\tWeights: {self.weights_}\n" - f"\tIdentifiers: {identifiers_str}\n") + trajectory_str = " ".join( + [f"{id}: {perf:.5f}" for id, perf in enumerate(self.trajectory_)] + ) + identifiers_str = " ".join( + [ + f"{identifier}" + for idx, identifier in enumerate(self.identifiers_) + if self.weights_[idx] > 0 + ] + ) + return ( + "Ensemble Selection:\n" + f"\tTrajectory: {trajectory_str}\n" + f"\tMembers: {self.indices_}\n" + f"\tWeights: {self.weights_}\n" + f"\tIdentifiers: {identifiers_str}\n" + ) def get_models_with_weights( - self, - models: BasePipeline + self, models: BasePipeline ) -> List[Tuple[float, BasePipeline]]: output = [] for i, weight in enumerate(self.weights_): diff --git a/autosklearn/ensembles/singlebest_ensemble.py b/autosklearn/ensembles/singlebest_ensemble.py index e10eee978f..58e026dff2 100644 --- a/autosklearn/ensembles/singlebest_ensemble.py +++ b/autosklearn/ensembles/singlebest_ensemble.py @@ -1,8 +1,8 @@ -import os from typing import List, Tuple, Union -import numpy as np +import os +import numpy as np from smac.runhistory.runhistory import RunHistory from autosklearn.automl_common.common.utils.backend import Backend @@ -20,6 +20,7 @@ class SingleBest(AbstractEnsemble): object, to comply with the expected interface of an AbstractEnsemble. 
""" + def __init__( self, metric: Scorer, @@ -38,12 +39,10 @@ def __init__( self.identifiers_ = self.get_identifiers_from_run_history() def get_identifiers_from_run_history(self) -> List[Tuple[int, int, float]]: - """ - This method parses the run history, to identify - the best performing model + """Parses the run history, to identify the best performing model - It populates the identifiers attribute, which is used - by the backend to access the actual model + Populates the identifiers attribute, which is used by the backend to access + the actual model. """ best_model_identifier = [] best_model_score = self.metric._worst_possible_result @@ -52,35 +51,38 @@ def get_identifiers_from_run_history(self) -> List[Tuple[int, int, float]]: run_value = self.run_history.data[run_key] score = self.metric._optimum - (self.metric._sign * run_value.cost) - if (score > best_model_score and self.metric._sign > 0) \ - or (score < best_model_score and self.metric._sign < 0): + if (score > best_model_score and self.metric._sign > 0) or ( + score < best_model_score and self.metric._sign < 0 + ): # Make sure that the individual best model actually exists model_dir = self.backend.get_numrun_directory( self.seed, - run_value.additional_info['num_run'], + run_value.additional_info["num_run"], run_key.budget, ) model_file_name = self.backend.get_model_filename( self.seed, - run_value.additional_info['num_run'], + run_value.additional_info["num_run"], run_key.budget, ) file_path = os.path.join(model_dir, model_file_name) if not os.path.exists(file_path): continue - best_model_identifier = [( - self.seed, - run_value.additional_info['num_run'], - run_key.budget, - )] + best_model_identifier = [ + ( + self.seed, + run_value.additional_info["num_run"], + run_key.budget, + ) + ] best_model_score = score if not best_model_identifier: raise ValueError( - "No valid model found in run history. This means smac was not able to fit" - " a valid model. Please check the log file for errors." + "No valid model found in run history. This means smac was not able to" + " fit a valid model. Please check the log file for errors." 
) return best_model_identifier @@ -89,15 +91,25 @@ def predict(self, predictions: Union[np.ndarray, List[np.ndarray]]) -> np.ndarra return predictions[0] def __str__(self) -> str: - return 'Single Model Selection:\n\tMembers: %s' \ - '\n\tWeights: %s\n\tIdentifiers: %s' % \ - (self.indices_, self.weights_, - ' '.join([str(identifier) for idx, identifier in - enumerate(self.identifiers_) - if self.weights_[idx] > 0])) - - def get_models_with_weights(self, models: BasePipeline - ) -> List[Tuple[float, BasePipeline]]: + return ( + "Single Model Selection:\n\tMembers: %s" + "\n\tWeights: %s\n\tIdentifiers: %s" + % ( + self.indices_, + self.weights_, + " ".join( + [ + str(identifier) + for idx, identifier in enumerate(self.identifiers_) + if self.weights_[idx] > 0 + ] + ), + ) + ) + + def get_models_with_weights( + self, models: BasePipeline + ) -> List[Tuple[float, BasePipeline]]: output = [] for i, weight in enumerate(self.weights_): if weight > 0.0: diff --git a/autosklearn/estimators.py b/autosklearn/estimators.py index 070230ae94..491309a7b8 100644 --- a/autosklearn/estimators.py +++ b/autosklearn/estimators.py @@ -1,29 +1,28 @@ # -*- encoding: utf-8 -*- -from typing import Any, Optional, Dict, List, Mapping, Tuple, Union, Iterable -from typing_extensions import Literal +from typing import Any, Dict, Iterable, List, Mapping, Optional, Tuple, Union -from ConfigSpace.configuration_space import Configuration, ConfigurationSpace import dask.distributed import joblib import numpy as np import pandas as pd +from ConfigSpace.configuration_space import Configuration, ConfigurationSpace from scipy.sparse import spmatrix from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin from sklearn.utils.multiclass import type_of_target from smac.runhistory.runhistory import RunInfo, RunValue +from typing_extensions import Literal +from autosklearn.automl import AutoML, AutoMLClassifier, AutoMLRegressor from autosklearn.data.validation import ( - convert_if_sparse, SUPPORTED_FEAT_TYPES, SUPPORTED_TARGET_TYPES, + convert_if_sparse, ) -from autosklearn.pipeline.base import BasePipeline -from autosklearn.automl import AutoMLClassifier, AutoMLRegressor, AutoML from autosklearn.metrics import Scorer +from autosklearn.pipeline.base import BasePipeline class AutoSklearnEstimator(BaseEstimator): - def __init__( self, time_left_for_this_task=3600, @@ -36,7 +35,7 @@ def __init__( memory_limit=3072, include: Optional[Dict[str, List[str]]] = None, exclude: Optional[Dict[str, List[str]]] = None, - resampling_strategy='holdout', + resampling_strategy="holdout", resampling_strategy_arguments=None, tmp_folder=None, delete_tmp_folder_after_terminate=True, @@ -51,7 +50,7 @@ def __init__( scoring_functions: Optional[List[Scorer]] = None, load_models: bool = True, get_trials_callback=None, - dataset_compression: Union[bool, Mapping[str, Any]] = True + dataset_compression: Union[bool, Mapping[str, Any]] = True, ): """ Parameters @@ -339,11 +338,12 @@ def __init__( """ # noqa (links are too long) # Raise error if the given total time budget is less than 30 seconds. if time_left_for_this_task < 30: - raise ValueError("Time left for this task must be at least " - "30 seconds. ") + raise ValueError("Time left for this task must be at least " "30 seconds. 
") self.time_left_for_this_task = time_left_for_this_task self.per_run_time_limit = per_run_time_limit - self.initial_configurations_via_metalearning = initial_configurations_via_metalearning + self.initial_configurations_via_metalearning = ( + initial_configurations_via_metalearning + ) self.ensemble_size = ensemble_size self.ensemble_nbest = ensemble_nbest self.max_models_on_disc = max_models_on_disc @@ -388,12 +388,13 @@ def __getstate__(self): def build_automl(self): + initial_configs = self.initial_configurations_via_metalearning automl = self._get_automl_class()( temporary_directory=self.tmp_folder, delete_tmp_folder_after_terminate=self.delete_tmp_folder_after_terminate, time_left_for_this_task=self.time_left_for_this_task, per_run_time_limit=self.per_run_time_limit, - initial_configurations_via_metalearning=self.initial_configurations_via_metalearning, + initial_configurations_via_metalearning=initial_configs, ensemble_size=self.ensemble_size, ensemble_nbest=self.ensemble_nbest, max_models_on_disc=self.max_models_on_disc, @@ -413,7 +414,7 @@ def build_automl(self): metric=self.metric, scoring_functions=self.scoring_functions, get_trials_callback=self.get_trials_callback, - dataset_compression=self.dataset_compression + dataset_compression=self.dataset_compression, ) return automl @@ -434,7 +435,7 @@ def fit_pipeline( self, X: SUPPORTED_FEAT_TYPES, y: Union[SUPPORTED_TARGET_TYPES, spmatrix], - config: Union[Configuration, Dict[str, Union[str, float, int]]], + config: Union[Configuration, Dict[str, Union[str, float, int]]], dataset_name: Optional[str] = None, X_test: Optional[SUPPORTED_FEAT_TYPES] = None, y_test: Optional[Union[SUPPORTED_TARGET_TYPES, spmatrix]] = None, @@ -442,7 +443,7 @@ def fit_pipeline( *args, **kwargs: Dict, ) -> Tuple[Optional[BasePipeline], RunInfo, RunValue]: - """ Fits and individual pipeline configuration and returns + """Fits and individual pipeline configuration and returns the result to the user. The Estimator constraints are honored, for example the resampling @@ -451,7 +452,8 @@ def fit_pipeline( arguments are redirected to the TAE evaluation function, which allows for further customization while building a pipeline. - Any additional argument provided is directly passed to the worker exercising the run. + Any additional argument provided is directly passed to the + worker exercising the run. Parameters ---------- @@ -465,7 +467,7 @@ def fit_pipeline( If provided, the testing performance will be tracked on this labels config: Union[Configuration, Dict[str, Union[str, float, int]]] A configuration object used to define the pipeline steps. - If a dictionary is passed, a configuration is created based on this dictionary. + If a dict is passed, a configuration is created based on this dict. 
dataset_name: Optional[str] Name that will be used to tag the Auto-Sklearn run and identify the Auto-Sklearn run @@ -489,16 +491,27 @@ def fit_pipeline( """ if self.automl_ is None: self.automl_ = self.build_automl() - return self.automl_.fit_pipeline(X=X, y=y, - dataset_name=dataset_name, - config=config, - feat_type=feat_type, - X_test=X_test, y_test=y_test, - *args, **kwargs) - - def fit_ensemble(self, y, task=None, precision=32, - dataset_name=None, ensemble_nbest=None, - ensemble_size=None): + return self.automl_.fit_pipeline( + X=X, + y=y, + dataset_name=dataset_name, + config=config, + feat_type=feat_type, + X_test=X_test, + y_test=y_test, + *args, + **kwargs, + ) + + def fit_ensemble( + self, + y, + task=None, + precision=32, + dataset_name=None, + ensemble_nbest=None, + ensemble_size=None, + ): """Fit an ensemble to models trained during an optimization process. All parameters are ``None`` by default. If no other value is given, @@ -584,30 +597,42 @@ def predict(self, X, batch_size=None, n_jobs=1): return self.automl_.predict(X, batch_size=batch_size, n_jobs=n_jobs) def predict_proba(self, X, batch_size=None, n_jobs=1): - return self.automl_.predict_proba( - X, batch_size=batch_size, n_jobs=n_jobs) + return self.automl_.predict_proba(X, batch_size=batch_size, n_jobs=n_jobs) def score(self, X, y): return self.automl_.score(X, y) def show_models(self): - """ Returns a dictionary containing dictionaries of ensemble models. + """Returns a dictionary containing dictionaries of ensemble models. Each model in the ensemble can be accessed by giving its ``model_id`` as key. A model dictionary contains the following: * ``"model_id"`` - The id given to a model by ``autosklearn``. + * ``"rank"`` - The rank of the model based on it's ``"cost"``. + * ``"cost"`` - The loss of the model on the validation set. + * ``"ensemble_weight"`` - The weight given to the model in the ensemble. + * ``"voting_model"`` - The ``cv_voting_ensemble`` model (for 'cv' resampling). - * ``"estimators"`` - List of models (dicts) in ``cv_voting_ensemble`` (for 'cv' resampling). + + * ``"estimators"`` - List of models (dicts) in ``cv_voting_ensemble`` + ('cv' resampling). + * ``"data_preprocessor"`` - The preprocessor used on the data. + * ``"balancing"`` - The balancing used on the data (for classification). + * ``"feature_preprocessor"`` - The preprocessor for features types. - * ``"classifier"`` or ``"regressor"`` - The autosklearn wrapped classifier or regressor. - * ``"sklearn_classifier"`` or ``"sklearn_regressor"`` - The sklearn classifier or regressor. + + * ``"classifier"`` / ``"regressor"`` + - The autosklearn wrapped classifier or regressor. + + * ``"sklearn_classifier"`` or ``"sklearn_regressor"`` + - The sklearn classifier or regressor. **Example** @@ -657,7 +682,7 @@ def show_models(self): Dict(int, Any) : dictionary of length = number of models in the ensemble A dictionary of models in the ensemble, where ``model_id`` is the key. 
- """ + """ # noqa: E501 return self.automl_.show_models() @@ -709,12 +734,12 @@ def leaderboard( self, detailed: bool = False, ensemble_only: bool = True, - top_k: Union[int, Literal['all']] = 'all', - sort_by: str = 'cost', - sort_order: Literal['auto', 'ascending', 'descending'] = 'auto', - include: Optional[Union[str, Iterable[str]]] = None + top_k: Union[int, Literal["all"]] = "all", + sort_by: str = "cost", + sort_order: Literal["auto", "ascending", "descending"] = "auto", + include: Optional[Union[str, Iterable[str]]] = None, ) -> pd.DataFrame: - """ Returns a pandas table of results for all evaluated models. + """Returns a pandas table of results for all evaluated models. Gives an overview of all models trained during the search process along with various statistics about their training. @@ -789,46 +814,53 @@ def leaderboard( # Validation of top_k if ( not (isinstance(top_k, str) or isinstance(top_k, int)) - or (isinstance(top_k, str) and top_k != 'all') + or (isinstance(top_k, str) and top_k != "all") or (isinstance(top_k, int) and top_k <= 0) ): - raise ValueError(f"top_k={top_k} must be a positive integer or pass" - " `top_k`='all' to view results for all models") + raise ValueError( + f"top_k={top_k} must be a positive integer or pass" + " `top_k`='all' to view results for all models" + ) # Validate columns to include if isinstance(include, str): include = [include] - if include == ['model_id']: - raise ValueError('Must provide more than just `model_id`') + if include == ["model_id"]: + raise ValueError("Must provide more than just `model_id`") if include is not None: columns = [*include] # 'model_id' should always be present as it is the unique index # used for pandas - if 'model_id' not in columns: - columns.append('model_id') + if "model_id" not in columns: + columns.append("model_id") - invalid_include_items = set(columns) - set(column_types['all']) + invalid_include_items = set(columns) - set(column_types["all"]) if len(invalid_include_items) != 0: - raise ValueError(f"Values {invalid_include_items} are not known" - f" columns to include, must be contained in " - f"{column_types['all']}") + raise ValueError( + f"Values {invalid_include_items} are not known" + f" columns to include, must be contained in " + f"{column_types['all']}" + ) elif detailed: - columns = column_types['all'] + columns = column_types["all"] else: - columns = column_types['simple'] + columns = column_types["simple"] # Validation of sorting - if sort_by not in column_types['all']: - raise ValueError(f"sort_by='{sort_by}' must be one of included " - f"columns {set(column_types['all'])}") + if sort_by not in column_types["all"]: + raise ValueError( + f"sort_by='{sort_by}' must be one of included " + f"columns {set(column_types['all'])}" + ) - valid_sort_orders = ['auto', 'ascending', 'descending'] + valid_sort_orders = ["auto", "ascending", "descending"] if not (isinstance(sort_order, str) and sort_order in valid_sort_orders): - raise ValueError(f"`sort_order` = {sort_order} must be a str in " - f"{valid_sort_orders}") + raise ValueError( + f"`sort_order` = {sort_order} must be a str in " f"{valid_sort_orders}" + ) # To get all the models that were optmized, we collect what we can from # runhistory first. 
@@ -836,29 +868,31 @@ def has_key(rv, key): return rv.additional_info and key in rv.additional_info model_runs = { - rval.additional_info['num_run']: { - 'model_id': rval.additional_info['num_run'], - 'seed': rkey.seed, - 'budget': rkey.budget, - 'duration': rval.time, - 'config_id': rkey.config_id, - 'start_time': rval.starttime, - 'end_time': rval.endtime, - 'status': str(rval.status), - 'cost': rval.cost, - 'train_loss': rval.additional_info['train_loss'] - if has_key(rval, 'train_loss') else None, - 'config_origin': rval.additional_info['configuration_origin'] - if has_key(rval, 'configuration_origin') else None + rval.additional_info["num_run"]: { + "model_id": rval.additional_info["num_run"], + "seed": rkey.seed, + "budget": rkey.budget, + "duration": rval.time, + "config_id": rkey.config_id, + "start_time": rval.starttime, + "end_time": rval.endtime, + "status": str(rval.status), + "cost": rval.cost, + "train_loss": rval.additional_info["train_loss"] + if has_key(rval, "train_loss") + else None, + "config_origin": rval.additional_info["configuration_origin"] + if has_key(rval, "configuration_origin") + else None, } for rkey, rval in self.automl_.runhistory_.data.items() - if has_key(rval, 'num_run') + if has_key(rval, "num_run") } # Next we get some info about the model itself model_class_strings = { - AutoMLClassifier: 'classifier', - AutoMLRegressor: 'regressor' + AutoMLClassifier: "classifier", + AutoMLRegressor: "regressor", } model_type = model_class_strings.get(self._get_automl_class(), None) if model_type is None: @@ -868,21 +902,25 @@ def has_key(rv, key): configurations = self.automl_.runhistory_.ids_config for model_id, run_info in model_runs.items(): - config_id = run_info['config_id'] + config_id = run_info["config_id"] run_config = configurations[config_id]._values - run_info.update({ - 'balancing_strategy': run_config.get('balancing:strategy', None), - 'type': run_config[f'{model_type}:__choice__'], - 'data_preprocessors': [ - value for key, value in run_config.items() - if 'data_preprocessing' in key and '__choice__' in key - ], - 'feature_preprocessors': [ - value for key, value in run_config.items() - if 'feature_preprocessor' in key and '__choice__' in key - ] - }) + run_info.update( + { + "balancing_strategy": run_config.get("balancing:strategy", None), + "type": run_config[f"{model_type}:__choice__"], + "data_preprocessors": [ + value + for key, value in run_config.items() + if "data_preprocessing" in key and "__choice__" in key + ], + "feature_preprocessors": [ + value + for key, value in run_config.items() + if "feature_preprocessor" in key and "__choice__" in key + ], + } + ) # Get the models ensemble weight if it has one # TODO both implementing classes of AbstractEnsemble have a property @@ -892,7 +930,7 @@ def has_key(rv, key): # tied together by ordering, might be better to store as tuple for i, weight in enumerate(self.automl_.ensemble_.weights_): (_, model_id, _) = self.automl_.ensemble_.identifiers_[i] - model_runs[model_id]['ensemble_weight'] = weight + model_runs[model_id]["ensemble_weight"] = weight # Filter out non-ensemble members if needed, else fill in a default # value of 0 if it's missing @@ -900,65 +938,70 @@ def has_key(rv, key): model_runs = { model_id: info for model_id, info in model_runs.items() - if ('ensemble_weight' in info and info['ensemble_weight'] > 0) + if ("ensemble_weight" in info and info["ensemble_weight"] > 0) } else: for model_id, info in model_runs.items(): - if 'ensemble_weight' not in info: - info['ensemble_weight'] = 0 + 
if "ensemble_weight" not in info: + info["ensemble_weight"] = 0 # `rank` relies on `cost` so we include `cost` # We drop it later if it's not requested - if 'rank' in columns and 'cost' not in columns: - columns = [*columns, 'cost'] + if "rank" in columns and "cost" not in columns: + columns = [*columns, "cost"] # Finally, convert into a tabular format by converting the dict into # column wise orientation. - dataframe = pd.DataFrame({ - col: [run_info[col] for run_info in model_runs.values()] - for col in columns if col != 'rank' - }) + dataframe = pd.DataFrame( + { + col: [run_info[col] for run_info in model_runs.values()] + for col in columns + if col != "rank" + } + ) # Give it an index, even if not in the `include` - dataframe.set_index('model_id', inplace=True) + dataframe.set_index("model_id", inplace=True) # Add the `rank` column if needed, dropping `cost` if it's not # requested by the user - if 'rank' in columns: - dataframe.sort_values(by='cost', ascending=True, inplace=True) - dataframe.insert(column='rank', - value=range(1, len(dataframe) + 1), - loc=list(columns).index('rank') - 1) # account for `model_id` + if "rank" in columns: + dataframe.sort_values(by="cost", ascending=True, inplace=True) + dataframe.insert( + column="rank", + value=range(1, len(dataframe) + 1), + loc=list(columns).index("rank") - 1, + ) # account for `model_id` - if 'cost' not in columns: - dataframe.drop('cost', inplace=True) + if "cost" not in columns: + dataframe.drop("cost", inplace=True) # Decide on the sort order depending on what it gets sorted by - descending_columns = ['ensemble_weight', 'duration'] - if sort_order == 'auto': + descending_columns = ["ensemble_weight", "duration"] + if sort_order == "auto": ascending_param = False if sort_by in descending_columns else True else: - ascending_param = False if sort_order == 'descending' else True + ascending_param = False if sort_order == "descending" else True # Sort by the given column name, defaulting to 'model_id' if not present if sort_by not in dataframe.columns: - self.automl_._logger.warning(f"sort_by = '{sort_by}' was not present" - ", defaulting to sort on the index " - "'model_id'") - sort_by = 'model_id' + self.automl_._logger.warning( + f"sort_by = '{sort_by}' was not present" + ", defaulting to sort on the index " + "'model_id'" + ) + sort_by = "model_id" # Cost can be the same but leave rank all over the place - if 'rank' in columns and sort_by == 'cost': - dataframe.sort_values(by=[sort_by, 'rank'], - ascending=[ascending_param, True], - inplace=True) + if "rank" in columns and sort_by == "cost": + dataframe.sort_values( + by=[sort_by, "rank"], ascending=[ascending_param, True], inplace=True + ) else: - dataframe.sort_values(by=sort_by, - ascending=ascending_param, - inplace=True) + dataframe.sort_values(by=sort_by, ascending=ascending_param, inplace=True) # Lastly, just grab the top_k - if top_k == 'all' or top_k >= len(dataframe): + if top_k == "all" or top_k >= len(dataframe): top_k = len(dataframe) dataframe = dataframe.head(top_k) @@ -966,18 +1009,29 @@ def has_key(rv, key): return dataframe @staticmethod - def _leaderboard_columns() -> Dict[Literal['all', 'simple', 'detailed'], List[str]]: + def _leaderboard_columns() -> Dict[Literal["all", "simple", "detailed"], List[str]]: all = [ - "model_id", "rank", "ensemble_weight", "type", "cost", "duration", - "config_id", "train_loss", "seed", "start_time", "end_time", - "budget", "status", "data_preprocessors", "feature_preprocessors", - "balancing_strategy", "config_origin" - ] - 
simple = [ - "model_id", "rank", "ensemble_weight", "type", "cost", "duration" + "model_id", + "rank", + "ensemble_weight", + "type", + "cost", + "duration", + "config_id", + "train_loss", + "seed", + "start_time", + "end_time", + "budget", + "status", + "data_preprocessors", + "feature_preprocessors", + "balancing_strategy", + "config_origin", ] + simple = ["model_id", "rank", "ensemble_weight", "type", "cost", "duration"] detailed = all - return {'all': all, 'detailed': detailed, 'simple': simple} + return {"all": all, "detailed": detailed, "simple": simple} def _get_automl_class(self): raise NotImplementedError() @@ -1012,23 +1066,25 @@ def get_configuration_space( if self.automl_ is None: self.automl_ = self.build_automl() - return self.automl_.fit( - X, y, - X_test=X_test, y_test=y_test, - dataset_name=dataset_name, - feat_type=feat_type, - only_return_configuration_space=True, - ) if self.automl_.configuration_space is None else self.automl_.configuration_space + return ( + self.automl_.fit( + X, + y, + X_test=X_test, + y_test=y_test, + dataset_name=dataset_name, + feat_type=feat_type, + only_return_configuration_space=True, + ) + if self.automl_.configuration_space is None + else self.automl_.configuration_space + ) class AutoSklearnClassifier(AutoSklearnEstimator, ClassifierMixin): - """This class implements the classification task. """ + """This class implements the classification task.""" - def fit(self, X, y, - X_test=None, - y_test=None, - feat_type=None, - dataset_name=None): + def fit(self, X, y, X_test=None, y_test=None, feat_type=None, dataset_name=None): """Fit *auto-sklearn* to given training set (X, y). Fit both optimizes the machine learning models and builds an ensemble @@ -1075,18 +1131,16 @@ def fit(self, X, y, # type of data is compatible with auto-sklearn. Legal target # types are: binary, multiclass, multilabel-indicator. target_type = type_of_target(y) - supported_types = ['binary', 'multiclass', 'multilabel-indicator'] + supported_types = ["binary", "multiclass", "multilabel-indicator"] if target_type not in supported_types: - raise ValueError("Classification with data of type {} is " - "not supported. Supported types are {}. " - "You can find more information about scikit-learn " - "data types in: " - "https://scikit-learn.org/stable/modules/multiclass.html" - "".format( - target_type, - supported_types - ) - ) + raise ValueError( + "Classification with data of type {} is " + "not supported. Supported types are {}. " + "You can find more information about scikit-learn " + "data types in: " + "https://scikit-learn.org/stable/modules/multiclass.html" + "".format(target_type, supported_types) + ) # remember target type for using in predict_proba later. self.target_type = target_type @@ -1138,22 +1192,19 @@ def predict_proba(self, X, batch_size=None, n_jobs=1): y : array of shape = [n_samples, n_classes] or [n_samples, n_labels] The predicted class probabilities. """ - pred_proba = super().predict_proba( - X, batch_size=batch_size, n_jobs=n_jobs) + pred_proba = super().predict_proba(X, batch_size=batch_size, n_jobs=n_jobs) # Check if all probabilities sum up to 1. # Assert only if target type is not multilabel-indicator. - if self.target_type not in ['multilabel-indicator']: - assert( - np.allclose( - np.sum(pred_proba, axis=1), - np.ones_like(pred_proba[:, 0])) + if self.target_type not in ["multilabel-indicator"]: + assert np.allclose( + np.sum(pred_proba, axis=1), np.ones_like(pred_proba[:, 0]) ), "prediction probability does not sum up to 1!" 
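A short numeric illustration of why the sum-to-one assertion above is skipped for multilabel-indicator targets while the range check below still applies; the values are made up for illustration:

import numpy as np

# Binary / multiclass: each row is a distribution over classes, so rows sum to 1.
multiclass_proba = np.array([[0.7, 0.3], [0.1, 0.9]])
assert np.allclose(multiclass_proba.sum(axis=1), 1.0)

# Multilabel-indicator: each column is an independent per-label probability,
# so rows need not sum to 1 and only the [0, 1] range check is meaningful.
multilabel_proba = np.array([[0.9, 0.8], [0.1, 0.4]])
assert ((multilabel_proba >= 0) & (multilabel_proba <= 1)).all()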
# Check that all probability values lie between 0 and 1. - assert( - (pred_proba >= 0).all() and (pred_proba <= 1).all() - ), "found prediction probability value outside of [0, 1]!" + assert (pred_proba >= 0).all() and ( + pred_proba <= 1 + ).all(), "found prediction probability value outside of [0, 1]!" return pred_proba @@ -1167,11 +1218,7 @@ class AutoSklearnRegressor(AutoSklearnEstimator, RegressorMixin): """ - def fit(self, X, y, - X_test=None, - y_test=None, - feat_type=None, - dataset_name=None): + def fit(self, X, y, X_test=None, y_test=None, feat_type=None, dataset_name=None): """Fit *Auto-sklearn* to given training set (X, y). Fit both optimizes the machine learning models and builds an ensemble @@ -1219,18 +1266,21 @@ def fit(self, X, y, y = convert_if_sparse(y) target_type = type_of_target(y) - supported_types = ['continuous', 'binary', 'multiclass', 'continuous-multioutput'] + supported_types = [ + "continuous", + "binary", + "multiclass", + "continuous-multioutput", + ] if target_type not in supported_types: - raise ValueError("Regression with data of type {} is " - "not supported. Supported types are {}. " - "You can find more information about scikit-learn " - "data types in: " - "https://scikit-learn.org/stable/modules/multiclass.html" - "".format( - target_type, - supported_types - ) - ) + raise ValueError( + "Regression with data of type {} is " + "not supported. Supported types are {}. " + "You can find more information about scikit-learn " + "data types in: " + "https://scikit-learn.org/stable/modules/multiclass.html" + "".format(target_type, supported_types) + ) # Fit is supposed to be idempotent! # But not if we use share_mode. diff --git a/autosklearn/evaluation/__init__.py b/autosklearn/evaluation/__init__.py index 506cf51441..51ad69fbb3 100644 --- a/autosklearn/evaluation/__init__.py +++ b/autosklearn/evaluation/__init__.py @@ -1,42 +1,42 @@ # -*- encoding: utf-8 -*- +from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union, cast + import functools -import logging import json +import logging import math import multiprocessing -from queue import Empty import time import traceback -from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union, cast +from queue import Empty -from ConfigSpace import Configuration import numpy as np import pynisher +from ConfigSpace import Configuration +from sklearn.model_selection._split import ( + BaseCrossValidator, + BaseShuffleSplit, + _RepeatedSplits, +) from smac.runhistory.runhistory import RunInfo, RunValue from smac.stats.stats import Stats from smac.tae import StatusType, TAEAbortException from smac.tae.execute_func import AbstractTAFunc -from sklearn.model_selection._split import _RepeatedSplits, BaseShuffleSplit,\ - BaseCrossValidator - -from autosklearn.automl_common.common.utils.backend import Backend - -from autosklearn.metrics import Scorer -import autosklearn.evaluation.train_evaluator import autosklearn.evaluation.test_evaluator +import autosklearn.evaluation.train_evaluator import autosklearn.evaluation.util import autosklearn.pipeline.components +from autosklearn.automl_common.common.utils.backend import Backend from autosklearn.evaluation.train_evaluator import TYPE_ADDITIONAL_INFO +from autosklearn.metrics import Scorer from autosklearn.util.logging_ import PickableLoggerAdapter, get_named_client_logger from autosklearn.util.parallel import preload_modules def fit_predict_try_except_decorator( - ta: Callable, - queue: multiprocessing.Queue, - cost_for_crash: float, - **kwargs: Any) 
-> None: + ta: Callable, queue: multiprocessing.Queue, cost_for_crash: float, **kwargs: Any +) -> None: try: return ta(queue=queue, **kwargs) @@ -48,7 +48,8 @@ def fit_predict_try_except_decorator( exception_traceback = traceback.format_exc() error_message = repr(e) - # Printing stuff to stdout just in case the queue doesn't work, which happened with the + # Printing stuff to stdout just in case the queue doesn't work, + # which happened with the # following traceback: # File "auto-sklearn/autosklearn/evaluation/__init__.py", line 29, in fit_predict_try_except_decorator # noqa E501 # return ta(queue=queue, **kwargs) @@ -64,14 +65,23 @@ def fit_predict_try_except_decorator( # self._thread.start() # File "miniconda/3-4.5.4/envs/autosklearn/lib/python3.7/threading.py", line 847, in start # noqa E501 # RuntimeError: can't start new thread - print("Exception handling in `fit_predict_try_except_decorator`: " - "traceback: %s \nerror message: %s" % (exception_traceback, error_message)) - - queue.put({'loss': cost_for_crash, - 'additional_run_info': {'traceback': exception_traceback, - 'error': error_message}, - 'status': StatusType.CRASHED, - 'final_queue_element': True}, block=True) + print( + "Exception handling in `fit_predict_try_except_decorator`: " + "traceback: %s \nerror message: %s" % (exception_traceback, error_message) + ) + + queue.put( + { + "loss": cost_for_crash, + "additional_run_info": { + "traceback": exception_traceback, + "error": error_message, + }, + "status": StatusType.CRASHED, + "final_queue_element": True, + }, + block=True, + ) queue.close() @@ -94,8 +104,9 @@ def get_cost_of_crash(metric: Scorer) -> float: return worst_possible_result -def _encode_exit_status(exit_status: Union[str, int, Type[BaseException]] - ) -> Union[str, int]: +def _encode_exit_status( + exit_status: Union[str, int, Type[BaseException]] +) -> Union[str, int]: try: # If it can be dumped, then it is int exit_status = cast(int, exit_status) @@ -108,12 +119,13 @@ def _encode_exit_status(exit_status: Union[str, int, Type[BaseException]] # TODO potentially log all inputs to this class to pickle them in order to do # easier debugging of potential crashes class ExecuteTaFuncWithQueue(AbstractTAFunc): - def __init__( self, backend: Backend, autosklearn_seed: int, - resampling_strategy: Union[str, BaseCrossValidator, _RepeatedSplits, BaseShuffleSplit], + resampling_strategy: Union[ + str, BaseCrossValidator, _RepeatedSplits, BaseShuffleSplit + ], metric: Scorer, cost_for_crash: float, abort_on_first_run_crash: bool, @@ -121,7 +133,7 @@ def __init__( pynisher_context: str, initial_num_run: int = 1, stats: Optional[Stats] = None, - run_obj: str = 'quality', + run_obj: str = "quality", par_factor: int = 1, scoring_functions: Optional[List[Scorer]] = None, output_y_hat_optimization: bool = True, @@ -135,26 +147,29 @@ def __init__( **resampling_strategy_args: Any, ): - if resampling_strategy == 'holdout': + if resampling_strategy == "holdout": eval_function = autosklearn.evaluation.train_evaluator.eval_holdout - elif resampling_strategy == 'holdout-iterative-fit': - eval_function = autosklearn.evaluation.train_evaluator.eval_iterative_holdout - elif resampling_strategy == 'cv-iterative-fit': + elif resampling_strategy == "holdout-iterative-fit": + eval_function = ( + autosklearn.evaluation.train_evaluator.eval_iterative_holdout + ) + elif resampling_strategy == "cv-iterative-fit": eval_function = autosklearn.evaluation.train_evaluator.eval_iterative_cv - elif resampling_strategy == 'cv' or 
isinstance(resampling_strategy, ( - BaseCrossValidator, _RepeatedSplits, BaseShuffleSplit) + elif resampling_strategy == "cv" or isinstance( + resampling_strategy, (BaseCrossValidator, _RepeatedSplits, BaseShuffleSplit) ): eval_function = autosklearn.evaluation.train_evaluator.eval_cv - elif resampling_strategy == 'partial-cv': + elif resampling_strategy == "partial-cv": eval_function = autosklearn.evaluation.train_evaluator.eval_partial_cv - elif resampling_strategy == 'partial-cv-iterative-fit': - eval_function = autosklearn.evaluation.train_evaluator.eval_partial_cv_iterative - elif resampling_strategy == 'test': + elif resampling_strategy == "partial-cv-iterative-fit": + eval_function = ( + autosklearn.evaluation.train_evaluator.eval_partial_cv_iterative + ) + elif resampling_strategy == "test": eval_function = autosklearn.evaluation.test_evaluator.eval_t output_y_hat_optimization = False else: - raise ValueError('Unknown resampling strategy %s' % - resampling_strategy) + raise ValueError("Unknown resampling strategy %s" % resampling_strategy) self.worst_possible_result = cost_for_crash @@ -181,7 +196,7 @@ def __init__( self.resampling_strategy = resampling_strategy self.resampling_strategy_args = resampling_strategy_args self.scoring_functions = scoring_functions - # TODO deactivate output_y_hat_optimization and let the respective evaluator decide + # TODO deactivate output_y_hat_optimization and let respective evaluator decide self.output_y_hat_optimization = output_y_hat_optimization self.include = include self.exclude = exclude @@ -194,11 +209,11 @@ def __init__( self.memory_limit = memory_limit dm = self.backend.load_datamanager() - if 'X_valid' in dm.data and 'Y_valid' in dm.data: + if "X_valid" in dm.data and "Y_valid" in dm.data: self._get_validation_loss = True else: self._get_validation_loss = False - if 'X_test' in dm.data and 'Y_test' in dm.data: + if "X_test" in dm.data and "Y_test" in dm.data: self._get_test_loss = True else: self._get_test_loss = False @@ -206,7 +221,9 @@ def __init__( self.port = port self.pynisher_context = pynisher_context if self.port is None: - self.logger: Union[logging.Logger, PickableLoggerAdapter] = logging.getLogger("TAE") + self.logger: Union[ + logging.Logger, PickableLoggerAdapter + ] = logging.getLogger("TAE") else: self.logger = get_named_client_logger( name="TAE", @@ -236,18 +253,23 @@ def run_wrapper( if self.budget_type is None: if run_info.budget != 0: raise ValueError( - 'If budget_type is None, budget must be.0, but is %f' % run_info.budget + "If budget_type is None, budget must be.0, but is %f" + % run_info.budget ) else: if run_info.budget == 0: run_info = run_info._replace(budget=100) elif run_info.budget <= 0 or run_info.budget > 100: - raise ValueError('Illegal value for budget, must be >0 and <=100, but is %f' % - run_info.budget) - if self.budget_type not in ('subsample', 'iterations', 'mixed'): - raise ValueError("Illegal value for budget type, must be one of " - "('subsample', 'iterations', 'mixed'), but is : %s" % - self.budget_type) + raise ValueError( + "Illegal value for budget, must be >0 and <=100, but is %f" + % run_info.budget + ) + if self.budget_type not in ("subsample", "iterations", "mixed"): + raise ValueError( + "Illegal value for budget type, must be one of " + "('subsample', 'iterations', 'mixed'), but is : %s" + % self.budget_type + ) remaining_time = self.stats.get_remaing_time_budget() @@ -255,11 +277,15 @@ def run_wrapper( run_info = run_info._replace(cutoff=int(remaining_time - 5)) config_id = ( - 
run_info.config if isinstance(run_info.config, int) else run_info.config.config_id + run_info.config + if isinstance(run_info.config, int) + else run_info.config.config_id ) if run_info.cutoff < 1.0: - self.logger.info("Not starting configuration %d because time is up" % config_id) + self.logger.info( + "Not starting configuration %d because time is up" % config_id + ) return run_info, RunValue( status=StatusType.STOP, cost=self.worst_possible_result, @@ -268,9 +294,8 @@ def run_wrapper( starttime=time.time(), endtime=time.time(), ) - elif ( - run_info.cutoff != int(np.ceil(run_info.cutoff)) - and not isinstance(run_info.cutoff, int) + elif run_info.cutoff != int(np.ceil(run_info.cutoff)) and not isinstance( + run_info.cutoff, int ): run_info = run_info._replace(cutoff=int(np.ceil(run_info.cutoff))) @@ -285,7 +310,9 @@ def run( seed: int = 12345, budget: float = 0.0, instance_specific: Optional[str] = None, - ) -> Tuple[StatusType, float, float, Dict[str, Union[int, float, str, Dict, List, Tuple]]]: + ) -> Tuple[ + StatusType, float, float, Dict[str, Union[int, float, str, Dict, List, Tuple]] + ]: # Additional information of each of the tae executions # Defined upfront for mypy @@ -295,14 +322,16 @@ def run( preload_modules(context) queue = context.Queue() - if not (instance_specific is None or instance_specific == '0'): + if not (instance_specific is None or instance_specific == "0"): raise ValueError(instance_specific) - init_params = {'instance': instance} + init_params = {"instance": instance} if self.init_params is not None: init_params.update(self.init_params) if self.port is None: - logger: Union[logging.Logger, PickableLoggerAdapter] = logging.getLogger("pynisher") + logger: Union[logging.Logger, PickableLoggerAdapter] = logging.getLogger( + "pynisher" + ) else: logger = get_named_client_logger( name="pynisher", @@ -341,9 +370,9 @@ def run( additional_components=autosklearn.pipeline.components.base._addons, ) - if self.resampling_strategy != 'test': - obj_kwargs['resampling_strategy'] = self.resampling_strategy - obj_kwargs['resampling_strategy_args'] = self.resampling_strategy_args + if self.resampling_strategy != "test": + obj_kwargs["resampling_strategy"] = self.resampling_strategy + obj_kwargs["resampling_strategy_args"] = self.resampling_strategy_args try: obj = pynisher.enforce_limits(**arguments)(self.ta) @@ -351,31 +380,38 @@ def run( except Exception as e: exception_traceback = traceback.format_exc() error_message = repr(e) - additional_run_info.update({ - 'traceback': exception_traceback, - 'error': error_message - }) - return StatusType.CRASHED, self.worst_possible_result, 0.0, additional_run_info + additional_run_info.update( + {"traceback": exception_traceback, "error": error_message} + ) + return ( + StatusType.CRASHED, + self.worst_possible_result, + 0.0, + additional_run_info, + ) - if obj.exit_status in (pynisher.TimeoutException, pynisher.MemorylimitException): + if obj.exit_status in ( + pynisher.TimeoutException, + pynisher.MemorylimitException, + ): # Even if the pynisher thinks that a timeout or memout occured, # it can be that the target algorithm wrote something into the queue # - then we treat it as a succesful run try: info = autosklearn.evaluation.util.read_queue(queue) - result = info[-1]['loss'] - status = info[-1]['status'] - additional_run_info = info[-1]['additional_run_info'] + result = info[-1]["loss"] + status = info[-1]["status"] + additional_run_info = info[-1]["additional_run_info"] if obj.stdout: - additional_run_info['subprocess_stdout'] = 
obj.stdout + additional_run_info["subprocess_stdout"] = obj.stdout if obj.stderr: - additional_run_info['subprocess_stderr'] = obj.stderr + additional_run_info["subprocess_stderr"] = obj.stderr if obj.exit_status is pynisher.TimeoutException: - additional_run_info['info'] = 'Run stopped because of timeout.' + additional_run_info["info"] = "Run stopped because of timeout." elif obj.exit_status is pynisher.MemorylimitException: - additional_run_info['info'] = 'Run stopped because of memout.' + additional_run_info["info"] = "Run stopped because of memout." if status in [StatusType.SUCCESS, StatusType.DONOTADVANCE]: cost = result @@ -386,11 +422,13 @@ def run( info = None if obj.exit_status is pynisher.TimeoutException: status = StatusType.TIMEOUT - additional_run_info = {'error': 'Timeout'} + additional_run_info = {"error": "Timeout"} elif obj.exit_status is pynisher.MemorylimitException: status = StatusType.MEMOUT additional_run_info = { - "error": "Memout (used more than {} MB).".format(self.memory_limit) + "error": "Memout (used more than {} MB).".format( + self.memory_limit + ) } else: raise ValueError(obj.exit_status) @@ -400,99 +438,111 @@ def run( info = None status = StatusType.ABORT cost = self.worst_possible_result - additional_run_info = {'error': 'Your configuration of ' - 'auto-sklearn does not work!', - 'exit_status': _encode_exit_status(obj.exit_status), - 'subprocess_stdout': obj.stdout, - 'subprocess_stderr': obj.stderr, - } + additional_run_info = { + "error": "Your configuration of " "auto-sklearn does not work!", + "exit_status": _encode_exit_status(obj.exit_status), + "subprocess_stdout": obj.stdout, + "subprocess_stderr": obj.stderr, + } else: try: info = autosklearn.evaluation.util.read_queue(queue) - result = info[-1]['loss'] - status = info[-1]['status'] - additional_run_info = info[-1]['additional_run_info'] + result = info[-1]["loss"] + status = info[-1]["status"] + additional_run_info = info[-1]["additional_run_info"] if obj.exit_status == 0: cost = result else: status = StatusType.CRASHED cost = self.worst_possible_result - additional_run_info['info'] = 'Run treated as crashed ' \ - 'because the pynisher exit ' \ - 'status %s is unknown.' % \ - str(obj.exit_status) - additional_run_info['exit_status'] = _encode_exit_status(obj.exit_status) - additional_run_info['subprocess_stdout'] = obj.stdout - additional_run_info['subprocess_stderr'] = obj.stderr + additional_run_info["info"] = ( + "Run treated as crashed " + "because the pynisher exit " + "status %s is unknown." 
% str(obj.exit_status) + ) + additional_run_info["exit_status"] = _encode_exit_status( + obj.exit_status + ) + additional_run_info["subprocess_stdout"] = obj.stdout + additional_run_info["subprocess_stderr"] = obj.stderr except Empty: info = None additional_run_info = { - 'error': 'Result queue is empty', - 'exit_status': _encode_exit_status(obj.exit_status), - 'subprocess_stdout': obj.stdout, - 'subprocess_stderr': obj.stderr, - 'exitcode': obj.exitcode + "error": "Result queue is empty", + "exit_status": _encode_exit_status(obj.exit_status), + "subprocess_stdout": obj.stdout, + "subprocess_stderr": obj.stderr, + "exitcode": obj.exitcode, } status = StatusType.CRASHED cost = self.worst_possible_result if ( - (self.budget_type is None or budget == 0) - and status == StatusType.DONOTADVANCE - ): + self.budget_type is None or budget == 0 + ) and status == StatusType.DONOTADVANCE: status = StatusType.SUCCESS if not isinstance(additional_run_info, dict): - additional_run_info = {'message': additional_run_info} + additional_run_info = {"message": additional_run_info} if ( info is not None - and self.resampling_strategy in ('holdout-iterative-fit', 'cv-iterative-fit') + and self.resampling_strategy + in ("holdout-iterative-fit", "cv-iterative-fit") and status != StatusType.CRASHED ): learning_curve = autosklearn.evaluation.util.extract_learning_curve(info) learning_curve_runtime = autosklearn.evaluation.util.extract_learning_curve( - info, 'duration' + info, "duration" ) if len(learning_curve) > 1: - additional_run_info['learning_curve'] = learning_curve - additional_run_info['learning_curve_runtime'] = learning_curve_runtime + additional_run_info["learning_curve"] = learning_curve + additional_run_info["learning_curve_runtime"] = learning_curve_runtime train_learning_curve = autosklearn.evaluation.util.extract_learning_curve( - info, 'train_loss' + info, "train_loss" ) if len(train_learning_curve) > 1: - additional_run_info['train_learning_curve'] = train_learning_curve - additional_run_info['learning_curve_runtime'] = learning_curve_runtime + additional_run_info["train_learning_curve"] = train_learning_curve + additional_run_info["learning_curve_runtime"] = learning_curve_runtime if self._get_validation_loss: - validation_learning_curve = autosklearn.evaluation.util.extract_learning_curve( - info, 'validation_loss', + validation_learning_curve = ( + autosklearn.evaluation.util.extract_learning_curve( + info, + "validation_loss", + ) ) if len(validation_learning_curve) > 1: - additional_run_info['validation_learning_curve'] = \ - validation_learning_curve additional_run_info[ - 'learning_curve_runtime'] = learning_curve_runtime + "validation_learning_curve" + ] = validation_learning_curve + additional_run_info[ + "learning_curve_runtime" + ] = learning_curve_runtime if self._get_test_loss: - test_learning_curve = autosklearn.evaluation.util.extract_learning_curve( - info, 'test_loss', + test_learning_curve = ( + autosklearn.evaluation.util.extract_learning_curve( + info, + "test_loss", + ) ) if len(test_learning_curve) > 1: - additional_run_info['test_learning_curve'] = test_learning_curve + additional_run_info["test_learning_curve"] = test_learning_curve additional_run_info[ - 'learning_curve_runtime'] = learning_curve_runtime + "learning_curve_runtime" + ] = learning_curve_runtime if isinstance(config, int): - origin = 'DUMMY' + origin = "DUMMY" config_id = config else: - origin = getattr(config, 'origin', 'UNKNOWN') + origin = getattr(config, "origin", "UNKNOWN") config_id = config.config_id 
- additional_run_info['configuration_origin'] = origin + additional_run_info["configuration_origin"] = origin runtime = float(obj.wall_clock_time) diff --git a/autosklearn/evaluation/abstract_evaluator.py b/autosklearn/evaluation/abstract_evaluator.py index 36d51d7e0d..bc0be0e8d8 100644 --- a/autosklearn/evaluation/abstract_evaluator.py +++ b/autosklearn/evaluation/abstract_evaluator.py @@ -1,43 +1,36 @@ +from typing import Any, Dict, List, Optional, TextIO, Tuple, Type, Union, cast + import logging import multiprocessing import time import warnings -from typing import Any, Dict, List, Optional, TextIO, Tuple, Type, Union, cast import numpy as np - +from ConfigSpace import Configuration from sklearn.base import BaseEstimator from sklearn.dummy import DummyClassifier, DummyRegressor from sklearn.ensemble import VotingClassifier, VotingRegressor - from smac.tae import StatusType - from threadpoolctl import threadpool_limits -from autosklearn.automl_common.common.utils.backend import Backend - import autosklearn.pipeline.classification import autosklearn.pipeline.regression -from autosklearn.pipeline.components.base import ThirdPartyComponents, _addons +from autosklearn.automl_common.common.utils.backend import Backend from autosklearn.constants import ( CLASSIFICATION_TASKS, - REGRESSION_TASKS, - MULTILABEL_CLASSIFICATION, MULTICLASS_CLASSIFICATION, - MULTIOUTPUT_REGRESSION + MULTILABEL_CLASSIFICATION, + MULTIOUTPUT_REGRESSION, + REGRESSION_TASKS, ) +from autosklearn.metrics import Scorer, calculate_loss +from autosklearn.pipeline.components.base import ThirdPartyComponents, _addons from autosklearn.pipeline.implementations.util import ( - convert_multioutput_multiclass_to_multilabel + convert_multioutput_multiclass_to_multilabel, ) -from autosklearn.metrics import calculate_loss, Scorer from autosklearn.util.logging_ import PicklableClientLogger, get_named_client_logger -from ConfigSpace import Configuration - - -__all__ = [ - 'AbstractEvaluator' -] +__all__ = ["AbstractEvaluator"] # General TYPE definitions for numpy @@ -66,37 +59,39 @@ def __init__( self.exclude = exclude def pre_transform( - self, - X: np.ndarray, - y: np.ndarray, - fit_params: Optional[Dict[str, Any]] = None + self, X: np.ndarray, y: np.ndarray, fit_params: Optional[Dict[str, Any]] = None ) -> Tuple[np.ndarray, Dict[str, Any]]: # pylint: disable=R0201 if fit_params is None: fit_params = {} return X, fit_params - def fit(self, X: np.ndarray, y: np.ndarray, - sample_weight: Optional[Union[np.ndarray, List]] = None - ) -> DummyClassifier: - return super(MyDummyClassifier, self).fit(np.ones((X.shape[0], 1)), y, - sample_weight=sample_weight) + def fit( + self, + X: np.ndarray, + y: np.ndarray, + sample_weight: Optional[Union[np.ndarray, List]] = None, + ) -> DummyClassifier: + return super(MyDummyClassifier, self).fit( + np.ones((X.shape[0], 1)), y, sample_weight=sample_weight + ) - def fit_estimator(self, X: np.ndarray, y: np.ndarray, - fit_params: Optional[Dict[str, Any]] = None) -> DummyClassifier: + def fit_estimator( + self, X: np.ndarray, y: np.ndarray, fit_params: Optional[Dict[str, Any]] = None + ) -> DummyClassifier: return self.fit(X, y) - def predict_proba(self, X: np.ndarray, batch_size: int = 1000 - ) -> np.ndarray: + def predict_proba(self, X: np.ndarray, batch_size: int = 1000) -> np.ndarray: new_X = np.ones((X.shape[0], 1)) probas = super(MyDummyClassifier, self).predict_proba(new_X) - probas = convert_multioutput_multiclass_to_multilabel(probas).astype( - np.float32) + probas = 
convert_multioutput_multiclass_to_multilabel(probas).astype(np.float32) return probas def estimator_supports_iterative_fit(self) -> bool: # pylint: disable=R0201 return False - def get_additional_run_info(self) -> Optional[TYPE_ADDITIONAL_INFO]: # pylint: disable=R0201 + def get_additional_run_info( + self, + ) -> Optional[TYPE_ADDITIONAL_INFO]: # pylint: disable=R0201 return None @@ -112,9 +107,9 @@ def __init__( ): self.config = config if config == 1: - super(MyDummyRegressor, self).__init__(strategy='mean') + super(MyDummyRegressor, self).__init__(strategy="mean") else: - super(MyDummyRegressor, self).__init__(strategy='median') + super(MyDummyRegressor, self).__init__(strategy="median") self.random_state = random_state self.init_params = init_params self.dataset_properties = dataset_properties @@ -122,23 +117,25 @@ def __init__( self.exclude = exclude def pre_transform( - self, - X: np.ndarray, - y: np.ndarray, - fit_params: Optional[Dict[str, Any]] = None + self, X: np.ndarray, y: np.ndarray, fit_params: Optional[Dict[str, Any]] = None ) -> Tuple[np.ndarray, Dict[str, Any]]: # pylint: disable=R0201 if fit_params is None: fit_params = {} return X, fit_params - def fit(self, X: np.ndarray, y: np.ndarray, - sample_weight: Optional[Union[np.ndarray, List]] = None - ) -> DummyRegressor: - return super(MyDummyRegressor, self).fit(np.ones((X.shape[0], 1)), y, - sample_weight=sample_weight) + def fit( + self, + X: np.ndarray, + y: np.ndarray, + sample_weight: Optional[Union[np.ndarray, List]] = None, + ) -> DummyRegressor: + return super(MyDummyRegressor, self).fit( + np.ones((X.shape[0], 1)), y, sample_weight=sample_weight + ) - def fit_estimator(self, X: np.ndarray, y: np.ndarray, - fit_params: Optional[Dict[str, Any]] = None) -> DummyRegressor: + def fit_estimator( + self, X: np.ndarray, y: np.ndarray, fit_params: Optional[Dict[str, Any]] = None + ) -> DummyRegressor: return self.fit(X, y) def predict(self, X: np.ndarray, batch_size: int = 1000) -> np.ndarray: @@ -148,7 +145,9 @@ def predict(self, X: np.ndarray, batch_size: int = 1000) -> np.ndarray: def estimator_supports_iterative_fit(self) -> bool: # pylint: disable=R0201 return False - def get_additional_run_info(self) -> Optional[TYPE_ADDITIONAL_INFO]: # pylint: disable=R0201 + def get_additional_run_info( + self, + ) -> Optional[TYPE_ADDITIONAL_INFO]: # pylint: disable=R0201 return None @@ -156,7 +155,7 @@ def _fit_and_suppress_warnings( logger: Union[logging.Logger, PicklableClientLogger], model: BaseEstimator, X: np.ndarray, - y: np.ndarray + y: np.ndarray, ) -> BaseEstimator: def send_warnings_to_log( message: Union[Warning, str], @@ -166,8 +165,7 @@ def send_warnings_to_log( file: Optional[TextIO] = None, line: Optional[str] = None, ) -> None: - logger.debug('%s:%s: %s:%s' % - (filename, lineno, str(category), message)) + logger.debug("%s:%s: %s:%s" % (filename, lineno, str(category), message)) return with warnings.catch_warnings(): @@ -212,13 +210,13 @@ def __init__( self.include = include self.exclude = exclude - self.X_valid = self.datamanager.data.get('X_valid') - self.y_valid = self.datamanager.data.get('Y_valid') - self.X_test = self.datamanager.data.get('X_test') - self.y_test = self.datamanager.data.get('Y_test') + self.X_valid = self.datamanager.data.get("X_valid") + self.y_valid = self.datamanager.data.get("Y_valid") + self.X_test = self.datamanager.data.get("X_test") + self.y_test = self.datamanager.data.get("Y_test") self.metric = metric - self.task_type = self.datamanager.info['task'] + self.task_type = 
self.datamanager.info["task"] self.seed = seed self.output_y_hat_optimization = output_y_hat_optimization @@ -227,25 +225,26 @@ def __init__( if isinstance(disable_file_output, (bool, list)): self.disable_file_output: Union[bool, List[str]] = disable_file_output else: - raise ValueError('disable_file_output should be either a bool or a list') + raise ValueError("disable_file_output should be either a bool or a list") if self.task_type in REGRESSION_TASKS: if not isinstance(self.configuration, Configuration): self.model_class = MyDummyRegressor else: - self.model_class = \ + self.model_class = ( autosklearn.pipeline.regression.SimpleRegressionPipeline + ) self.predict_function = self._predict_regression else: if not isinstance(self.configuration, Configuration): self.model_class = MyDummyClassifier else: - self.model_class = autosklearn.pipeline.classification.SimpleClassificationPipeline + self.model_class = ( + autosklearn.pipeline.classification.SimpleClassificationPipeline + ) self.predict_function = self._predict_proba - self._init_params = { - 'data_preprocessor:feat_type': self.datamanager.feat_type - } + self._init_params = {"data_preprocessor:feat_type": self.datamanager.feat_type} if init_params is not None: self._init_params.update(init_params) @@ -254,8 +253,11 @@ def __init__( num_run = 0 self.num_run = num_run - logger_name = '%s(%d):%s' % (self.__class__.__name__.split('.')[-1], - self.seed, self.datamanager.name) + logger_name = "%s(%d):%s" % ( + self.__class__.__name__.split(".")[-1], + self.seed, + self.datamanager.name, + ) if self.port is None: self.logger = logging.getLogger(__name__) @@ -271,12 +273,14 @@ def __init__( self.budget = budget self.budget_type = budget_type - # Add 3rd-party components to the list of 3rd-party components in case this wasn't done - # before (this happens if we run in parallel and the components are only passed to the - # AbstractEvaluator via the TAE and are not there yet because the worker is in its own - # process). + # Add 3rd-party components to the list of 3rd-party components in case this + # wasn't done before (this happens if we run in parallel and the components + # are only passed to the AbstractEvaluator via the TAE and are not there + # yet because the worker is in its own process). 
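        # Editorial sketch, not part of this patch: `additional_components` is
        # assumed to map a component-type key (the same keys used by the
        # module-level `_addons` registry) to a `ThirdPartyComponents` holder
        # whose `.components` dict contains the user-registered classes, for
        # example (the key name here is purely illustrative):
        #
        #     additional_components = {"classification": third_party_holder}
        #
        # Because `_addons` lives at module level, a freshly spawned worker
        # process starts without any user-registered components, so the loop
        # below replays the registration; the membership check keeps it
        # idempotent.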
for key in additional_components: - for component_name, component in additional_components[key].components.items(): + for component_name, component in additional_components[ + key + ].components.items(): if component_name not in _addons[key].components: _addons[key].add_component(component) @@ -285,34 +289,41 @@ def __init__( def _get_model(self) -> BaseEstimator: if not isinstance(self.configuration, Configuration): - model = self.model_class(config=self.configuration, - random_state=self.seed, - init_params=self._init_params) + model = self.model_class( + config=self.configuration, + random_state=self.seed, + init_params=self._init_params, + ) else: if self.task_type in REGRESSION_TASKS: dataset_properties = { - 'task': self.task_type, - 'sparse': self.datamanager.info['is_sparse'] == 1, - 'multioutput': self.task_type == MULTIOUTPUT_REGRESSION, + "task": self.task_type, + "sparse": self.datamanager.info["is_sparse"] == 1, + "multioutput": self.task_type == MULTIOUTPUT_REGRESSION, } else: dataset_properties = { - 'task': self.task_type, - 'sparse': self.datamanager.info['is_sparse'] == 1, - 'multilabel': self.task_type == MULTILABEL_CLASSIFICATION, - 'multiclass': self.task_type == MULTICLASS_CLASSIFICATION, + "task": self.task_type, + "sparse": self.datamanager.info["is_sparse"] == 1, + "multilabel": self.task_type == MULTILABEL_CLASSIFICATION, + "multiclass": self.task_type == MULTICLASS_CLASSIFICATION, } - model = self.model_class(config=self.configuration, - dataset_properties=dataset_properties, - random_state=self.seed, - include=self.include, - exclude=self.exclude, - init_params=self._init_params) + model = self.model_class( + config=self.configuration, + dataset_properties=dataset_properties, + random_state=self.seed, + include=self.include, + exclude=self.exclude, + init_params=self._init_params, + ) return model - def _loss(self, y_true: np.ndarray, y_hat: np.ndarray, - scoring_functions: Optional[List[Scorer]] = None - ) -> Union[float, Dict[str, float]]: + def _loss( + self, + y_true: np.ndarray, + y_hat: np.ndarray, + scoring_functions: Optional[List[Scorer]] = None, + ) -> Union[float, Dict[str, float]]: """Auto-sklearn follows a minimization goal. The calculate_loss internally translate a score function to a minimization problem. 
@@ -324,9 +335,7 @@ def _loss(self, y_true: np.ndarray, y_hat: np.ndarray, y_true """ scoring_functions = ( - self.scoring_functions - if scoring_functions is None - else scoring_functions + self.scoring_functions if scoring_functions is None else scoring_functions ) if not isinstance(self.configuration, Configuration): if scoring_functions: @@ -335,8 +344,12 @@ def _loss(self, y_true: np.ndarray, y_hat: np.ndarray, return self.metric._worst_possible_result return calculate_loss( - y_true, y_hat, self.task_type, self.metric, - scoring_functions=scoring_functions) + y_true, + y_hat, + self.task_type, + self.metric, + scoring_functions=scoring_functions, + ) def finish_up( self, @@ -349,28 +362,35 @@ def finish_up( file_output: bool, final_call: bool, status: StatusType, - ) -> Tuple[float, Union[float, Dict[str, float]], int, - Dict[str, Union[str, int, float, Dict, List, Tuple]]]: - """This function does everything necessary after the fitting is done: + ) -> Tuple[ + float, + Union[float, Dict[str, float]], + int, + Dict[str, Union[str, int, float, Dict, List, Tuple]], + ]: + """Do everything necessary after the fitting is done: * predicting * saving the files for the ensembles_statistics * generate output for SMAC We use it as the signal handler so we can recycle the code for the - normal usecase and when the runsolver kills us here :)""" - + normal usecase and when the runsolver kills us here :) + """ self.duration = time.time() - self.starttime if file_output: file_out_loss, additional_run_info_ = self.file_output( - opt_pred, valid_pred, test_pred, + opt_pred, + valid_pred, + test_pred, ) else: file_out_loss = None additional_run_info_ = {} validation_loss, test_loss = self.calculate_auxiliary_losses( - valid_pred, test_pred, + valid_pred, + test_pred, ) if file_out_loss is not None: @@ -382,25 +402,25 @@ def finish_up( else: loss_ = {} - additional_run_info = ( - {} if additional_run_info is None else additional_run_info - ) + additional_run_info = {} if additional_run_info is None else additional_run_info for metric_name, value in loss_.items(): additional_run_info[metric_name] = value - additional_run_info['duration'] = self.duration - additional_run_info['num_run'] = self.num_run + additional_run_info["duration"] = self.duration + additional_run_info["num_run"] = self.num_run if train_loss is not None: - additional_run_info['train_loss'] = train_loss + additional_run_info["train_loss"] = train_loss if validation_loss is not None: - additional_run_info['validation_loss'] = validation_loss + additional_run_info["validation_loss"] = validation_loss if test_loss is not None: - additional_run_info['test_loss'] = test_loss + additional_run_info["test_loss"] = test_loss - rval_dict = {'loss': loss, - 'additional_run_info': additional_run_info, - 'status': status} + rval_dict = { + "loss": loss, + "additional_run_info": additional_run_info, + "status": status, + } if final_call: - rval_dict['final_queue_element'] = True + rval_dict["final_queue_element"] = True self.queue.put(rval_dict) return self.duration, loss_, self.seed, additional_run_info_ @@ -413,7 +433,8 @@ def calculate_auxiliary_losses( if Y_valid_pred is not None: if self.y_valid is not None: validation_loss: Optional[Union[float, Dict[str, float]]] = self._loss( - self.y_valid, Y_valid_pred) + self.y_valid, Y_valid_pred + ) if isinstance(validation_loss, dict): validation_loss = validation_loss[self.metric.name] else: @@ -424,7 +445,8 @@ def calculate_auxiliary_losses( if Y_test_pred is not None: if self.y_test is not None: 
test_loss: Optional[Union[float, Dict[str, float]]] = self._loss( - self.y_test, Y_test_pred) + self.y_test, Y_test_pred + ) if isinstance(test_loss, dict): test_loss = test_loss[self.metric.name] else: @@ -451,27 +473,24 @@ def file_output( return ( 1.0, { - 'error': - "Targets %s and prediction %s don't have " - "the same length. Probably training didn't " - "finish" % (np.shape(self.Y_optimization), Y_optimization_pred.shape) - }, + "error": "Targets %s and prediction %s don't have " + "the same length. Probably training didn't " + "finish" + % (np.shape(self.Y_optimization), Y_optimization_pred.shape) + }, ) # Abort if predictions contain NaNs for y, s in [ # Y_train_pred deleted here. Fix unittest accordingly. - [Y_optimization_pred, 'optimization'], - [Y_valid_pred, 'validation'], - [Y_test_pred, 'test'] + [Y_optimization_pred, "optimization"], + [Y_valid_pred, "validation"], + [Y_test_pred, "test"], ]: if y is not None and not np.all(np.isfinite(y)): return ( 1.0, - { - 'error': - 'Model predictions for %s set contains NaNs.' % s - }, + {"error": "Model predictions for %s set contains NaNs." % s}, ) # Abort if we don't want to output anything. @@ -489,17 +508,20 @@ def file_output( self.disable_file_output = cast(List, self.disable_file_output) # This file can be written independently of the others down bellow - if ('y_optimization' not in self.disable_file_output): + if "y_optimization" not in self.disable_file_output: if self.output_y_hat_optimization: self.backend.save_targets_ensemble(self.Y_optimization) models: Optional[BaseEstimator] = None - if hasattr(self, 'models'): - if len(self.models) > 0 and self.models[0] is not None: # type: ignore[attr-defined] - if ('models' not in self.disable_file_output): + if hasattr(self, "models"): + if len(self.models) > 0 and self.models[0] is not None: + if "models" not in self.disable_file_output: if self.task_type in CLASSIFICATION_TASKS: - models = VotingClassifier(estimators=None, voting='soft', ) + models = VotingClassifier( + estimators=None, + voting="soft", + ) else: models = VotingRegressor(estimators=None) # Mypy cannot understand hasattr yet @@ -509,24 +531,30 @@ def file_output( seed=self.seed, idx=self.num_run, budget=self.budget, - model=self.model if 'model' not in self.disable_file_output else None, - cv_model=models if 'cv_model' not in self.disable_file_output else None, + model=self.model if "model" not in self.disable_file_output else None, + cv_model=models if "cv_model" not in self.disable_file_output else None, ensemble_predictions=( - Y_optimization_pred if 'y_optimization' not in self.disable_file_output else None + Y_optimization_pred + if "y_optimization" not in self.disable_file_output + else None ), valid_predictions=( - Y_valid_pred if 'y_valid' not in self.disable_file_output else None + Y_valid_pred if "y_valid" not in self.disable_file_output else None ), test_predictions=( - Y_test_pred if 'y_test' not in self.disable_file_output else None + Y_test_pred if "y_test" not in self.disable_file_output else None ), ) return None, {} - def _predict_proba(self, X: np.ndarray, model: BaseEstimator, - task_type: int, Y_train: Optional[np.ndarray] = None, - ) -> np.ndarray: + def _predict_proba( + self, + X: np.ndarray, + model: BaseEstimator, + task_type: int, + Y_train: Optional[np.ndarray] = None, + ) -> np.ndarray: def send_warnings_to_log( message: Union[Warning, str], category: Type[Warning], @@ -535,8 +563,9 @@ def send_warnings_to_log( file: Optional[TextIO] = None, line: Optional[str] = None, ) -> None: - 
self.logger.debug('%s:%s: %s:%s' % - (filename, lineno, str(category), message)) + self.logger.debug( + "%s:%s: %s:%s" % (filename, lineno, str(category), message) + ) return with warnings.catch_warnings(): @@ -549,8 +578,13 @@ def send_warnings_to_log( Y_pred = self._ensure_prediction_array_sizes(Y_pred, Y_train) return Y_pred - def _predict_regression(self, X: np.ndarray, model: BaseEstimator, - task_type: int, Y_train: Optional[np.ndarray] = None) -> np.ndarray: + def _predict_regression( + self, + X: np.ndarray, + model: BaseEstimator, + task_type: int, + Y_train: Optional[np.ndarray] = None, + ) -> np.ndarray: def send_warnings_to_log( message: Union[Warning, str], category: Type[Warning], @@ -559,8 +593,9 @@ def send_warnings_to_log( file: Optional[TextIO] = None, line: Optional[str] = None, ) -> None: - self.logger.debug('%s:%s: %s:%s' % - (filename, lineno, str(category), message)) + self.logger.debug( + "%s:%s: %s:%s" % (filename, lineno, str(category), message) + ) return with warnings.catch_warnings(): @@ -572,14 +607,17 @@ def send_warnings_to_log( return Y_pred - def _ensure_prediction_array_sizes(self, prediction: np.ndarray, Y_train: np.ndarray - ) -> np.ndarray: - num_classes = self.datamanager.info['label_num'] + def _ensure_prediction_array_sizes( + self, prediction: np.ndarray, Y_train: np.ndarray + ) -> np.ndarray: + num_classes = self.datamanager.info["label_num"] - if self.task_type == MULTICLASS_CLASSIFICATION and \ - prediction.shape[1] < num_classes: + if ( + self.task_type == MULTICLASS_CLASSIFICATION + and prediction.shape[1] < num_classes + ): if Y_train is None: - raise ValueError('Y_train must not be None!') + raise ValueError("Y_train must not be None!") classes = list(np.unique(Y_train)) mapping = dict() @@ -587,8 +625,9 @@ def _ensure_prediction_array_sizes(self, prediction: np.ndarray, Y_train: np.nda if class_number in classes: index = classes.index(class_number) mapping[index] = class_number - new_predictions = np.zeros((prediction.shape[0], num_classes), - dtype=np.float32) + new_predictions = np.zeros( + (prediction.shape[0], num_classes), dtype=np.float32 + ) for index in mapping: class_index = mapping[index] diff --git a/autosklearn/evaluation/splitter.py b/autosklearn/evaluation/splitter.py index a18e29e08a..586d92c88f 100644 --- a/autosklearn/evaluation/splitter.py +++ b/autosklearn/evaluation/splitter.py @@ -1,31 +1,30 @@ import warnings import numpy as np - -from sklearn.model_selection import StratifiedShuffleSplit, StratifiedKFold +from sklearn.model_selection import StratifiedKFold, StratifiedShuffleSplit from sklearn.model_selection._split import _validate_shuffle_split -from sklearn.utils import indexable, check_random_state -from sklearn.utils import _approximate_mode -from sklearn.utils.validation import _num_samples, column_or_1d -from sklearn.utils.validation import check_array +from sklearn.utils import _approximate_mode, check_random_state, indexable from sklearn.utils.multiclass import type_of_target +from sklearn.utils.validation import _num_samples, check_array, column_or_1d class CustomStratifiedShuffleSplit(StratifiedShuffleSplit): - """Stratified ShuffleSplit cross-validator that deals with classes with too few samples - """ + """Splitter that deals with classes with too few samples""" def _iter_indices(self, X, y, groups=None): # type: ignore n_samples = _num_samples(X) y = check_array(y, ensure_2d=False, dtype=None) n_train, n_test = _validate_shuffle_split( - n_samples, self.test_size, self.train_size, - 
default_test_size=self._default_test_size) + n_samples, + self.test_size, + self.train_size, + default_test_size=self._default_test_size, + ) if y.ndim == 2: # for multi-label y, map each distinct row to a string repr # using join because str(row) uses an ellipsis if len(row) > 1000 - y = np.array([' '.join(row.astype('str')) for row in y]) + y = np.array([" ".join(row.astype("str")) for row in y]) classes, y_indices = np.unique(y, return_inverse=True) n_classes = classes.shape[0] @@ -33,18 +32,21 @@ def _iter_indices(self, X, y, groups=None): # type: ignore class_counts = np.bincount(y_indices) if n_train < n_classes: - raise ValueError('The train_size = %d should be greater or ' - 'equal to the number of classes = %d' % - (n_train, n_classes)) + raise ValueError( + "The train_size = %d should be greater or " + "equal to the number of classes = %d" % (n_train, n_classes) + ) if n_test < n_classes: - raise ValueError('The test_size = %d should be greater or ' - 'equal to the number of classes = %d' % - (n_test, n_classes)) + raise ValueError( + "The test_size = %d should be greater or " + "equal to the number of classes = %d" % (n_test, n_classes) + ) # Find the sorted list of instances for each class: # (np.unique above performs a sort, so code is O(n logn) already) - class_indices = np.split(np.argsort(y_indices, kind='mergesort'), - np.cumsum(class_counts)[:-1]) + class_indices = np.split( + np.argsort(y_indices, kind="mergesort"), np.cumsum(class_counts)[:-1] + ) rng = check_random_state(self.random_state) @@ -62,18 +64,18 @@ def _iter_indices(self, X, y, groups=None): # type: ignore # Each list n_i, t_i represent the list of class in the # training_set and test_set resepectively. # - # n_i = [100, 100, 0, 3] # 100 instance of class '0', 0 instance of class '2' - # t_i = [300, 300, 1, 3] # 300 instances of class '0', 1 instance of class '2' + # n_i = [100, 100, 0, 3] # 100 of class '0', 0 of class '2' + # t_i = [300, 300, 1, 3] # 300 of class '0', 1 of class '2' # # To support unique labels such as class '2', which only has one sample # between both n_i and t_i, we need to make sure that n_i has at least # one sample of all classes. There is also the extra check to ensure # that the sizes stay the same. 
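            # (Editorial worked example, not part of the original comment:
            # starting from n_i = [100, 100, 0, 3] and t_i = [300, 300, 1, 3],
            # the loop below claims the single sample of class '2' for the
            # training set and, to keep the split sizes unchanged, moves one
            # sample of the largest training class, class '0', into the test
            # set.  The result is the pair of arrays shown next.)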
            #
-            # n_i = [ 99, 100, 1, 3] # 100 instance of class '0', 0 instance of class '2'
+            # n_i = [ 99, 100, 1, 3]  # 99 of class '0', 1 of class '2'
            #          |              ^
            #          v              |
-            # t_i = [301, 300, 0, 3] # 300 instances of class '0', 1 instance of class '2'
+            # t_i = [301, 300, 0, 3]  # 301 of class '0', 0 of class '2'
            #
            for i, class_count in enumerate(n_i):
                if class_count == 0:
@@ -82,20 +84,21 @@ def _iter_indices(self, X, y, groups=None):  # type: ignore
                    j = np.argmax(n_i)
                    if n_i[j] == 1:
-                        warnings.warn("Can't respect size requirements for split.",
-                                      " The training set must contain all of the unique"
-                                      " labels that exist in the dataset.")
+                        warnings.warn(
+                            "Can't respect size requirements for split."
+                            " The training set must contain all of the unique"
+                            " labels that exist in the dataset."
+                        )
                    else:
                        n_i[j] -= 1
                        t_i[j] += 1

            for i in range(n_classes):
                permutation = rng.permutation(class_counts[i])
-                perm_indices_class_i = class_indices[i].take(permutation,
-                                                             mode='clip')
+                perm_indices_class_i = class_indices[i].take(permutation, mode="clip")

-                train.extend(perm_indices_class_i[:n_i[i]])
-                test.extend(perm_indices_class_i[n_i[i]:n_i[i] + t_i[i]])
+                train.extend(perm_indices_class_i[: n_i[i]])
+                test.extend(perm_indices_class_i[n_i[i] : n_i[i] + t_i[i]])

            train = rng.permutation(train)
            test = rng.permutation(test)

@@ -112,11 +115,13 @@ def _make_test_folds(self, X, y=None):  # type: ignore
        rng = check_random_state(self.random_state)
        y = np.asarray(y)
        type_of_target_y = type_of_target(y)
-        allowed_target_types = ('binary', 'multiclass')
+        allowed_target_types = ("binary", "multiclass")
        if type_of_target_y not in allowed_target_types:
            raise ValueError(
-                'Supported target types are: {}. Got {!r} instead.'.format(
-                    allowed_target_types, type_of_target_y))
+                "Supported target types are: {}. Got {!r} instead.".format(
+                    allowed_target_types, type_of_target_y
+                )
+            )

        y = column_or_1d(y)

@@ -134,13 +139,16 @@ def _make_test_folds(self, X, y=None):  # type: ignore
        # counts, but that code is unreadable.)
        y_order = np.sort(y_encoded)
        allocation = np.asarray(
-            [np.bincount(y_order[i::self.n_splits], minlength=n_classes)
-             for i in range(self.n_splits)])
+            [
+                np.bincount(y_order[i :: self.n_splits], minlength=n_classes)
+                for i in range(self.n_splits)
+            ]
+        )

        # To maintain the data order dependencies as best as possible within
        # the stratification constraint, we assign samples from each class in
        # blocks (and then mess that up when shuffle=True).
-        test_folds = np.empty(len(y), dtype='i')
+        test_folds = np.empty(len(y), dtype="i")
        for k in range(n_classes):
            # since the kth column of allocation stores the number of samples
            # of class k in each test set, this generates blocks of fold
@@ -157,12 +165,11 @@ def split(self, X, y=None, groups=None):  # type: ignore
        n_samples = _num_samples(X)
        if self.n_splits > n_samples:
            raise ValueError(
-                ("Cannot have number of splits n_splits={0} greater"
-                 " than the number of samples: n_samples={1}.")
-                .format(self.n_splits, n_samples))
+                f"Cannot have number of splits n_splits={self.n_splits} greater"
+                f" than the number of samples: n_samples={n_samples}."
+ ) for train, test in super().split(X, y, groups): - # print(len(np.unique(y)), len(np.unique(y[train])), len(np.unique(y[test]))) all_classes = np.unique(y) train_classes = np.unique(y[train]) train = list(train) @@ -179,11 +186,5 @@ def split(self, X, y=None, groups=None): # type: ignore # print(len(train), len(test)) train = np.array(train, dtype=int) test = np.array(test, dtype=int) - # print( - # len(np.unique(y)), - # len(np.unique(y[train])), - # len(np.unique(y[test])), - # len(train), len(test), - # ) yield train, test diff --git a/autosklearn/evaluation/test_evaluator.py b/autosklearn/evaluation/test_evaluator.py index 181ebce233..4b6cf8452c 100644 --- a/autosklearn/evaluation/test_evaluator.py +++ b/autosklearn/evaluation/test_evaluator.py @@ -1,31 +1,24 @@ # -*- encoding: utf-8 -*- -import multiprocessing from typing import Any, Dict, List, Optional, Tuple, Union -from ConfigSpace import Configuration +import multiprocessing import numpy as np - +from ConfigSpace import Configuration from smac.tae import StatusType from autosklearn.automl_common.common.utils.backend import Backend - from autosklearn.evaluation.abstract_evaluator import ( AbstractEvaluator, _fit_and_suppress_warnings, ) +from autosklearn.metrics import Scorer, calculate_loss from autosklearn.pipeline.components.base import ThirdPartyComponents -from autosklearn.metrics import calculate_loss, Scorer - -__all__ = [ - 'eval_t', - 'TestEvaluator' -] +__all__ = ["eval_t", "TestEvaluator"] class TestEvaluator(AbstractEvaluator): - def __init__( self, backend: Backend, @@ -55,15 +48,15 @@ def __init__( include=include, exclude=exclude, disable_file_output=disable_file_output, - init_params=init_params + init_params=init_params, ) self.configuration = configuration - self.X_train = self.datamanager.data['X_train'] - self.Y_train = self.datamanager.data['Y_train'] + self.X_train = self.datamanager.data["X_train"] + self.Y_train = self.datamanager.data["Y_train"] - self.X_test = self.datamanager.data.get('X_test') - self.Y_test = self.datamanager.data.get('Y_test') + self.X_test = self.datamanager.data.get("X_test") + self.Y_test = self.datamanager.data.get("Y_test") self.model = self._get_model() @@ -87,23 +80,27 @@ def predict_and_loss( ) -> Tuple[Union[Dict[str, float], float], np.array, Any, Any]: if train: - Y_pred = self.predict_function(self.X_train, self.model, - self.task_type, self.Y_train) + Y_pred = self.predict_function( + self.X_train, self.model, self.task_type, self.Y_train + ) err = calculate_loss( solution=self.Y_train, prediction=Y_pred, task_type=self.task_type, metric=self.metric, - scoring_functions=self.scoring_functions) + scoring_functions=self.scoring_functions, + ) else: - Y_pred = self.predict_function(self.X_test, self.model, - self.task_type, self.Y_train) + Y_pred = self.predict_function( + self.X_test, self.model, self.task_type, self.Y_train + ) err = calculate_loss( solution=self.Y_test, prediction=Y_pred, task_type=self.task_type, metric=self.metric, - scoring_functions=self.scoring_functions) + scoring_functions=self.scoring_functions, + ) return err, Y_pred, None, None @@ -129,14 +126,19 @@ def eval_t( budget: Optional[float] = None, budget_type: Optional[str] = None, ) -> None: - evaluator = TestEvaluator(configuration=config, - backend=backend, metric=metric, seed=seed, - port=port, - queue=queue, - scoring_functions=scoring_functions, - include=include, exclude=exclude, - disable_file_output=disable_file_output, - additional_components=additional_components, - 
init_params=init_params,) + evaluator = TestEvaluator( + configuration=config, + backend=backend, + metric=metric, + seed=seed, + port=port, + queue=queue, + scoring_functions=scoring_functions, + include=include, + exclude=exclude, + disable_file_output=disable_file_output, + additional_components=additional_components, + init_params=init_params, + ) evaluator.fit_predict_and_loss() diff --git a/autosklearn/evaluation/train_evaluator.py b/autosklearn/evaluation/train_evaluator.py index 558fdd3b67..7a047d3e10 100644 --- a/autosklearn/evaluation/train_evaluator.py +++ b/autosklearn/evaluation/train_evaluator.py @@ -1,54 +1,64 @@ -import logging -import multiprocessing -import warnings from typing import Any, Dict, List, Optional, Tuple, Union, cast import copy import json - -from ConfigSpace import Configuration +import logging +import multiprocessing +import warnings import numpy as np - -from smac.tae import TAEAbortException, StatusType - +from ConfigSpace import Configuration from sklearn.base import BaseEstimator -from sklearn.model_selection import ShuffleSplit, StratifiedShuffleSplit, KFold, \ - StratifiedKFold, train_test_split, BaseCrossValidator, PredefinedSplit -from sklearn.model_selection._split import _RepeatedSplits, BaseShuffleSplit +from sklearn.model_selection import ( + BaseCrossValidator, + KFold, + PredefinedSplit, + ShuffleSplit, + StratifiedKFold, + StratifiedShuffleSplit, + train_test_split, +) +from sklearn.model_selection._split import BaseShuffleSplit, _RepeatedSplits +from smac.tae import StatusType, TAEAbortException from autosklearn.automl_common.common.utils.backend import Backend - -from autosklearn.evaluation.abstract_evaluator import ( - AbstractEvaluator, - TYPE_ADDITIONAL_INFO, - _fit_and_suppress_warnings, -) -from autosklearn.evaluation.splitter import CustomStratifiedShuffleSplit, CustomStratifiedKFold -from autosklearn.data.abstract_data_manager import AbstractDataManager from autosklearn.constants import ( CLASSIFICATION_TASKS, MULTILABEL_CLASSIFICATION, + MULTIOUTPUT_REGRESSION, REGRESSION_TASKS, - MULTIOUTPUT_REGRESSION ) -from autosklearn.data.validation import ( - SUPPORTED_FEAT_TYPES, - SUPPORTED_TARGET_TYPES, - ) -from autosklearn.pipeline.base import PIPELINE_DATA_DTYPE -from autosklearn.pipeline.components.base import IterativeComponent, ThirdPartyComponents +from autosklearn.data.abstract_data_manager import AbstractDataManager +from autosklearn.data.validation import SUPPORTED_FEAT_TYPES, SUPPORTED_TARGET_TYPES +from autosklearn.evaluation.abstract_evaluator import ( + TYPE_ADDITIONAL_INFO, + AbstractEvaluator, + _fit_and_suppress_warnings, +) +from autosklearn.evaluation.splitter import ( + CustomStratifiedKFold, + CustomStratifiedShuffleSplit, +) from autosklearn.metrics import Scorer +from autosklearn.pipeline.base import PIPELINE_DATA_DTYPE +from autosklearn.pipeline.components.base import ( + IterativeComponent, + ThirdPartyComponents, +) from autosklearn.util.logging_ import PicklableClientLogger - -__all__ = ['TrainEvaluator', 'eval_holdout', 'eval_iterative_holdout', - 'eval_cv', 'eval_partial_cv', 'eval_partial_cv_iterative'] +__all__ = [ + "TrainEvaluator", + "eval_holdout", + "eval_iterative_holdout", + "eval_cv", + "eval_partial_cv", + "eval_partial_cv_iterative", +] def _get_y_array(y: SUPPORTED_TARGET_TYPES, task_type: int) -> SUPPORTED_TARGET_TYPES: - if task_type in CLASSIFICATION_TASKS and task_type != \ - MULTILABEL_CLASSIFICATION: + if task_type in CLASSIFICATION_TASKS and task_type != MULTILABEL_CLASSIFICATION: return 
y.ravel() else: return y @@ -58,29 +68,26 @@ def subsample_indices( train_indices: List[int], subsample: Optional[float], task_type: int, - Y_train: SUPPORTED_TARGET_TYPES + Y_train: SUPPORTED_TARGET_TYPES, ) -> List[int]: if not isinstance(subsample, float): raise ValueError( - 'Subsample must be of type float, but is of type %s' - % type(subsample) + "Subsample must be of type float, but is of type %s" % type(subsample) ) elif subsample > 1: - raise ValueError( - 'Subsample must not be larger than 1, but is %f' - % subsample - ) + raise ValueError("Subsample must not be larger than 1, but is %f" % subsample) if subsample is not None and subsample < 1: # Only subsample if there are more indices given to this method than # required to subsample because otherwise scikit-learn will complain if task_type in CLASSIFICATION_TASKS and task_type != MULTILABEL_CLASSIFICATION: - stratify: Optional[ - SUPPORTED_TARGET_TYPES - ] = Y_train.iloc[train_indices] if hasattr( - Y_train, 'iloc') else Y_train[train_indices] + stratify: Optional[SUPPORTED_TARGET_TYPES] = ( + Y_train.iloc[train_indices] + if hasattr(Y_train, "iloc") + else Y_train[train_indices] + ) else: stratify = None @@ -109,40 +116,55 @@ def _fit_with_budget( task_type: int, ) -> None: if ( - budget_type == 'iterations' - or budget_type == 'mixed' and model.estimator_supports_iterative_fit() + budget_type == "iterations" + or budget_type == "mixed" + and model.estimator_supports_iterative_fit() ): if model.estimator_supports_iterative_fit(): budget_factor = model.get_max_iter() Xt, fit_params = model.fit_transformer( - X_train.iloc[train_indices] if hasattr(X_train, 'iloc') else X_train[train_indices], - Y_train.iloc[train_indices] if hasattr(Y_train, 'iloc') else Y_train[train_indices], + X_train.iloc[train_indices] + if hasattr(X_train, "iloc") + else X_train[train_indices], + Y_train.iloc[train_indices] + if hasattr(Y_train, "iloc") + else Y_train[train_indices], ) n_iter = int(np.ceil(budget / 100 * budget_factor)) model.iterative_fit( Xt, - Y_train.iloc[train_indices] if hasattr(Y_train, 'iloc') else Y_train[train_indices], + Y_train.iloc[train_indices] + if hasattr(Y_train, "iloc") + else Y_train[train_indices], n_iter=n_iter, refit=True, - **fit_params + **fit_params, ) else: _fit_and_suppress_warnings( logger, model, - X_train.iloc[train_indices] if hasattr(X_train, 'iloc') else X_train[train_indices], - Y_train.iloc[train_indices] if hasattr(Y_train, 'iloc') else Y_train[train_indices], + X_train.iloc[train_indices] + if hasattr(X_train, "iloc") + else X_train[train_indices], + Y_train.iloc[train_indices] + if hasattr(Y_train, "iloc") + else Y_train[train_indices], ) elif ( - budget_type == 'subsample' - or budget_type == 'mixed' and not model.estimator_supports_iterative_fit() + budget_type == "subsample" + or budget_type == "mixed" + and not model.estimator_supports_iterative_fit() ): subsample = budget / 100 train_indices_subset = subsample_indices( - train_indices, subsample, task_type, Y_train, + train_indices, + subsample, + task_type, + Y_train, ) _fit_and_suppress_warnings( logger, @@ -167,9 +189,12 @@ def __init__( scoring_functions: Optional[List[Scorer]] = None, seed: int = 1, output_y_hat_optimization: bool = True, - resampling_strategy: Optional[Union[str, BaseCrossValidator, - _RepeatedSplits, BaseShuffleSplit]] = None, - resampling_strategy_args: Optional[Dict[str, Optional[Union[float, int, str]]]] = None, + resampling_strategy: Optional[ + Union[str, BaseCrossValidator, _RepeatedSplits, BaseShuffleSplit] + ] = 
None, + resampling_strategy_args: Optional[ + Dict[str, Optional[Union[float, int, str]]] + ] = None, num_run: Optional[int] = None, budget: Optional[float] = None, budget_type: Optional[str] = None, @@ -206,15 +231,17 @@ def __init__( self.resampling_strategy_args = resampling_strategy_args self.splitter = self.get_splitter(self.datamanager) self.num_cv_folds = self.splitter.get_n_splits( - groups=self.resampling_strategy_args.get('groups') + groups=self.resampling_strategy_args.get("groups") ) - self.X_train = self.datamanager.data['X_train'] - self.Y_train = self.datamanager.data['Y_train'] + self.X_train = self.datamanager.data["X_train"] + self.Y_train = self.datamanager.data["Y_train"] self.Y_optimization: Optional[SUPPORTED_TARGET_TYPES] = None self.Y_targets = [None] * self.num_cv_folds self.Y_train_targets = np.ones(self.Y_train.shape) * np.NaN self.models = [None] * self.num_cv_folds - self.indices: List[Optional[Tuple[List[int], List[int]]]] = [None] * self.num_cv_folds + self.indices: List[Optional[Tuple[List[int], List[int]]]] = [ + None + ] * self.num_cv_folds # Necessary for full CV. Makes full CV not write predictions if only # a subset of folds is evaluated but time is up. Complicated, because @@ -225,8 +252,8 @@ def __init__( def fit_predict_and_loss(self, iterative: bool = False) -> None: """Fit, predict and compute the loss for cross-validation and - holdout (both iterative and non-iterative)""" - + holdout (both iterative and non-iterative) + """ # Define beforehand for mypy additional_run_info: Optional[TYPE_ADDITIONAL_INFO] = None @@ -234,14 +261,18 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: if self.num_cv_folds == 1: for train_split, test_split in self.splitter.split( - self.X_train, self.Y_train, - groups=self.resampling_strategy_args.get('groups') + self.X_train, + self.Y_train, + groups=self.resampling_strategy_args.get("groups"), ): self.Y_optimization = self.Y_train[test_split] self.Y_actual_train = self.Y_train[train_split] - self._partial_fit_and_predict_iterative(0, train_indices=train_split, - test_indices=test_split, - add_model_to_self=True) + self._partial_fit_and_predict_iterative( + 0, + train_indices=train_split, + test_indices=test_split, + add_model_to_self=True, + ) else: # Test if the model allows for an iterative fit, if not, @@ -266,16 +297,24 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: total_n_iterations = [0] * self.num_cv_folds # model.estimator_supports_iterative_fit -> true # After the if above, we know estimator support iterative fit - model_max_iter = [cast(IterativeComponent, model).get_max_iter() - for model in self.models] - - if self.budget_type in ['iterations', 'mixed'] and self.budget is None: - raise ValueError(f"When budget type is {self.budget_type} the budget " - "can not be None") + model_max_iter = [ + cast(IterativeComponent, model).get_max_iter() + for model in self.models + ] + + if self.budget_type in ["iterations", "mixed"] and self.budget is None: + raise ValueError( + f"When budget type is {self.budget_type} the budget " + "can not be None" + ) - if self.budget_type in ['iterations', 'mixed'] and cast(float, self.budget) > 0: + if ( + self.budget_type in ["iterations", "mixed"] + and cast(float, self.budget) > 0 + ): max_n_iter_budget = int( - np.ceil(cast(float, self.budget) / 100 * model_max_iter[0])) + np.ceil(cast(float, self.budget) / 100 * model_max_iter[0]) + ) max_iter = min(model_max_iter[0], max_n_iter_budget) else: max_iter = model_max_iter[0] @@ -283,7 +322,9 
@@ def fit_predict_and_loss(self, iterative: bool = False) -> None: models_current_iters = [0] * self.num_cv_folds Xt_array = [None] * self.num_cv_folds - fit_params_array = [{}] * self.num_cv_folds # type: List[Dict[str, Any]] + fit_params_array = [ + {} + ] * self.num_cv_folds # type: List[Dict[str, Any]] y = _get_y_array(self.Y_train, self.task_type) @@ -300,26 +341,33 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: splitter = self.get_splitter(self.datamanager) - for i, (train_indices, test_indices) in enumerate(splitter.split( - self.X_train, y, - groups=self.resampling_strategy_args.get('groups') - )): + for i, (train_indices, test_indices) in enumerate( + splitter.split( + self.X_train, + y, + groups=self.resampling_strategy_args.get("groups"), + ) + ): if converged[i]: continue model = self.models[i] if iterations[i] == 1: - self.Y_train_targets[train_indices] = \ - self.Y_train.iloc[train_indices] if hasattr( - self.Y_train, 'iloc') else self.Y_train[train_indices] + self.Y_train_targets[train_indices] = ( + self.Y_train.iloc[train_indices] + if hasattr(self.Y_train, "iloc") + else self.Y_train[train_indices] + ) self.Y_targets[i] = self.Y_train[test_indices] Xt, fit_params = model.fit_transformer( - self.X_train.iloc[train_indices] if hasattr( - self.X_train, 'iloc') else self.X_train[train_indices], - self.Y_train.iloc[train_indices] if hasattr( - self.Y_train, 'iloc') else self.Y_train[train_indices], + self.X_train.iloc[train_indices] + if hasattr(self.X_train, "iloc") + else self.X_train[train_indices], + self.Y_train.iloc[train_indices] + if hasattr(self.Y_train, "iloc") + else self.Y_train[train_indices], ) Xt_array[i] = Xt fit_params_array[i] = fit_params @@ -328,17 +376,14 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: model.iterative_fit( Xt_array[i], - self.Y_train.iloc[train_indices] if hasattr( - self.Y_train, 'iloc') else self.Y_train[train_indices], - n_iter=n_iter, **fit_params_array[i] + self.Y_train.iloc[train_indices] + if hasattr(self.Y_train, "iloc") + else self.Y_train[train_indices], + n_iter=n_iter, + **fit_params_array[i], ) - ( - train_pred, - opt_pred, - valid_pred, - test_pred - ) = self._predict( + (train_pred, opt_pred, valid_pred, test_pred) = self._predict( model, train_indices=train_indices, test_indices=test_indices, @@ -353,13 +398,14 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: # Compute train loss of this fold and store it. train_loss could # either be a scalar or a dict of scalars with metrics as keys. train_loss = self._loss( - self.Y_train.iloc[train_indices] if hasattr( - self.Y_train, 'iloc') else self.Y_train[train_indices], + self.Y_train.iloc[train_indices] + if hasattr(self.Y_train, "iloc") + else self.Y_train[train_indices], train_pred, ) train_losses[i] = train_loss - # number of training data points for this fold. Used for weighting - # the average. + # Number of training data points for this fold. + # Used for weighting the average. train_fold_weights[i] = len(train_indices) # Compute validation loss of this fold and store it. @@ -382,8 +428,8 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: iterations[i] = iterations[i] + 1 - # Compute weights of each fold based on the number of samples in each - # fold. + # Compute weights of each fold based on the number of samples + # in each fold. 
train_fold_weights_percentage = [ w / sum(train_fold_weights) for w in train_fold_weights ] @@ -395,12 +441,17 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: # dicts, then train_loss is computed using the target metric # (self.metric). if all(isinstance(elem, dict) for elem in train_losses): - train_loss = np.average([train_losses[i][str(self.metric)] - for i in range(self.num_cv_folds)], - weights=train_fold_weights_percentage, - ) + train_loss = np.average( + [ + train_losses[i][str(self.metric)] + for i in range(self.num_cv_folds) + ], + weights=train_fold_weights_percentage, + ) else: - train_loss = np.average(train_losses, weights=train_fold_weights_percentage) + train_loss = np.average( + train_losses, weights=train_fold_weights_percentage + ) # if all_scoring_function is true, return a dict of opt_loss. # Otherwise, return a scalar. @@ -415,23 +466,36 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: weights=opt_fold_weights_percentage, ) else: - opt_loss = np.average(opt_losses, weights=opt_fold_weights_percentage) + opt_loss = np.average( + opt_losses, weights=opt_fold_weights_percentage + ) Y_targets = self.Y_targets Y_train_targets = self.Y_train_targets Y_optimization_preds = np.concatenate( - [Y_optimization_pred[i] for i in range(self.num_cv_folds) - if Y_optimization_pred[i] is not None]) - Y_targets = np.concatenate([ - Y_targets[i] for i in range(self.num_cv_folds) - if Y_targets[i] is not None - ]) + [ + Y_optimization_pred[i] + for i in range(self.num_cv_folds) + if Y_optimization_pred[i] is not None + ] + ) + Y_targets = np.concatenate( + [ + Y_targets[i] + for i in range(self.num_cv_folds) + if Y_targets[i] is not None + ] + ) if self.X_valid is not None: - Y_valid_preds = np.array([Y_valid_pred[i] - for i in range(self.num_cv_folds) - if Y_valid_pred[i] is not None]) + Y_valid_preds = np.array( + [ + Y_valid_pred[i] + for i in range(self.num_cv_folds) + if Y_valid_pred[i] is not None + ] + ) # Average the predictions of several models if len(Y_valid_preds.shape) == 3: Y_valid_preds = np.nanmean(Y_valid_preds, axis=0) @@ -439,9 +503,13 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: Y_valid_preds = None if self.X_test is not None: - Y_test_preds = np.array([Y_test_pred[i] - for i in range(self.num_cv_folds) - if Y_test_pred[i] is not None]) + Y_test_preds = np.array( + [ + Y_test_pred[i] + for i in range(self.num_cv_folds) + if Y_test_pred[i] is not None + ] + ) # Average the predictions of several models if len(Y_test_preds.shape) == 3: Y_test_preds = np.nanmean(Y_test_preds, axis=0) @@ -453,8 +521,12 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: self.model = self._get_model() status = StatusType.DONOTADVANCE - if any([model_current_iter == max_iter - for model_current_iter in models_current_iters]): + if any( + [ + model_current_iter == max_iter + for model_current_iter in models_current_iters + ] + ): status = StatusType.SUCCESS self.finish_up( loss=opt_loss, @@ -488,10 +560,11 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: # TODO: mention that no additional run info is possible in this # case! -> maybe remove full CV from the train evaluator anyway and # make the user implement this! 
- for i, (train_split, test_split) in enumerate(self.splitter.split( - self.X_train, y, - groups=self.resampling_strategy_args.get('groups') - )): + for i, (train_split, test_split) in enumerate( + self.splitter.split( + self.X_train, y, groups=self.resampling_strategy_args.get("groups") + ) + ): # TODO add check that split is actually an integer array, # not a boolean array (to allow indexed assignement of @@ -504,11 +577,11 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: valid_pred, test_pred, additional_run_info, - ) = ( - self._partial_fit_and_predict_standard( - i, train_indices=train_split, test_indices=test_split, - add_model_to_self=self.num_cv_folds == 1, - ) + ) = self._partial_fit_and_predict_standard( + i, + train_indices=train_split, + test_indices=test_split, + add_model_to_self=self.num_cv_folds == 1, ) else: ( @@ -517,11 +590,11 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: valid_pred, test_pred, additional_run_info, - ) = ( - self._partial_fit_and_predict_budget( - i, train_indices=train_split, test_indices=test_split, - add_model_to_self=self.num_cv_folds == 1, - ) + ) = self._partial_fit_and_predict_budget( + i, + train_indices=train_split, + test_indices=test_split, + add_model_to_self=self.num_cv_folds == 1, ) if ( @@ -531,8 +604,8 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: ): raise TAEAbortException( 'Found additional run info "%s" in fold %d, ' - 'but cannot handle additional run info if fold >= 1.' % - (additional_run_info, i) + "but cannot handle additional run info if fold >= 1." + % (additional_run_info, i) ) Y_train_pred[i] = train_pred @@ -564,16 +637,21 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: # Compute weights of each fold based on the number of samples in each # fold. - train_fold_weights = [w / sum(train_fold_weights) for w in train_fold_weights] + train_fold_weights = [ + w / sum(train_fold_weights) for w in train_fold_weights + ] opt_fold_weights = [w / sum(opt_fold_weights) for w in opt_fold_weights] # train_losses is a list of either scalars or dicts. If it contains dicts, # then train_loss is computed using the target metric (self.metric). 
if all(isinstance(elem, dict) for elem in train_losses): - train_loss = np.average([train_losses[i][str(self.metric)] - for i in range(self.num_cv_folds)], - weights=train_fold_weights, - ) + train_loss = np.average( + [ + train_losses[i][str(self.metric)] + for i in range(self.num_cv_folds) + ], + weights=train_fold_weights, + ) else: train_loss = np.average(train_losses, weights=train_fold_weights) @@ -582,10 +660,10 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: if self.scoring_functions: opt_loss = {} for metric in opt_losses[0].keys(): - opt_loss[metric] = np.average([opt_losses[i][metric] - for i in range(self.num_cv_folds)], - weights=opt_fold_weights, - ) + opt_loss[metric] = np.average( + [opt_losses[i][metric] for i in range(self.num_cv_folds)], + weights=opt_fold_weights, + ) else: opt_loss = np.average(opt_losses, weights=opt_fold_weights) @@ -593,23 +671,40 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: Y_train_targets = self.Y_train_targets Y_optimization_pred = np.concatenate( - [Y_optimization_pred[i] for i in range(self.num_cv_folds) - if Y_optimization_pred[i] is not None]) - Y_targets = np.concatenate([Y_targets[i] for i in range(self.num_cv_folds) - if Y_targets[i] is not None]) + [ + Y_optimization_pred[i] + for i in range(self.num_cv_folds) + if Y_optimization_pred[i] is not None + ] + ) + Y_targets = np.concatenate( + [ + Y_targets[i] + for i in range(self.num_cv_folds) + if Y_targets[i] is not None + ] + ) if self.X_valid is not None: - Y_valid_pred = np.array([Y_valid_pred[i] - for i in range(self.num_cv_folds) - if Y_valid_pred[i] is not None]) + Y_valid_pred = np.array( + [ + Y_valid_pred[i] + for i in range(self.num_cv_folds) + if Y_valid_pred[i] is not None + ] + ) # Average the predictions of several models if len(np.shape(Y_valid_pred)) == 3: Y_valid_pred = np.nanmean(Y_valid_pred, axis=0) if self.X_test is not None: - Y_test_pred = np.array([Y_test_pred[i] - for i in range(self.num_cv_folds) - if Y_test_pred[i] is not None]) + Y_test_pred = np.array( + [ + Y_test_pred[i] + for i in range(self.num_cv_folds) + if Y_test_pred[i] is not None + ] + ) # Average the predictions of several models if len(np.shape(Y_test_pred)) == 3: Y_test_pred = np.nanmean(Y_test_pred, axis=0) @@ -625,8 +720,8 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: # TODO check if there might be reasons for do-not-advance here! status = StatusType.SUCCESS elif ( - self.budget_type == 'iterations' - or self.budget_type == 'mixed' + self.budget_type == "iterations" + or self.budget_type == "mixed" and self.model.estimator_supports_iterative_fit() ): budget_factor = self.model.get_max_iter() @@ -661,19 +756,21 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None: ) def partial_fit_predict_and_loss(self, fold: int, iterative: bool = False) -> None: - """Fit, predict and compute the loss for eval_partial_cv (both iterative and normal)""" - + """Fit, predict and get loss for eval_partial_cv (iterative and normal)""" if fold > self.num_cv_folds: - raise ValueError('Cannot evaluate a fold %d which is higher than ' - 'the number of folds %d.' % (fold, self.num_cv_folds)) + raise ValueError( + "Cannot evaluate a fold %d which is higher than " + "the number of folds %d." 
% (fold, self.num_cv_folds) + ) if self.budget_type is not None: raise NotImplementedError() y = _get_y_array(self.Y_train, self.task_type) - for i, (train_split, test_split) in enumerate(self.splitter.split( - self.X_train, y, - groups=self.resampling_strategy_args.get('groups') - )): + for i, (train_split, test_split) in enumerate( + self.splitter.split( + self.X_train, y, groups=self.resampling_strategy_args.get("groups") + ) + ): if i != fold: continue else: @@ -685,18 +782,25 @@ def partial_fit_predict_and_loss(self, fold: int, iterative: bool = False) -> No if iterative: self._partial_fit_and_predict_iterative( - fold, train_indices=train_split, test_indices=test_split, - add_model_to_self=True) + fold, + train_indices=train_split, + test_indices=test_split, + add_model_to_self=True, + ) elif self.budget_type is not None: raise NotImplementedError() else: - train_pred, opt_pred, valid_pred, test_pred, additional_run_info = ( - self._partial_fit_and_predict_standard( - fold, - train_indices=train_split, - test_indices=test_split, - add_model_to_self=True, - ) + ( + train_pred, + opt_pred, + valid_pred, + test_pred, + additional_run_info, + ) = self._partial_fit_and_predict_standard( + fold, + train_indices=train_split, + test_indices=test_split, + add_model_to_self=True, ) train_loss = self._loss(self.Y_actual_train, train_pred) loss = self._loss(self.Y_targets[fold], opt_pred) @@ -720,15 +824,19 @@ def partial_fit_predict_and_loss(self, fold: int, iterative: bool = False) -> No file_output=False, final_call=True, additional_run_info=None, - status=status + status=status, ) - def _partial_fit_and_predict_iterative(self, fold: int, train_indices: List[int], - test_indices: List[int], - add_model_to_self: bool) -> None: + def _partial_fit_and_predict_iterative( + self, + fold: int, + train_indices: List[int], + test_indices: List[int], + add_model_to_self: bool, + ) -> None: model = self._get_model() - self.indices[fold] = ((train_indices, test_indices)) + self.indices[fold] = (train_indices, test_indices) # Do only output the files in the case of iterative holdout, # In case of iterative partial cv, no file output is needed @@ -737,14 +845,19 @@ def _partial_fit_and_predict_iterative(self, fold: int, train_indices: List[int] if model.estimator_supports_iterative_fit(): Xt, fit_params = model.fit_transformer( - self.X_train.iloc[train_indices] if hasattr( - self.Y_train, 'iloc') else self.X_train[train_indices], - self.Y_train.iloc[train_indices] if hasattr( - self.Y_train, 'iloc') else self.Y_train[train_indices], + self.X_train.iloc[train_indices] + if hasattr(self.Y_train, "iloc") + else self.X_train[train_indices], + self.Y_train.iloc[train_indices] + if hasattr(self.Y_train, "iloc") + else self.Y_train[train_indices], ) - self.Y_train_targets[train_indices] = self.Y_train.iloc[train_indices] if hasattr( - self.Y_train, 'iloc') else self.Y_train[train_indices] + self.Y_train_targets[train_indices] = ( + self.Y_train.iloc[train_indices] + if hasattr(self.Y_train, "iloc") + else self.Y_train[train_indices] + ) iteration = 1 total_n_iteration = 0 @@ -760,19 +873,21 @@ def _partial_fit_and_predict_iterative(self, fold: int, train_indices: List[int] while ( not model.configuration_fully_fitted() and model_current_iter < max_iter ): - n_iter = int(2**iteration/2) if iteration > 1 else 2 + n_iter = int(2**iteration / 2) if iteration > 1 else 2 total_n_iteration += n_iter model.iterative_fit( Xt, - self.Y_train.iloc[train_indices] if hasattr( - self.Y_train, 'iloc') else 
self.Y_train[train_indices], - n_iter=n_iter, **fit_params + self.Y_train.iloc[train_indices] + if hasattr(self.Y_train, "iloc") + else self.Y_train[train_indices], + n_iter=n_iter, + **fit_params, ) ( Y_train_pred, Y_optimization_pred, Y_valid_pred, - Y_test_pred + Y_test_pred, ) = self._predict( model, train_indices=train_indices, @@ -783,9 +898,10 @@ def _partial_fit_and_predict_iterative(self, fold: int, train_indices: List[int] self.model = model train_loss = self._loss( - self.Y_train.iloc[train_indices] if hasattr( - self.Y_train, 'iloc') else self.Y_train[train_indices], - Y_train_pred + self.Y_train.iloc[train_indices] + if hasattr(self.Y_train, "iloc") + else self.Y_train[train_indices], + Y_train_pred, ) loss = self._loss(self.Y_train[test_indices], Y_optimization_pred) additional_run_info = model.get_additional_run_info() @@ -822,13 +938,15 @@ def _partial_fit_and_predict_iterative(self, fold: int, train_indices: List[int] Y_optimization_pred, Y_valid_pred, Y_test_pred, - additional_run_info - ) = self._partial_fit_and_predict_standard(fold, train_indices, test_indices, - add_model_to_self) + additional_run_info, + ) = self._partial_fit_and_predict_standard( + fold, train_indices, test_indices, add_model_to_self + ) train_loss = self._loss( - self.Y_train.iloc[train_indices] if hasattr( - self.Y_train, 'iloc') else self.Y_train[train_indices], - Y_train_pred + self.Y_train.iloc[train_indices] + if hasattr(self.Y_train, "iloc") + else self.Y_train[train_indices], + Y_train_pred, ) loss = self._loss(self.Y_train[test_indices], Y_optimization_pred) if self.model.estimator_supports_iterative_fit(): @@ -855,25 +973,30 @@ def _partial_fit_and_predict_iterative(self, fold: int, train_indices: List[int] def _partial_fit_and_predict_standard( self, - fold: int, train_indices: List[int], + fold: int, + train_indices: List[int], test_indices: List[int], - add_model_to_self: bool = False - ) -> Tuple[PIPELINE_DATA_DTYPE, # train_pred - PIPELINE_DATA_DTYPE, # opt_pred - PIPELINE_DATA_DTYPE, # valid_pred - PIPELINE_DATA_DTYPE, # test_pred - TYPE_ADDITIONAL_INFO]: + add_model_to_self: bool = False, + ) -> Tuple[ + PIPELINE_DATA_DTYPE, # train_pred + PIPELINE_DATA_DTYPE, # opt_pred + PIPELINE_DATA_DTYPE, # valid_pred + PIPELINE_DATA_DTYPE, # test_pred + TYPE_ADDITIONAL_INFO, + ]: model = self._get_model() - self.indices[fold] = ((train_indices, test_indices)) + self.indices[fold] = (train_indices, test_indices) _fit_and_suppress_warnings( self.logger, model, - self.X_train.iloc[train_indices] if hasattr( - self.X_train, 'iloc') else self.X_train[train_indices], - self.Y_train.iloc[train_indices] if hasattr( - self.Y_train, 'iloc') else self.Y_train[train_indices], + self.X_train.iloc[train_indices] + if hasattr(self.X_train, "iloc") + else self.X_train[train_indices], + self.Y_train.iloc[train_indices] + if hasattr(self.Y_train, "iloc") + else self.Y_train[train_indices], ) if add_model_to_self: @@ -881,10 +1004,16 @@ def _partial_fit_and_predict_standard( else: self.models[fold] = model - self.Y_targets[fold] = self.Y_train.iloc[test_indices] if hasattr( - self.Y_train, 'iloc') else self.Y_train[test_indices] - self.Y_train_targets[train_indices] = self.Y_train.iloc[train_indices] if hasattr( - self.Y_train, 'iloc') else self.Y_train[train_indices] + self.Y_targets[fold] = ( + self.Y_train.iloc[test_indices] + if hasattr(self.Y_train, "iloc") + else self.Y_train[test_indices] + ) + self.Y_train_targets[train_indices] = ( + self.Y_train.iloc[train_indices] + if hasattr(self.Y_train, "iloc") + 
else self.Y_train[train_indices] + ) train_pred, opt_pred, valid_pred, test_pred = self._predict( model=model, @@ -902,24 +1031,30 @@ def _partial_fit_and_predict_standard( def _partial_fit_and_predict_budget( self, - fold: int, train_indices: List[int], + fold: int, + train_indices: List[int], test_indices: List[int], add_model_to_self: bool = False, - ) -> Tuple[PIPELINE_DATA_DTYPE, # train_pred - PIPELINE_DATA_DTYPE, # opt_pred - PIPELINE_DATA_DTYPE, # valid_pred - PIPELINE_DATA_DTYPE, # test_pred - TYPE_ADDITIONAL_INFO]: + ) -> Tuple[ + PIPELINE_DATA_DTYPE, # train_pred + PIPELINE_DATA_DTYPE, # opt_pred + PIPELINE_DATA_DTYPE, # valid_pred + PIPELINE_DATA_DTYPE, # test_pred + TYPE_ADDITIONAL_INFO, + ]: # This function is only called in the event budget is not None # Add this statement for mypy assert self.budget is not None model = self._get_model() - self.indices[fold] = ((train_indices, test_indices)) + self.indices[fold] = (train_indices, test_indices) self.Y_targets[fold] = self.Y_train[test_indices] - self.Y_train_targets[train_indices] = self.Y_train.iloc[train_indices] if hasattr( - self.Y_train, 'iloc') else self.Y_train[train_indices], + self.Y_train_targets[train_indices] = ( + self.Y_train.iloc[train_indices] + if hasattr(self.Y_train, "iloc") + else self.Y_train[train_indices], + ) _fit_with_budget( X_train=self.X_train, @@ -952,93 +1087,110 @@ def _partial_fit_and_predict_budget( additional_run_info, ) - def _predict(self, model: BaseEstimator, test_indices: List[int], - train_indices: List[int]) -> Tuple[PIPELINE_DATA_DTYPE, - PIPELINE_DATA_DTYPE, - PIPELINE_DATA_DTYPE, - PIPELINE_DATA_DTYPE]: + def _predict( + self, model: BaseEstimator, test_indices: List[int], train_indices: List[int] + ) -> Tuple[ + PIPELINE_DATA_DTYPE, + PIPELINE_DATA_DTYPE, + PIPELINE_DATA_DTYPE, + PIPELINE_DATA_DTYPE, + ]: train_pred = self.predict_function( - self.X_train.iloc[train_indices] if hasattr( - self.X_train, 'iloc') else self.X_train[train_indices], - model, self.task_type, - self.Y_train.iloc[train_indices] if hasattr( - self.Y_train, 'iloc') else self.Y_train[train_indices] + self.X_train.iloc[train_indices] + if hasattr(self.X_train, "iloc") + else self.X_train[train_indices], + model, + self.task_type, + self.Y_train.iloc[train_indices] + if hasattr(self.Y_train, "iloc") + else self.Y_train[train_indices], ) opt_pred = self.predict_function( - self.X_train.iloc[test_indices] if hasattr( - self.X_train, 'iloc') else self.X_train[test_indices], - model, self.task_type, - self.Y_train.iloc[train_indices] if hasattr( - self.Y_train, 'iloc') else self.Y_train[train_indices] + self.X_train.iloc[test_indices] + if hasattr(self.X_train, "iloc") + else self.X_train[test_indices], + model, + self.task_type, + self.Y_train.iloc[train_indices] + if hasattr(self.Y_train, "iloc") + else self.Y_train[train_indices], ) if self.X_valid is not None: X_valid = self.X_valid.copy() - valid_pred = self.predict_function(X_valid, model, - self.task_type, - self.Y_train[train_indices]) + valid_pred = self.predict_function( + X_valid, model, self.task_type, self.Y_train[train_indices] + ) else: valid_pred = None if self.X_test is not None: X_test = self.X_test.copy() test_pred = self.predict_function( - X_test, model, + X_test, + model, self.task_type, - self.Y_train.iloc[train_indices] if hasattr( - self.Y_train, 'iloc') else self.Y_train[train_indices] + self.Y_train.iloc[train_indices] + if hasattr(self.Y_train, "iloc") + else self.Y_train[train_indices], ) else: test_pred = None return train_pred, 
opt_pred, valid_pred, test_pred - def get_splitter(self, D: AbstractDataManager) -> Union[BaseCrossValidator, _RepeatedSplits, - BaseShuffleSplit]: + def get_splitter( + self, D: AbstractDataManager + ) -> Union[BaseCrossValidator, _RepeatedSplits, BaseShuffleSplit]: if self.resampling_strategy_args is None: self.resampling_strategy_args = {} - if ( - self.resampling_strategy is not None - and not isinstance(self.resampling_strategy, str) + if self.resampling_strategy is not None and not isinstance( + self.resampling_strategy, str ): - if 'groups' not in self.resampling_strategy_args: - self.resampling_strategy_args['groups'] = None + if "groups" not in self.resampling_strategy_args: + self.resampling_strategy_args["groups"] = None - if isinstance(self.resampling_strategy, (BaseCrossValidator, - _RepeatedSplits, - BaseShuffleSplit)): + if isinstance( + self.resampling_strategy, + (BaseCrossValidator, _RepeatedSplits, BaseShuffleSplit), + ): self.check_splitter_resampling_strategy( - X=D.data['X_train'], y=D.data['Y_train'], - groups=self.resampling_strategy_args.get('groups'), - task=D.info['task'], + X=D.data["X_train"], + y=D.data["Y_train"], + groups=self.resampling_strategy_args.get("groups"), + task=D.info["task"], resampling_strategy=self.resampling_strategy, ) return self.resampling_strategy # If it got to this point, we are dealing with a non-supported # re-sampling strategy - raise ValueError("Unsupported resampling strategy {}/{} provided".format( - self.resampling_strategy, - type(self.resampling_strategy), - )) + raise ValueError( + "Unsupported resampling strategy {}/{} provided".format( + self.resampling_strategy, + type(self.resampling_strategy), + ) + ) - y = D.data['Y_train'] - shuffle = self.resampling_strategy_args.get('shuffle', True) + y = D.data["Y_train"] + shuffle = self.resampling_strategy_args.get("shuffle", True) train_size = 0.67 if self.resampling_strategy_args: - train_size_from_user = self.resampling_strategy_args.get('train_size') + train_size_from_user = self.resampling_strategy_args.get("train_size") if train_size_from_user is not None: train_size = float(train_size_from_user) test_size = float("%.4f" % (1 - train_size)) - if D.info['task'] in CLASSIFICATION_TASKS and D.info['task'] != MULTILABEL_CLASSIFICATION: + if ( + D.info["task"] in CLASSIFICATION_TASKS + and D.info["task"] != MULTILABEL_CLASSIFICATION + ): y = y.ravel() - if self.resampling_strategy in ['holdout', - 'holdout-iterative-fit']: + if self.resampling_strategy in ["holdout", "holdout-iterative-fit"]: if shuffle: try: @@ -1050,7 +1202,7 @@ def get_splitter(self, D: AbstractDataManager) -> Union[BaseCrossValidator, _Rep test_cv = copy.deepcopy(cv) next(test_cv.split(y, y)) except ValueError as e: - if 'The least populated class in y has only' in e.args[0]: + if "The least populated class in y has only" in e.args[0]: cv = CustomStratifiedShuffleSplit( n_splits=1, test_size=test_size, @@ -1064,14 +1216,18 @@ def get_splitter(self, D: AbstractDataManager) -> Union[BaseCrossValidator, _Rep test_fold[:tmp_train_size] = -1 cv = PredefinedSplit(test_fold=test_fold) cv.n_splits = 1 # As sklearn is inconsistent here - elif self.resampling_strategy in ['cv', 'cv-iterative-fit', 'partial-cv', - 'partial-cv-iterative-fit']: + elif self.resampling_strategy in [ + "cv", + "cv-iterative-fit", + "partial-cv", + "partial-cv-iterative-fit", + ]: if shuffle: try: with warnings.catch_warnings(): - warnings.simplefilter('error') + warnings.simplefilter("error") cv = StratifiedKFold( - 
n_splits=self.resampling_strategy_args['folds'], + n_splits=self.resampling_strategy_args["folds"], shuffle=shuffle, random_state=1, ) @@ -1079,37 +1235,39 @@ def get_splitter(self, D: AbstractDataManager) -> Union[BaseCrossValidator, _Rep next(test_cv.split(y, y)) except UserWarning as e: print(e) - if 'The least populated class in y has only' in e.args[0]: + if "The least populated class in y has only" in e.args[0]: cv = CustomStratifiedKFold( - n_splits=self.resampling_strategy_args['folds'], + n_splits=self.resampling_strategy_args["folds"], shuffle=shuffle, random_state=1, ) else: raise e else: - cv = KFold(n_splits=self.resampling_strategy_args['folds'], - shuffle=shuffle) + cv = KFold( + n_splits=self.resampling_strategy_args["folds"], shuffle=shuffle + ) else: raise ValueError(self.resampling_strategy) else: - if self.resampling_strategy in ['holdout', - 'holdout-iterative-fit']: + if self.resampling_strategy in ["holdout", "holdout-iterative-fit"]: # TODO shuffle not taken into account for this if shuffle: - cv = ShuffleSplit(n_splits=1, test_size=test_size, - random_state=1) + cv = ShuffleSplit(n_splits=1, test_size=test_size, random_state=1) else: tmp_train_size = int(np.floor(train_size * y.shape[0])) test_fold = np.zeros(y.shape[0]) test_fold[:tmp_train_size] = -1 cv = PredefinedSplit(test_fold=test_fold) cv.n_splits = 1 # As sklearn is inconsistent here - elif self.resampling_strategy in ['cv', 'partial-cv', - 'partial-cv-iterative-fit']: + elif self.resampling_strategy in [ + "cv", + "partial-cv", + "partial-cv-iterative-fit", + ]: random_state = 1 if shuffle else None cv = KFold( - n_splits=self.resampling_strategy_args['folds'], + n_splits=self.resampling_strategy_args["folds"], shuffle=shuffle, random_state=random_state, ) @@ -1124,16 +1282,14 @@ def check_splitter_resampling_strategy( y: np.ndarray, task: int, groups: Any, - resampling_strategy: Union[BaseCrossValidator, _RepeatedSplits, - BaseShuffleSplit], + resampling_strategy: Union[ + BaseCrossValidator, _RepeatedSplits, BaseShuffleSplit + ], ) -> None: if ( task in CLASSIFICATION_TASKS and task != MULTILABEL_CLASSIFICATION - or ( - task in REGRESSION_TASKS - and task != MULTIOUTPUT_REGRESSION - ) + or (task in REGRESSION_TASKS and task != MULTIOUTPUT_REGRESSION) ): y = y.ravel() @@ -1141,12 +1297,14 @@ def check_splitter_resampling_strategy( resampling_strategy.get_n_splits(X=X, y=y, groups=groups) next(resampling_strategy.split(X=X, y=y, groups=groups)) except Exception as e: - raise ValueError("Unsupported resampling strategy " - "{}/{} cause exception: {}".format( - resampling_strategy, - groups, - str(e), - )) + raise ValueError( + "Unsupported resampling strategy " + "{}/{} cause exception: {}".format( + resampling_strategy, + groups, + str(e), + ) + ) # create closure for evaluating an algorithm @@ -1154,7 +1312,9 @@ def eval_holdout( queue: multiprocessing.Queue, config: Union[int, Configuration], backend: Backend, - resampling_strategy: Union[str, BaseCrossValidator, _RepeatedSplits, BaseShuffleSplit], + resampling_strategy: Union[ + str, BaseCrossValidator, _RepeatedSplits, BaseShuffleSplit + ], resampling_strategy_args: Dict[str, Optional[Union[float, int, str]]], metric: Scorer, seed: int, @@ -1199,7 +1359,9 @@ def eval_iterative_holdout( queue: multiprocessing.Queue, config: Union[int, Configuration], backend: Backend, - resampling_strategy: Union[str, BaseCrossValidator, _RepeatedSplits, BaseShuffleSplit], + resampling_strategy: Union[ + str, BaseCrossValidator, _RepeatedSplits, BaseShuffleSplit + ], 
resampling_strategy_args: Dict[str, Optional[Union[float, int, str]]], metric: Scorer, seed: int, @@ -1236,7 +1398,7 @@ def eval_iterative_holdout( additional_components=additional_components, init_params=init_params, budget=budget, - budget_type=budget_type + budget_type=budget_type, ) @@ -1244,7 +1406,9 @@ def eval_partial_cv( queue: multiprocessing.Queue, config: Union[int, Configuration], backend: Backend, - resampling_strategy: Union[str, BaseCrossValidator, _RepeatedSplits, BaseShuffleSplit], + resampling_strategy: Union[ + str, BaseCrossValidator, _RepeatedSplits, BaseShuffleSplit + ], resampling_strategy_args: Dict[str, Optional[Union[float, int, str]]], metric: Scorer, seed: int, @@ -1265,7 +1429,7 @@ def eval_partial_cv( if budget_type is not None: raise NotImplementedError() instance_dict: Dict[str, int] = json.loads(instance) if instance is not None else {} - fold = instance_dict['fold'] + fold = instance_dict["fold"] evaluator = TrainEvaluator( backend=backend, @@ -1295,7 +1459,9 @@ def eval_partial_cv_iterative( queue: multiprocessing.Queue, config: Union[int, Configuration], backend: Backend, - resampling_strategy: Union[str, BaseCrossValidator, _RepeatedSplits, BaseShuffleSplit], + resampling_strategy: Union[ + str, BaseCrossValidator, _RepeatedSplits, BaseShuffleSplit + ], resampling_strategy_args: Dict[str, Optional[Union[float, int, str]]], metric: Scorer, seed: int, @@ -1341,7 +1507,9 @@ def eval_cv( queue: multiprocessing.Queue, config: Union[int, Configuration], backend: Backend, - resampling_strategy: Union[str, BaseCrossValidator, _RepeatedSplits, BaseShuffleSplit], + resampling_strategy: Union[ + str, BaseCrossValidator, _RepeatedSplits, BaseShuffleSplit + ], resampling_strategy_args: Dict[str, Optional[Union[float, int, str]]], metric: Scorer, seed: int, @@ -1387,7 +1555,9 @@ def eval_iterative_cv( queue: multiprocessing.Queue, config: Union[int, Configuration], backend: Backend, - resampling_strategy: Union[str, BaseCrossValidator, _RepeatedSplits, BaseShuffleSplit], + resampling_strategy: Union[ + str, BaseCrossValidator, _RepeatedSplits, BaseShuffleSplit + ], resampling_strategy_args: Dict[str, Optional[Union[float, int, str]]], metric: Scorer, seed: int, diff --git a/autosklearn/evaluation/util.py b/autosklearn/evaluation/util.py index e7483cbd26..c249c8be1c 100644 --- a/autosklearn/evaluation/util.py +++ b/autosklearn/evaluation/util.py @@ -3,14 +3,12 @@ import multiprocessing import queue +__all__ = ["read_queue"] -__all__ = [ - 'read_queue' -] - -def read_queue(queue_: multiprocessing.Queue - ) -> List[Dict[str, Union[str, bool, int, float, List, Dict, Tuple]]]: +def read_queue( + queue_: multiprocessing.Queue, +) -> List[Dict[str, Union[str, bool, int, float, List, Dict, Tuple]]]: stack = [] while True: try: @@ -21,8 +19,8 @@ def read_queue(queue_: multiprocessing.Queue # Check if there is a special placeholder value which tells us that # we don't have to wait until the queue times out in order to # retrieve the final value! 
- if 'final_queue_element' in rval: - del rval['final_queue_element'] + if "final_queue_element" in rval: + del rval["final_queue_element"] do_break = True else: do_break = False @@ -46,12 +44,13 @@ def empty_queue(queue_: multiprocessing.Queue) -> None: queue_.close() -def extract_learning_curve(stack: List[Dict[str, Any]], - key: Optional[str] = None) -> List[float]: +def extract_learning_curve( + stack: List[Dict[str, Any]], key: Optional[str] = None +) -> List[float]: learning_curve = [] for entry in stack: if key: - learning_curve.append(entry['additional_run_info'][key]) + learning_curve.append(entry["additional_run_info"][key]) else: - learning_curve.append(entry['loss']) + learning_curve.append(entry["loss"]) return list(learning_curve) diff --git a/autosklearn/experimental/askl2.py b/autosklearn/experimental/askl2.py index 749dfc6611..7068270a8e 100644 --- a/autosklearn/experimental/askl2.py +++ b/autosklearn/experimental/askl2.py @@ -1,59 +1,65 @@ +from typing import Any, Dict, List, Mapping, Optional, Union + import hashlib import json import os import pathlib import pickle -from typing import Any, Dict, List, Optional, Union, Mapping import dask.distributed -import scipy.sparse - -from ConfigSpace import Configuration import numpy as np import pandas as pd +import scipy.sparse import sklearn +from ConfigSpace import Configuration import autosklearn -from autosklearn.classification import AutoSklearnClassifier import autosklearn.experimental.selector -from autosklearn.metrics import Scorer, balanced_accuracy, roc_auc, log_loss, accuracy +from autosklearn.classification import AutoSklearnClassifier +from autosklearn.metrics import Scorer, accuracy, balanced_accuracy, log_loss, roc_auc metrics = (balanced_accuracy, roc_auc, log_loss) selector_files = {} this_directory = pathlib.Path(__file__).resolve().parent for metric in metrics: - training_data_file = this_directory / metric.name / 'askl2_training_data.json' + training_data_file = this_directory / metric.name / "askl2_training_data.json" with open(training_data_file) as fh: training_data = json.load(fh) fh.seek(0) m = hashlib.md5() - m.update(fh.read().encode('utf8')) + m.update(fh.read().encode("utf8")) training_data_hash = m.hexdigest()[:10] selector_filename = "askl2_selector_%s_%s_%s_%s.pkl" % ( autosklearn.__version__, sklearn.__version__, metric.name, - training_data_hash + training_data_hash, ) - selector_directory = os.environ.get('XDG_CACHE_HOME') + selector_directory = os.environ.get("XDG_CACHE_HOME") if selector_directory is None: selector_directory = pathlib.Path.home() - selector_directory = pathlib.Path(selector_directory).joinpath('auto-sklearn').expanduser() + selector_directory = ( + pathlib.Path(selector_directory).joinpath("auto-sklearn").expanduser() + ) selector_files[metric.name] = selector_directory / selector_filename - metafeatures = pd.DataFrame(training_data['metafeatures']) - strategies = training_data['strategies'] - y_values = pd.DataFrame(training_data['y_values'], columns=strategies, index=metafeatures.index) - minima_for_methods = training_data['minima_for_methods'] - maxima_for_methods = training_data['maxima_for_methods'] - default_strategies = training_data['tie_break_order'] + metafeatures = pd.DataFrame(training_data["metafeatures"]) + strategies = training_data["strategies"] + y_values = pd.DataFrame( + training_data["y_values"], columns=strategies, index=metafeatures.index + ) + minima_for_methods = training_data["minima_for_methods"] + maxima_for_methods = 
training_data["maxima_for_methods"] + default_strategies = training_data["tie_break_order"] if not selector_files[metric.name].exists(): selector = autosklearn.experimental.selector.OVORF( - configuration=training_data['configuration'], + configuration=training_data["configuration"], random_state=np.random.RandomState(1), n_estimators=500, tie_break_order=default_strategies, ) - selector = autosklearn.experimental.selector.FallbackWrapper(selector, default_strategies) + selector = autosklearn.experimental.selector.FallbackWrapper( + selector, default_strategies + ) selector.fit( X=metafeatures, y=y_values, @@ -63,12 +69,14 @@ selector_files[metric.name].parent.mkdir(exist_ok=True, parents=True) try: - with open(selector_files[metric.name], 'wb') as fh: + with open(selector_files[metric.name], "wb") as fh: pickle.dump(selector, fh) except Exception as e: - print("AutoSklearn2Classifier needs to create a selector file under " - "the user's home directory or XDG_CACHE_HOME. Nevertheless " - "the path {} is not writable.".format(selector_files[metric.name])) + print( + "AutoSklearn2Classifier needs to create a selector file under " + "the user's home directory or XDG_CACHE_HOME. Nevertheless " + "the path {} is not writable.".format(selector_files[metric.name]) + ) raise e @@ -87,9 +95,9 @@ def __call__( dask_client, ): from smac.facade.smac_ac_facade import SMAC4AC + from smac.intensification.simple_intensifier import SimpleIntensifier from smac.runhistory.runhistory2epm import RunHistory2EPM4LogCost from smac.scenario.scenario import Scenario - from smac.intensification.simple_intensifier import SimpleIntensifier scenario = Scenario(scenario_dict) @@ -151,7 +159,7 @@ def __call__( pass rh2EPM = RunHistory2EPM4LogCost - ta_kwargs['budget_type'] = self.budget_type + ta_kwargs["budget_type"] = self.budget_type smac4ac = SMAC4AC( scenario=scenario, @@ -163,10 +171,10 @@ def __call__( run_id=seed, intensifier=SuccessiveHalving, intensifier_kwargs={ - 'initial_budget': self.initial_budget, - 'max_budget': 100, - 'eta': self.eta, - 'min_chall': 1, + "initial_budget": self.initial_budget, + "max_budget": 100, + "eta": self.eta, + "min_chall": 1, }, dask_client=dask_client, n_jobs=n_jobs, @@ -178,7 +186,6 @@ def __call__( class AutoSklearn2Classifier(AutoSklearnClassifier): - def __init__( self, time_left_for_this_task: int = 3600, @@ -198,7 +205,7 @@ def __init__( metric: Optional[Scorer] = None, scoring_functions: Optional[List[Scorer]] = None, load_models: bool = True, - dataset_compression: Union[bool, Mapping[str, Any]] = True + dataset_compression: Union[bool, Mapping[str, Any]] = True, ): """ @@ -240,11 +247,11 @@ def __init__( Memory limit in MB for the machine learning algorithm. `auto-sklearn` will stop fitting the machine learning algorithm if it tries to allocate more than ``memory_limit`` MB. - - **Important notes:** - + + **Important notes:** + * If ``None`` is provided, no memory limit is set. - * In case of multi-processing, ``memory_limit`` will be *per job*, so the total usage is + * In case of multi-processing, ``memory_limit`` will be *per job*, so the total usage is ``n_jobs x memory_limit``. * The memory limit also applies to the ensemble creation process. @@ -258,12 +265,12 @@ def __init__( n_jobs : int, optional, experimental The number of jobs to run in parallel for ``fit()``. ``-1`` means - using all processors. - - **Important notes**: - - * By default, Auto-sklearn uses one core. 
- * Ensemble building is not affected by ``n_jobs`` but can be controlled by the number + using all processors. + + **Important notes**: + + * By default, Auto-sklearn uses one core. + * Ensemble building is not affected by ``n_jobs`` but can be controlled by the number of models in the ensemble. * ``predict()`` is not affected by ``n_jobs`` (in contrast to most scikit-learn models) * If ``dask_client`` is ``None``, a new dask client is created. @@ -319,11 +326,18 @@ def __init__( """ # noqa (links are too long) include_estimators = [ - 'extra_trees', 'passive_aggressive', 'random_forest', 'sgd', 'gradient_boosting', 'mlp', + "extra_trees", + "passive_aggressive", + "random_forest", + "sgd", + "gradient_boosting", + "mlp", ] include_preprocessors = ["no_preprocessing"] - include = {'classifier': include_estimators, - 'feature_preprocessor': include_preprocessors} + include = { + "classifier": include_estimators, + "feature_preprocessor": include_preprocessors, + } super().__init__( time_left_for_this_task=time_left_for_this_task, per_run_time_limit=per_run_time_limit, @@ -351,31 +365,40 @@ def __init__( load_models=load_models, ) - def fit(self, X, y, - X_test=None, - y_test=None, - metric=None, - feat_type=None, - dataset_name=None): + def fit( + self, + X, + y, + X_test=None, + y_test=None, + metric=None, + feat_type=None, + dataset_name=None, + ): # TODO - # regularly check https://github.com/scikit-learn/scikit-learn/issues/15336 whether - # histogram gradient boosting in scikit-learn finally support sparse data + # regularly check https://github.com/scikit-learn/scikit-learn/issues/15336 + # whether histogram gradient boosting in scikit-learn finally support + # sparse data is_sparse = scipy.sparse.issparse(X) if is_sparse: include_estimators = [ - 'extra_trees', 'passive_aggressive', 'random_forest', 'sgd', 'mlp', + "extra_trees", + "passive_aggressive", + "random_forest", + "sgd", + "mlp", ] else: include_estimators = [ - 'extra_trees', - 'passive_aggressive', - 'random_forest', - 'sgd', - 'gradient_boosting', - 'mlp', + "extra_trees", + "passive_aggressive", + "random_forest", + "sgd", + "gradient_boosting", + "mlp", ] - self.include['classifier'] = include_estimators + self.include["classifier"] = include_estimators if self.metric is None: if len(y.shape) == 1 or y.shape[1] == 1: @@ -387,71 +410,76 @@ def fit(self, X, y, metric_name = self.metric.name selector_file = selector_files[metric_name] else: - metric_name = 'balanced_accuracy' + metric_name = "balanced_accuracy" selector_file = selector_files[metric_name] - with open(selector_file, 'rb') as fh: + with open(selector_file, "rb") as fh: selector = pickle.load(fh) - metafeatures = pd.DataFrame({dataset_name: [X.shape[1], X.shape[0]]}).transpose() + metafeatures = pd.DataFrame( + {dataset_name: [X.shape[1], X.shape[0]]} + ).transpose() selection = np.argmax(selector.predict(metafeatures)) automl_policy = strategies[selection] setting = { - 'RF_None_holdout_iterative_es_if': { - 'resampling_strategy': 'holdout-iterative-fit', - 'fidelity': None, + "RF_None_holdout_iterative_es_if": { + "resampling_strategy": "holdout-iterative-fit", + "fidelity": None, + }, + "RF_None_3CV_iterative_es_if": { + "resampling_strategy": "cv-iterative-fit", + "folds": 3, + "fidelity": None, }, - 'RF_None_3CV_iterative_es_if': { - 'resampling_strategy': 'cv-iterative-fit', - 'folds': 3, - 'fidelity': None, + "RF_None_5CV_iterative_es_if": { + "resampling_strategy": "cv-iterative-fit", + "folds": 5, + "fidelity": 
None, }, - 'RF_None_5CV_iterative_es_if': { - 'resampling_strategy': 'cv-iterative-fit', - 'folds': 5, - 'fidelity': None, + "RF_None_10CV_iterative_es_if": { + "resampling_strategy": "cv-iterative-fit", + "folds": 10, + "fidelity": None, }, - 'RF_None_10CV_iterative_es_if': { - 'resampling_strategy': 'cv-iterative-fit', - 'folds': 10, - 'fidelity': None, + "RF_SH-eta4-i_holdout_iterative_es_if": { + "resampling_strategy": "holdout-iterative-fit", + "fidelity": "SH", }, - 'RF_SH-eta4-i_holdout_iterative_es_if': { - 'resampling_strategy': 'holdout-iterative-fit', - 'fidelity': 'SH', + "RF_SH-eta4-i_3CV_iterative_es_if": { + "resampling_strategy": "cv-iterative-fit", + "folds": 3, + "fidelity": "SH", }, - 'RF_SH-eta4-i_3CV_iterative_es_if': { - 'resampling_strategy': 'cv-iterative-fit', - 'folds': 3, - 'fidelity': 'SH', + "RF_SH-eta4-i_5CV_iterative_es_if": { + "resampling_strategy": "cv-iterative-fit", + "folds": 5, + "fidelity": "SH", }, - 'RF_SH-eta4-i_5CV_iterative_es_if': { - 'resampling_strategy': 'cv-iterative-fit', - 'folds': 5, - 'fidelity': 'SH', + "RF_SH-eta4-i_10CV_iterative_es_if": { + "resampling_strategy": "cv-iterative-fit", + "folds": 10, + "fidelity": "SH", }, - 'RF_SH-eta4-i_10CV_iterative_es_if': { - 'resampling_strategy': 'cv-iterative-fit', - 'folds': 10, - 'fidelity': 'SH', - } }[automl_policy] - resampling_strategy = setting['resampling_strategy'] - if resampling_strategy == 'cv-iterative-fit': - resampling_strategy_kwargs = {'folds': setting['folds']} + resampling_strategy = setting["resampling_strategy"] + if resampling_strategy == "cv-iterative-fit": + resampling_strategy_kwargs = {"folds": setting["folds"]} else: resampling_strategy_kwargs = None portfolio_file = ( - this_directory / metric_name / 'askl2_portfolios' / ('%s.json' % automl_policy) + this_directory + / metric_name + / "askl2_portfolios" + / ("%s.json" % automl_policy) ) with open(portfolio_file) as fh: portfolio_json = json.load(fh) - portfolio = portfolio_json['portfolio'] + portfolio = portfolio_json["portfolio"] - if setting['fidelity'] == 'SH': - smac_callback = SHObjectCallback('iterations', 4, 5.0, portfolio) + if setting["fidelity"] == "SH": + smac_callback = SHObjectCallback("iterations", 4, 5.0, portfolio) else: smac_callback = SmacObjectCallback(portfolio) diff --git a/autosklearn/experimental/selector.py b/autosklearn/experimental/selector.py index 3ef681bef8..125cba6125 100644 --- a/autosklearn/experimental/selector.py +++ b/autosklearn/experimental/selector.py @@ -1,6 +1,7 @@ +import typing + import copy import itertools -import typing import numpy as np import pandas as pd @@ -9,7 +10,6 @@ class AbstractSelector: - def fit( self, X: pd.DataFrame, @@ -19,18 +19,22 @@ def fit( ) -> None: raise NotImplementedError() - def predict(self, X: pd.DataFrame, y: typing.Optional[pd.DataFrame] = None) -> pd.DataFrame: + def predict( + self, X: pd.DataFrame, y: typing.Optional[pd.DataFrame] = None + ) -> pd.DataFrame: prediction = self._predict(X, y) for col, series in prediction.iteritems(): assert series.dtype == float, (col, series) np.testing.assert_array_almost_equal( - prediction.sum(axis='columns').to_numpy(), + prediction.sum(axis="columns").to_numpy(), np.ones(X.shape[0]), err_msg=prediction.to_csv(), ) return prediction - def _predict(self, X: pd.DataFrame, y: typing.Optional[pd.DataFrame]) -> pd.DataFrame: + def _predict( + self, X: pd.DataFrame, y: typing.Optional[pd.DataFrame] + ) -> pd.DataFrame: raise NotImplementedError() @@ -68,43 +72,60 @@ def fit( weights[i] = dict() for j in 
range(i + 1, len(target_indices)): - if self.configuration['normalization'] in ('all', 'binary', 'y', 'all1', - 'binary1'): + if self.configuration["normalization"] in ( + "all", + "binary", + "y", + "all1", + "binary1", + ): minimum2 = np.ones(len(X)) * np.inf maximum2 = np.zeros(len(X)) - if self.configuration['normalization'] in ('all', 'all1'): + if self.configuration["normalization"] in ("all", "all1"): for idx, task_id in enumerate(X.index): for method_id in range(len(target_indices)): - minimum2[idx] = np.nanmin(( - minimum2[idx], - minima[task_id][self.strategies_[method_id]] - )) - maximum2[idx] = np.nanmax(( - maximum2[idx], - maxima[task_id][self.strategies_[method_id]] - )) - if self.configuration['normalization'] == 'all1': + minimum2[idx] = np.nanmin( + ( + minimum2[idx], + minima[task_id][self.strategies_[method_id]], + ) + ) + maximum2[idx] = np.nanmax( + ( + maximum2[idx], + maxima[task_id][self.strategies_[method_id]], + ) + ) + if self.configuration["normalization"] == "all1": maximum2 = np.ones_like(maximum2) - elif self.configuration['normalization'] in ('binary', 'binary1'): + elif self.configuration["normalization"] in ("binary", "binary1"): for idx, task_id in enumerate(X.index): for method_id in (i, j): - minimum2[idx] = np.nanmin(( - minimum2[idx], - minima[task_id][self.strategies_[method_id]] - )) - maximum2[idx] = np.nanmax(( - maximum2[idx], - maxima[task_id][self.strategies_[method_id]] - )) - if self.configuration['normalization'] == 'binary1': + minimum2[idx] = np.nanmin( + ( + minimum2[idx], + minima[task_id][self.strategies_[method_id]], + ) + ) + maximum2[idx] = np.nanmax( + ( + maximum2[idx], + maxima[task_id][self.strategies_[method_id]], + ) + ) + if self.configuration["normalization"] == "binary1": maximum2 = np.ones_like(maximum2) - elif self.configuration['normalization'] == 'y': + elif self.configuration["normalization"] == "y": for idx, task_id in enumerate(X.index): - minimum2[idx] = np.nanmin((minimum2[idx], y_pd.loc[task_id].min())) - maximum2[idx] = np.nanmax((maximum2[idx], y_pd.loc[task_id].max())) + minimum2[idx] = np.nanmin( + (minimum2[idx], y_pd.loc[task_id].min()) + ) + maximum2[idx] = np.nanmax( + (maximum2[idx], y_pd.loc[task_id].max()) + ) else: - raise ValueError(self.configuration['normalization']) + raise ValueError(self.configuration["normalization"]) y_i_j = y[:, i] < y[:, j] mask = np.isfinite(y[:, i]) & np.isfinite(y[:, j]) @@ -121,7 +142,7 @@ def fit( weights_i_j = np.abs(normalized_y_i - normalized_y_j) - elif self.configuration['normalization'] == 'rank': + elif self.configuration["normalization"] == "rank": y_i_j = y[:, i] < y[:, j] mask = np.isfinite(y[:, i]) & np.isfinite(y[:, j]) X_ = X.to_numpy()[mask] @@ -129,7 +150,7 @@ def fit( ranks = scipy.stats.rankdata(y[mask], axis=1) weights_i_j = np.abs(ranks[:, i] - ranks[:, j]) - elif self.configuration['normalization'] == 'None': + elif self.configuration["normalization"] == "None": y_i_j = y[:, i] < y[:, j] mask = np.isfinite(y[:, i]) & np.isfinite(y[:, j]) X_ = X.to_numpy()[mask] @@ -137,7 +158,7 @@ def fit( weights_i_j = np.ones_like(y_i_j).astype(int) else: - raise ValueError(self.configuration['normalization']) + raise ValueError(self.configuration["normalization"]) if len(y_i_j) == 0: models[i][j] = None @@ -148,21 +169,25 @@ def fit( n_zeros = int(np.ceil(len(y_i_j) / 2)) n_ones = int(np.floor(len(y_i_j) / 2)) import sklearn.dummy - base_model = sklearn.dummy.DummyClassifier(strategy='constant', - constant=y_i_j[0]) + + base_model = sklearn.dummy.DummyClassifier( + 
strategy="constant", constant=y_i_j[0] + ) base_model.fit( X_, np.array(([[0]] * n_zeros) + ([[1]] * n_ones)).flatten(), sample_weight=weights_i_j, ) else: - if self.configuration.get('max_depth') == 0: + if self.configuration.get("max_depth") == 0: import sklearn.dummy + loss_i = np.sum((y_i_j == 0) * weights_i_j) loss_j = np.sum((y_i_j == 1) * weights_i_j) base_model = sklearn.dummy.DummyClassifier( - strategy='constant', constant=1 if loss_i < loss_j else 0, + strategy="constant", + constant=1 if loss_i < loss_j else 0, ) base_model.fit( X_, @@ -171,7 +196,11 @@ def fit( ) else: base_model = self.fit_pairwise_model( - X_, y_i_j, weights_i_j, self.rng, self.configuration, + X_, + y_i_j, + weights_i_j, + self.rng, + self.configuration, ) models[i][j] = base_model weights[i][j] = weights_i_j @@ -179,7 +208,9 @@ def fit( self.weights_ = weights self.target_indices = target_indices - def _predict(self, X: pd.DataFrame, y: typing.Optional[pd.DataFrame]) -> pd.DataFrame: + def _predict( + self, X: pd.DataFrame, y: typing.Optional[pd.DataFrame] + ) -> pd.DataFrame: if y is not None: raise ValueError("y must not be provided") @@ -193,7 +224,9 @@ def _predict(self, X: pd.DataFrame, y: typing.Optional[pd.DataFrame]) -> pd.Data raw_probas[(i, j)] = self.models[i][j].predict_proba(X) if len(raw_predictions) == 0: - predictions = pd.DataFrame(0, index=X.index, columns=self.strategies_).astype(float) + predictions = pd.DataFrame( + 0, index=X.index, columns=self.strategies_ + ).astype(float) predictions.iloc[:, self.single_strategy_idx] = 1.0 return predictions @@ -203,21 +236,21 @@ def _predict(self, X: pd.DataFrame, y: typing.Optional[pd.DataFrame]) -> pd.Data for i in range(len(self.target_indices)): for j in range(i + 1, len(self.target_indices)): if (i, j) in raw_predictions: - if self.configuration['prediction'] == 'soft': + if self.configuration["prediction"] == "soft": if raw_probas[(i, j)].shape[1] == 1: proba = raw_probas[(i, j)][x_idx][0] else: proba = raw_probas[(i, j)][x_idx][1] wins[i] += proba wins[j] += 1 - proba - elif self.configuration['prediction'] == 'hard': + elif self.configuration["prediction"] == "hard": prediction = raw_predictions[(i, j)][x_idx] if prediction == 1: wins[i] += 1 else: wins[j] += 1 else: - raise ValueError(self.configuration['prediction']) + raise ValueError(self.configuration["prediction"]) n_prev = np.inf # Tie breaking @@ -236,7 +269,9 @@ def _predict(self, X: pd.DataFrame, y: typing.Optional[pd.DataFrame]) -> pd.Data hit = True break if not hit: - wins[int(self.rng.choice(np.argwhere(most_wins_mask).flatten()))] += 1 + wins[ + int(self.rng.choice(np.argwhere(most_wins_mask).flatten())) + ] += 1 elif np.sum(most_wins_mask) > 1: n_prev = np.sum(most_wins_mask) where = np.argwhere(most_wins_mask).flatten() @@ -250,10 +285,9 @@ def _predict(self, X: pd.DataFrame, y: typing.Optional[pd.DataFrame]) -> pd.Data else: method_i = self.strategies_[i] method_j = self.strategies_[j] - if ( - self.tie_break_order.index(method_i) - < self.tie_break_order.index(method_j) - ): + if self.tie_break_order.index( + method_i + ) < self.tie_break_order.index(method_j): wins[i] += 1 else: wins[j] += 1 @@ -288,18 +322,17 @@ def fit_pairwise_model(self, X, y, weights, rng, configuration): base_model = sklearn.ensemble.RandomForestClassifier( random_state=rng, n_estimators=self.n_estimators, - bootstrap=True if configuration['bootstrap'] == 'True' else False, - min_samples_split=configuration['min_samples_split'], - min_samples_leaf=configuration['min_samples_leaf'], - 
max_features=int(configuration['max_features']), - max_depth=configuration['max_depth'], + bootstrap=True if configuration["bootstrap"] == "True" else False, + min_samples_split=configuration["min_samples_split"], + min_samples_leaf=configuration["min_samples_leaf"], + max_features=int(configuration["max_features"]), + max_depth=configuration["max_depth"], ) base_model.fit(X, y, sample_weight=weights) return base_model class FallbackWrapper(AbstractSelector): - def __init__(self, selector, default_strategies: typing.List[str]): self.selector = selector self.default_strategies = default_strategies @@ -313,16 +346,19 @@ def fit( ) -> None: self.X_ = X self.strategies_ = y.columns - self.rval_ = np.array([ - ( - len(self.strategies_) - self.default_strategies.index(strategy) - 1 - ) / (len(self.strategies_) - 1) - for strategy in self.strategies_ - ]) + self.rval_ = np.array( + [ + (len(self.strategies_) - self.default_strategies.index(strategy) - 1) + / (len(self.strategies_) - 1) + for strategy in self.strategies_ + ] + ) self.rval_ = self.rval_ / np.sum(self.rval_) self.selector.fit(X, y, minima, maxima) - def _predict(self, X: pd.DataFrame, y: typing.Optional[pd.DataFrame]) -> pd.DataFrame: + def _predict( + self, X: pd.DataFrame, y: typing.Optional[pd.DataFrame] + ) -> pd.DataFrame: if y is not None: prediction = self.selector.predict(X, y) @@ -338,8 +374,11 @@ def _predict(self, X: pd.DataFrame, y: typing.Optional[pd.DataFrame]) -> pd.Data counter += 1 if counter == 0: - prediction.loc[task_id] = pd.Series({ - strategy: value for strategy, value in zip(self.strategies_, self.rval_) - }) + prediction.loc[task_id] = pd.Series( + { + strategy: value + for strategy, value in zip(self.strategies_, self.rval_) + } + ) return prediction diff --git a/autosklearn/metalearning/__init__.py b/autosklearn/metalearning/__init__.py index cc3cd7becd..e298f0f075 100644 --- a/autosklearn/metalearning/__init__.py +++ b/autosklearn/metalearning/__init__.py @@ -1,2 +1,2 @@ # -*- encoding: utf-8 -*- -__author__ = 'feurerm' +__author__ = "feurerm" diff --git a/autosklearn/metalearning/input/aslib_simple.py b/autosklearn/metalearning/input/aslib_simple.py index 7bac637c50..833242729d 100644 --- a/autosklearn/metalearning/input/aslib_simple.py +++ b/autosklearn/metalearning/input/aslib_simple.py @@ -1,7 +1,7 @@ -from collections import defaultdict, OrderedDict import csv import logging import os +from collections import OrderedDict, defaultdict import arff import pandas as pd @@ -24,7 +24,7 @@ def __init__(self, directory): # "feature_runstatus.arff": self._read_feature_runstatus, # "ground_truth.arff": self._read_ground_truth, # "cv.arff": self._read_cv, - "configurations.csv": self._read_configurations + "configurations.csv": self._read_configurations, } self.found_files = [] @@ -33,24 +33,28 @@ def __init__(self, directory): self._read_files() def _find_files(self): - ''' - find all expected files in self.dir_ - fills self.found_files - ''' + """ + find all expected files in self.dir_ + fills self.found_files + """ expected = [ # "description.txt", "algorithm_runs.arff", "feature_values.arff", # "feature_runstatus.arff", ] - optional = ["ground_truth.arff", "feature_costs.arff", "citation.bib", - "cv.arff", "configurations.csv"] + optional = [ + "ground_truth.arff", + "feature_costs.arff", + "citation.bib", + "cv.arff", + "configurations.csv", + ] for expected_file in expected: full_path = os.path.join(self.dir_, expected_file) if not os.path.isfile(full_path): - self.logger.error( - "Not found: %s (has to 
be added)" % (full_path)) + self.logger.error("Not found: %s (has to be added)" % (full_path)) else: self.found_files.append(full_path) @@ -64,10 +68,10 @@ def _find_files(self): self.found_files.append(full_path) def _read_files(self): - ''' - iterates over all found files (self.found_files) and - calls the corresponding function to validate file - ''' + """ + iterates over all found files (self.found_files) and + calls the corresponding function to validate file + """ for file_ in self.found_files: read_func = self.read_funcs.get(os.path.basename(file_)) if read_func: @@ -79,15 +83,18 @@ def _read_algorithm_runs(self, filename): if arff_dict["attributes"][0][0].upper() != "INSTANCE_ID": self.logger.error( - "instance_id as first attribute is missing in %s" % (filename)) + "instance_id as first attribute is missing in %s" % (filename) + ) if arff_dict["attributes"][1][0].upper() != "REPETITION": self.logger.error( - "repetition as second attribute is missing in %s" % (filename)) + "repetition as second attribute is missing in %s" % (filename) + ) if arff_dict["attributes"][2][0].upper() != "ALGORITHM": self.logger.error( - "algorithm as third attribute is missing in %s" % (filename)) + "algorithm as third attribute is missing in %s" % (filename) + ) - performance_measures = [pm[0] for pm in arff_dict['attributes'][3:-1]] + performance_measures = [pm[0] for pm in arff_dict["attributes"][3:-1]] measure_instance_algorithm_triples = defaultdict(lambda: defaultdict(dict)) for data in arff_dict["data"]: @@ -97,18 +104,20 @@ def _read_algorithm_runs(self, filename): perf_list = data[3:-1] status = data[-1] - if status != 'ok': + if status != "ok": continue for i, performance_measure in enumerate(performance_measures): - measure_instance_algorithm_triples[performance_measure][ - inst_name][algorithm] = perf_list[i] + measure_instance_algorithm_triples[performance_measure][inst_name][ + algorithm + ] = perf_list[i] # TODO: this does not support any repetitions! 
measure_algorithm_matrices = OrderedDict() for pm in performance_measures: measure_algorithm_matrices[pm] = pd.DataFrame( - measure_instance_algorithm_triples[pm]).transpose() + measure_instance_algorithm_triples[pm] + ).transpose() self.algorithm_runs = measure_algorithm_matrices @@ -122,9 +131,10 @@ def _read_feature_values(self, filename): # repetition = data[1] features = data[2:] - metafeatures[inst_name] = {feature[0]: feature_value - for feature, feature_value in - zip(arff_dict['attributes'][2:], features)} + metafeatures[inst_name] = { + feature[0]: feature_value + for feature, feature_value in zip(arff_dict["attributes"][2:], features) + } self.metafeatures = pd.DataFrame(metafeatures).transpose() @@ -135,9 +145,9 @@ def _read_configurations(self, filename): configurations = dict() for line in csv_reader: configuration = dict() - algorithm_id = line['idx'] + algorithm_id = line["idx"] for hp_name, value in line.items(): - if not value or hp_name == 'idx': + if not value or hp_name == "idx": continue try: diff --git a/autosklearn/metalearning/metafeatures/metafeature.py b/autosklearn/metalearning/metafeatures/metafeature.py index 821a5033f4..033b76116b 100644 --- a/autosklearn/metalearning/metafeatures/metafeature.py +++ b/autosklearn/metalearning/metafeatures/metafeature.py @@ -1,6 +1,7 @@ from abc import ABCMeta, abstractmethod -from io import StringIO + import time +from io import StringIO import arff import scipy.sparse @@ -33,8 +34,15 @@ def __call__(self, X, y, logger, categorical=None): comment = "Memory Error" endtime = time.time() - return MetaFeatureValue(self.__class__.__name__, self.type_, - 0, 0, value, endtime-starttime, comment=comment) + return MetaFeatureValue( + self.__class__.__name__, + self.type_, + 0, + 0, + value, + endtime - starttime, + comment=comment, + ) class MetaFeature(AbstractMetaFeature): @@ -65,15 +73,26 @@ def to_arff_row(self): else: value = "?" 
- return [self.name, self.type_, self.fold, - self.repeat, value, self.time, self.comment] + return [ + self.name, + self.type_, + self.fold, + self.repeat, + value, + self.time, + self.comment, + ] def __repr__(self): - repr = "%s (type: %s, fold: %d, repeat: %d, value: %s, time: %3.3f, " \ - "comment: %s)" - repr = repr % tuple(self.to_arff_row()[:4] + - [str(self.to_arff_row()[4])] + - self.to_arff_row()[5:]) + repr = ( + "%s (type: %s, fold: %d, repeat: %d, value: %s, time: %3.3f, " + "comment: %s)" + ) + repr = repr % tuple( + self.to_arff_row()[:4] + + [str(self.to_arff_row()[4])] + + self.to_arff_row()[5:] + ) return repr @@ -84,19 +103,21 @@ def __init__(self, dataset_name, metafeature_values): def _get_arff(self): output = dict() - output['relation'] = "metafeatures_%s" % (self.dataset_name) - output['description'] = "" - output['attributes'] = [('name', 'STRING'), - ('type', 'STRING'), - ('fold', 'NUMERIC'), - ('repeat', 'NUMERIC'), - ('value', 'NUMERIC'), - ('time', 'NUMERIC'), - ('comment', 'STRING')] - output['data'] = [] + output["relation"] = "metafeatures_%s" % (self.dataset_name) + output["description"] = "" + output["attributes"] = [ + ("name", "STRING"), + ("type", "STRING"), + ("fold", "NUMERIC"), + ("repeat", "NUMERIC"), + ("value", "NUMERIC"), + ("time", "NUMERIC"), + ("comment", "STRING"), + ] + output["data"] = [] for key in sorted(self.metafeature_values): - output['data'].append(self.metafeature_values[key].to_arff_row()) + output["data"].append(self.metafeature_values[key].to_arff_row()) return output def dumps(self): @@ -120,9 +141,9 @@ def load(cls, path_or_filehandle): else: input = arff.load(path_or_filehandle) - dataset_name = input['relation'].replace('metafeatures_', '') + dataset_name = input["relation"].replace("metafeatures_", "") metafeature_values = [] - for item in input['data']: + for item in input["data"]: mf = MetaFeatureValue(*item) metafeature_values.append(mf) @@ -135,13 +156,18 @@ def __repr__(self, verbosity=0): if verbosity == 0 and self.metafeature_values[name].type_ != "METAFEATURE": continue if verbosity == 0: - repr.write(" %s: %s\n" % - (str(name), str(self.metafeature_values[name].value))) + repr.write( + " %s: %s\n" % (str(name), str(self.metafeature_values[name].value)) + ) elif verbosity >= 1: - repr.write(" %s: %10s (%10fs)\n" % - (str(name), str(self.metafeature_values[ - name].value)[:10], - self.metafeature_values[name].time)) + repr.write( + " %s: %10s (%10fs)\n" + % ( + str(name), + str(self.metafeature_values[name].value)[:10], + self.metafeature_values[name].time, + ) + ) # Add the reason for a crash if one happened! if verbosity > 1 and self.metafeature_values[name].comment: diff --git a/autosklearn/metalearning/metafeatures/metafeatures.py b/autosklearn/metalearning/metafeatures/metafeatures.py index 9652cfc673..3c95fbf22f 100644 --- a/autosklearn/metalearning/metafeatures/metafeatures.py +++ b/autosklearn/metalearning/metafeatures/metafeatures.py @@ -1,22 +1,22 @@ -from collections import defaultdict, OrderedDict, deque import copy +from collections import OrderedDict, defaultdict, deque import numpy as np - import pandas as pd - +import scipy.sparse import scipy.stats from scipy.linalg import LinAlgError -import scipy.sparse # TODO use balanced accuracy! 
from sklearn.multiclass import OneVsRestClassifier from sklearn.utils import check_array from sklearn.utils.multiclass import type_of_target -from autosklearn.pipeline.components.data_preprocessing.feature_type \ - import FeatTypeSplit -from .metafeature import MetaFeature, HelperFunction, DatasetMetafeatures +from autosklearn.pipeline.components.data_preprocessing.feature_type import ( + FeatTypeSplit, +) + +from .metafeature import DatasetMetafeatures, HelperFunction, MetaFeature # TODO Allow multiple dependencies for a metafeature @@ -62,10 +62,12 @@ def define(self, name): """Decorator for adding helper functions to a "dictionary". This behaves like a function decorating a function, not a class decorating a function""" + def wrapper(metafeature_class): instance = metafeature_class() self.__setitem__(name, instance) return instance + return wrapper @@ -107,19 +109,20 @@ def is_calculated(self, key): return key in self.values def get_dependency(self, name): - """Return the dependency of metafeature "name". - """ + """Return the dependency of metafeature "name".""" return self.dependencies.get(name) def define(self, name, dependency=None): """Decorator for adding metafeature functions to a "dictionary" of metafeatures. This behaves like a function decorating a function, not a class decorating a function""" + def wrapper(metafeature_class): instance = metafeature_class() self.__setitem__(name, instance) self.dependencies[name] = dependency return instance + return wrapper @@ -136,8 +139,7 @@ def _calculate(self, X, y, logger, categorical): return float(X.shape[0]) -@metafeatures.define("LogNumberOfInstances", - dependency="NumberOfInstances") +@metafeatures.define("LogNumberOfInstances", dependency="NumberOfInstances") class LogNumberOfInstances(MetaFeature): def _calculate(self, X, y, logger, categorical): return np.log(metafeatures.get_value("NumberOfInstances")) @@ -151,8 +153,9 @@ class NumberOfClasses(MetaFeature): Calls np.unique on the targets. If the dataset is a multilabel dataset, does this for each label seperately and returns the mean. 
""" + def _calculate(self, X, y, logger, categorical): - if type_of_target(y) == 'multilabel-indicator': + if type_of_target(y) == "multilabel-indicator": # We have a label binary indicator array: # each sample is one row of a 2d array of shape (n_samples, n_classes) return y.shape[1] @@ -168,8 +171,7 @@ def _calculate(self, X, y, logger, categorical): return float(X.shape[1]) -@metafeatures.define("LogNumberOfFeatures", - dependency="NumberOfFeatures") +@metafeatures.define("LogNumberOfFeatures", dependency="NumberOfFeatures") class LogNumberOfFeatures(MetaFeature): def _calculate(self, X, y, logger, categorical): return np.log(metafeatures.get_value("NumberOfFeatures")) @@ -183,13 +185,11 @@ def _calculate(self, X, y, logger, categorical): def _calculate_sparse(self, X, y, logger, categorical): data = [True if not np.isfinite(x) else False for x in X.data] - missing = X.__class__((data, X.indices, X.indptr), shape=X.shape, - dtype=bool) + missing = X.__class__((data, X.indices, X.indptr), shape=X.shape, dtype=bool) return missing -@metafeatures.define("NumberOfInstancesWithMissingValues", - dependency="MissingValues") +@metafeatures.define("NumberOfInstancesWithMissingValues", dependency="MissingValues") class NumberOfInstancesWithMissingValues(MetaFeature): def _calculate(self, X, y, logger, categorical): missing = helper_functions.get_value("MissingValues") @@ -199,14 +199,18 @@ def _calculate(self, X, y, logger, categorical): def _calculate_sparse(self, X, y, logger, categorical): missing = helper_functions.get_value("MissingValues") new_missing = missing.tocsr() - num_missing = [np.sum(new_missing.data[new_missing.indptr[i]:new_missing.indptr[i + 1]]) - for i in range(new_missing.shape[0])] + num_missing = [ + np.sum(new_missing.data[new_missing.indptr[i] : new_missing.indptr[i + 1]]) + for i in range(new_missing.shape[0]) + ] return float(np.sum([1 if num > 0 else 0 for num in num_missing])) -@metafeatures.define("PercentageOfInstancesWithMissingValues", - dependency="NumberOfInstancesWithMissingValues") +@metafeatures.define( + "PercentageOfInstancesWithMissingValues", + dependency="NumberOfInstancesWithMissingValues", +) class PercentageOfInstancesWithMissingValues(MetaFeature): def _calculate(self, X, y, logger, categorical): n_missing = metafeatures.get_value("NumberOfInstancesWithMissingValues") @@ -214,8 +218,7 @@ def _calculate(self, X, y, logger, categorical): return float(n_missing / n_total) -@metafeatures.define("NumberOfFeaturesWithMissingValues", - dependency="MissingValues") +@metafeatures.define("NumberOfFeaturesWithMissingValues", dependency="MissingValues") class NumberOfFeaturesWithMissingValues(MetaFeature): def _calculate(self, X, y, logger, categorical): missing = helper_functions.get_value("MissingValues") @@ -225,15 +228,18 @@ def _calculate(self, X, y, logger, categorical): def _calculate_sparse(self, X, y, logger, categorical): missing = helper_functions.get_value("MissingValues") new_missing = missing.tocsc() - num_missing = [np.sum( - new_missing.data[new_missing.indptr[i]:new_missing.indptr[i+1]]) - for i in range(missing.shape[1])] + num_missing = [ + np.sum(new_missing.data[new_missing.indptr[i] : new_missing.indptr[i + 1]]) + for i in range(missing.shape[1]) + ] return float(np.sum([1 if num > 0 else 0 for num in num_missing])) -@metafeatures.define("PercentageOfFeaturesWithMissingValues", - dependency="NumberOfFeaturesWithMissingValues") +@metafeatures.define( + "PercentageOfFeaturesWithMissingValues", + dependency="NumberOfFeaturesWithMissingValues", 
+) class PercentageOfFeaturesWithMissingValues(MetaFeature): def _calculate(self, X, y, logger, categorical): n_missing = metafeatures.get_value("NumberOfFeaturesWithMissingValues") @@ -250,12 +256,12 @@ def _calculate(self, X, y, logger, categorical): return float(np.count_nonzero(helper_functions.get_value("MissingValues"))) -@metafeatures.define("PercentageOfMissingValues", - dependency="NumberOfMissingValues") +@metafeatures.define("PercentageOfMissingValues", dependency="NumberOfMissingValues") class PercentageOfMissingValues(MetaFeature): def _calculate(self, X, y, logger, categorical): - return float(metafeatures.get_value("NumberOfMissingValues")) / \ - float(X.shape[0]*X.shape[1]) + return float(metafeatures.get_value("NumberOfMissingValues")) / float( + X.shape[0] * X.shape[1] + ) # TODO: generalize this! @@ -274,24 +280,28 @@ def _calculate(self, X, y, logger, categorical): @metafeatures.define("RatioNumericalToNominal") class RatioNumericalToNominal(MetaFeature): def _calculate(self, X, y, logger, categorical): - num_categorical = float(metafeatures[ - "NumberOfCategoricalFeatures"](X, y, logger, categorical).value) - num_numerical = float(metafeatures[ - "NumberOfNumericFeatures"](X, y, logger, categorical).value) + num_categorical = float( + metafeatures["NumberOfCategoricalFeatures"](X, y, logger, categorical).value + ) + num_numerical = float( + metafeatures["NumberOfNumericFeatures"](X, y, logger, categorical).value + ) if num_categorical == 0.0: - return 0. + return 0.0 return num_numerical / num_categorical @metafeatures.define("RatioNominalToNumerical") class RatioNominalToNumerical(MetaFeature): def _calculate(self, X, y, logger, categorical): - num_categorical = float(metafeatures[ - "NumberOfCategoricalFeatures"](X, y, logger, categorical).value) - num_numerical = float(metafeatures[ - "NumberOfNumericFeatures"](X, y, logger, categorical).value) + num_categorical = float( + metafeatures["NumberOfCategoricalFeatures"](X, y, logger, categorical).value + ) + num_numerical = float( + metafeatures["NumberOfNumericFeatures"](X, y, logger, categorical).value + ) if num_numerical == 0.0: - return 0. 
+ return 0.0 else: return num_categorical / num_numerical @@ -300,8 +310,9 @@ def _calculate(self, X, y, logger, categorical): @metafeatures.define("DatasetRatio") class DatasetRatio(MetaFeature): def _calculate(self, X, y, logger, categorical): - return float(metafeatures["NumberOfFeatures"](X, y, logger).value) /\ - float(metafeatures["NumberOfInstances"](X, y, logger).value) + return float(metafeatures["NumberOfFeatures"](X, y, logger).value) / float( + metafeatures["NumberOfInstances"](X, y, logger).value + ) @metafeatures.define("LogDatasetRatio", dependency="DatasetRatio") @@ -313,12 +324,12 @@ def _calculate(self, X, y, logger, categorical): @metafeatures.define("InverseDatasetRatio") class InverseDatasetRatio(MetaFeature): def _calculate(self, X, y, logger, categorical): - return float(metafeatures["NumberOfInstances"](X, y, logger).value) /\ - float(metafeatures["NumberOfFeatures"](X, y, logger).value) + return float(metafeatures["NumberOfInstances"](X, y, logger).value) / float( + metafeatures["NumberOfFeatures"](X, y, logger).value + ) -@metafeatures.define("LogInverseDatasetRatio", - dependency="InverseDatasetRatio") +@metafeatures.define("LogInverseDatasetRatio", dependency="InverseDatasetRatio") class LogInverseDatasetRatio(MetaFeature): def _calculate(self, X, y, logger, categorical): return np.log(metafeatures.get_value("InverseDatasetRatio")) @@ -385,12 +396,13 @@ def _calculate(self, X, y, logger, categorical): occurences = [] for i in range(y.shape[1]): occurences.extend( - [occurrence for occurrence in occurence_dict[ - i].values()]) + [occurrence for occurrence in occurence_dict[i].values()] + ) occurences = np.array(occurences) else: - occurences = np.array([occurrence for occurrence in occurence_dict.values()], - dtype=np.float64) + occurences = np.array( + [occurrence for occurrence in occurence_dict.values()], dtype=np.float64 + ) return (occurences / y.shape[0]).mean() @@ -403,15 +415,16 @@ def _calculate(self, X, y, logger, categorical): stds = [] for i in range(y.shape[1]): std = np.array( - [occurrence for occurrence in occurence_dict[ - i].values()], - dtype=np.float64) + [occurrence for occurrence in occurence_dict[i].values()], + dtype=np.float64, + ) std = (std / y.shape[0]).std() stds.append(std) return np.mean(stds) else: - occurences = np.array([occurrence for occurrence in occurence_dict.values()], - dtype=np.float64) + occurences = np.array( + [occurrence for occurrence in occurence_dict.values()], dtype=np.float64 + ) return (occurences / y.shape[0]).std() @@ -424,10 +437,11 @@ class NumSymbols(HelperFunction): def _calculate(self, X, y, logger, categorical): symbols_per_column = [] for i in range(X.shape[1]): - if categorical[X.columns[i] if hasattr(X, 'columns') else i]: - column = X.iloc[:, i] if hasattr(X, 'iloc') else X[:, i] - unique_values = column.unique() if hasattr( - column, 'unique') else np.unique(column) + if categorical[X.columns[i] if hasattr(X, "columns") else i]: + column = X.iloc[:, i] if hasattr(X, "iloc") else X[:, i] + unique_values = ( + column.unique() if hasattr(column, "unique") else np.unique(column) + ) num_unique = np.sum(pd.notna(unique_values)) symbols_per_column.append(num_unique) return symbols_per_column @@ -436,7 +450,7 @@ def _calculate_sparse(self, X, y, logger, categorical): symbols_per_column = [] new_X = X.tocsc() for i in range(new_X.shape[1]): - if categorical[X.columns[i] if hasattr(X, 'columns') else i]: + if categorical[X.columns[i] if hasattr(X, "columns") else i]: unique_values = 
np.unique(new_X.getcol(i).data) num_unique = np.sum(np.isfinite(unique_values)) symbols_per_column.append(num_unique) @@ -489,6 +503,7 @@ def _calculate(self, X, y, logger, categorical): sum = np.nansum(helper_functions.get_value("NumSymbols")) return sum if np.isfinite(sum) else 0 + ################################################################################ # Statistical meta features # Only use third and fourth statistical moment because it is common to @@ -502,19 +517,21 @@ class Kurtosisses(HelperFunction): def _calculate(self, X, y, logger, categorical): kurts = [] for i in range(X.shape[1]): - if not categorical[X.columns[i] if hasattr(X, 'columns') else i]: - kurts.append(scipy.stats.kurtosis( - X.iloc[:, i] if hasattr(X, 'iloc') else X[:, i] - )) + if not categorical[X.columns[i] if hasattr(X, "columns") else i]: + kurts.append( + scipy.stats.kurtosis( + X.iloc[:, i] if hasattr(X, "iloc") else X[:, i] + ) + ) return kurts def _calculate_sparse(self, X, y, logger, categorical): kurts = [] X_new = X.tocsc() for i in range(X_new.shape[1]): - if not categorical[X.columns[i] if hasattr(X, 'columns') else i]: + if not categorical[X.columns[i] if hasattr(X, "columns") else i]: start = X_new.indptr[i] - stop = X_new.indptr[i+1] + stop = X_new.indptr[i + 1] kurts.append(scipy.stats.kurtosis(X_new.data[start:stop])) return kurts @@ -556,17 +573,17 @@ class Skewnesses(HelperFunction): def _calculate(self, X, y, logger, categorical): skews = [] for i in range(X.shape[1]): - if not categorical[X.columns[i] if hasattr(X, 'columns') else i]: - skews.append(scipy.stats.skew( - X.iloc[:, i] if hasattr(X, 'iloc') else X[:, i] - )) + if not categorical[X.columns[i] if hasattr(X, "columns") else i]: + skews.append( + scipy.stats.skew(X.iloc[:, i] if hasattr(X, "iloc") else X[:, i]) + ) return skews def _calculate_sparse(self, X, y, logger, categorical): skews = [] X_new = X.tocsc() for i in range(X_new.shape[1]): - if not categorical[X.columns[i] if hasattr(X, 'columns') else i]: + if not categorical[X.columns[i] if hasattr(X, "columns") else i]: start = X_new.indptr[i] stop = X_new.indptr[i + 1] skews.append(scipy.stats.skew(X_new.data[start:stop])) @@ -628,8 +645,11 @@ def _calculate(self, X, y, logger, categorical): occurence_dict = defaultdict(float) for value in y if labels == 1 else y[:, i]: occurence_dict[value] += 1 - entropies.append(scipy.stats.entropy([occurence_dict[key] for key in - occurence_dict], base=2)) + entropies.append( + scipy.stats.entropy( + [occurence_dict[key] for key in occurence_dict], base=2 + ) + ) return np.mean(entropies) @@ -669,34 +689,35 @@ def _calculate(self, X, y, logger, categorical): class LandmarkLDA(MetaFeature): def _calculate(self, X, y, logger, categorical): import sklearn.discriminant_analysis - if type(y) in ('binary', 'multiclass'): + + if type(y) in ("binary", "multiclass"): kf = sklearn.model_selection.StratifiedKFold(n_splits=5) else: kf = sklearn.model_selection.KFold(n_splits=5) - accuracy = 0. 
+ accuracy = 0.0 try: for train, test in kf.split(X, y): lda = sklearn.discriminant_analysis.LinearDiscriminantAnalysis() if len(y.shape) == 1 or y.shape[1] == 1: lda.fit( - X.iloc[train] if hasattr(X, 'iloc') else X[train], - y.iloc[train] if hasattr(y, 'iloc') else y[train], + X.iloc[train] if hasattr(X, "iloc") else X[train], + y.iloc[train] if hasattr(y, "iloc") else y[train], ) else: lda = OneVsRestClassifier(lda) lda.fit( - X.iloc[train] if hasattr(X, 'iloc') else X[train], - y.iloc[train] if hasattr(y, 'iloc') else y[train], + X.iloc[train] if hasattr(X, "iloc") else X[train], + y.iloc[train] if hasattr(y, "iloc") else y[train], ) predictions = lda.predict( - X.iloc[test] if hasattr(X, 'iloc') else X[test], + X.iloc[test] if hasattr(X, "iloc") else X[test], ) accuracy += sklearn.metrics.accuracy_score( predictions, - y.iloc[test] if hasattr(y, 'iloc') else y[test], + y.iloc[test] if hasattr(y, "iloc") else y[test], ) return accuracy / 5 except scipy.linalg.LinAlgError as e: @@ -716,33 +737,33 @@ class LandmarkNaiveBayes(MetaFeature): def _calculate(self, X, y, logger, categorical): import sklearn.naive_bayes - if type(y) in ('binary', 'multiclass'): + if type(y) in ("binary", "multiclass"): kf = sklearn.model_selection.StratifiedKFold(n_splits=5) else: kf = sklearn.model_selection.KFold(n_splits=5) - accuracy = 0. + accuracy = 0.0 for train, test in kf.split(X, y): nb = sklearn.naive_bayes.GaussianNB() if len(y.shape) == 1 or y.shape[1] == 1: nb.fit( - X.iloc[train] if hasattr(X, 'iloc') else X[train], - y.iloc[train] if hasattr(y, 'iloc') else y[train], + X.iloc[train] if hasattr(X, "iloc") else X[train], + y.iloc[train] if hasattr(y, "iloc") else y[train], ) else: nb = OneVsRestClassifier(nb) nb.fit( - X.iloc[train] if hasattr(X, 'iloc') else X[train], - y.iloc[train] if hasattr(y, 'iloc') else y[train], + X.iloc[train] if hasattr(X, "iloc") else X[train], + y.iloc[train] if hasattr(y, "iloc") else y[train], ) predictions = nb.predict( - X.iloc[test] if hasattr(X, 'iloc') else X[test], + X.iloc[test] if hasattr(X, "iloc") else X[test], ) accuracy += sklearn.metrics.accuracy_score( predictions, - y.iloc[test] if hasattr(y, 'iloc') else y[test], + y.iloc[test] if hasattr(y, "iloc") else y[test], ) return accuracy / 5 @@ -756,34 +777,34 @@ class LandmarkDecisionTree(MetaFeature): def _calculate(self, X, y, logger, categorical): import sklearn.tree - if type(y) in ('binary', 'multiclass'): + if type(y) in ("binary", "multiclass"): kf = sklearn.model_selection.StratifiedKFold(n_splits=5) else: kf = sklearn.model_selection.KFold(n_splits=5) - accuracy = 0. 
+ accuracy = 0.0 for train, test in kf.split(X, y): random_state = sklearn.utils.check_random_state(42) tree = sklearn.tree.DecisionTreeClassifier(random_state=random_state) if len(y.shape) == 1 or y.shape[1] == 1: tree.fit( - X.iloc[train] if hasattr(X, 'iloc') else X[train], - y.iloc[train] if hasattr(y, 'iloc') else y[train], + X.iloc[train] if hasattr(X, "iloc") else X[train], + y.iloc[train] if hasattr(y, "iloc") else y[train], ) else: tree = OneVsRestClassifier(tree) tree.fit( - X.iloc[train] if hasattr(X, 'iloc') else X[train], - y.iloc[train] if hasattr(y, 'iloc') else y[train], + X.iloc[train] if hasattr(X, "iloc") else X[train], + y.iloc[train] if hasattr(y, "iloc") else y[train], ) predictions = tree.predict( - X.iloc[test] if hasattr(X, 'iloc') else X[test], + X.iloc[test] if hasattr(X, "iloc") else X[test], ) accuracy += sklearn.metrics.accuracy_score( predictions, - y.iloc[test] if hasattr(y, 'iloc') else y[test], + y.iloc[test] if hasattr(y, "iloc") else y[test], ) return accuracy / 5 @@ -803,34 +824,39 @@ class LandmarkDecisionNodeLearner(MetaFeature): def _calculate(self, X, y, logger, categorical): import sklearn.tree - if type(y) in ('binary', 'multiclass'): + if type(y) in ("binary", "multiclass"): kf = sklearn.model_selection.StratifiedKFold(n_splits=5) else: kf = sklearn.model_selection.KFold(n_splits=5) - accuracy = 0. + accuracy = 0.0 for train, test in kf.split(X, y): random_state = sklearn.utils.check_random_state(42) node = sklearn.tree.DecisionTreeClassifier( - criterion="entropy", max_depth=1, random_state=random_state, - min_samples_split=2, min_samples_leaf=1, max_features=None) + criterion="entropy", + max_depth=1, + random_state=random_state, + min_samples_split=2, + min_samples_leaf=1, + max_features=None, + ) if len(y.shape) == 1 or y.shape[1] == 1: node.fit( - X.iloc[train] if hasattr(X, 'iloc') else X[train], - y.iloc[train] if hasattr(y, 'iloc') else y[train], + X.iloc[train] if hasattr(X, "iloc") else X[train], + y.iloc[train] if hasattr(y, "iloc") else y[train], ) else: node = OneVsRestClassifier(node) node.fit( - X.iloc[train] if hasattr(X, 'iloc') else X[train], - y.iloc[train] if hasattr(y, 'iloc') else y[train], + X.iloc[train] if hasattr(X, "iloc") else X[train], + y.iloc[train] if hasattr(y, "iloc") else y[train], ) predictions = node.predict( - X.iloc[test] if hasattr(X, 'iloc') else X[test], + X.iloc[test] if hasattr(X, "iloc") else X[test], ) accuracy += sklearn.metrics.accuracy_score( predictions, - y.iloc[test] if hasattr(y, 'iloc') else y[test], + y.iloc[test] if hasattr(y, "iloc") else y[test], ) return accuracy / 5 @@ -843,27 +869,32 @@ class LandmarkRandomNodeLearner(MetaFeature): def _calculate(self, X, y, logger, categorical): import sklearn.tree - if type(y) in ('binary', 'multiclass'): + if type(y) in ("binary", "multiclass"): kf = sklearn.model_selection.StratifiedKFold(n_splits=5) else: kf = sklearn.model_selection.KFold(n_splits=5) - accuracy = 0. 
+ accuracy = 0.0 for train, test in kf.split(X, y): random_state = sklearn.utils.check_random_state(42) node = sklearn.tree.DecisionTreeClassifier( - criterion="entropy", max_depth=1, random_state=random_state, - min_samples_split=2, min_samples_leaf=1, max_features=1) + criterion="entropy", + max_depth=1, + random_state=random_state, + min_samples_split=2, + min_samples_leaf=1, + max_features=1, + ) node.fit( - X.iloc[train] if hasattr(X, 'iloc') else X[train], - y.iloc[train] if hasattr(y, 'iloc') else y[train], + X.iloc[train] if hasattr(X, "iloc") else X[train], + y.iloc[train] if hasattr(y, "iloc") else y[train], ) predictions = node.predict( - X.iloc[test] if hasattr(X, 'iloc') else X[test], + X.iloc[test] if hasattr(X, "iloc") else X[test], ) accuracy += sklearn.metrics.accuracy_score( predictions, - y.iloc[test] if hasattr(y, 'iloc') else y[test], + y.iloc[test] if hasattr(y, "iloc") else y[test], ) return accuracy / 5 @@ -903,31 +934,31 @@ class Landmark1NN(MetaFeature): def _calculate(self, X, y, logger, categorical): import sklearn.neighbors - if type(y) in ('binary', 'multiclass'): + if type(y) in ("binary", "multiclass"): kf = sklearn.model_selection.StratifiedKFold(n_splits=5) else: kf = sklearn.model_selection.KFold(n_splits=5) - accuracy = 0. + accuracy = 0.0 for train, test in kf.split(X, y): kNN = sklearn.neighbors.KNeighborsClassifier(n_neighbors=1) if len(y.shape) == 1 or y.shape[1] == 1: kNN.fit( - X.iloc[train] if hasattr(X, 'iloc') else X[train], - y.iloc[train] if hasattr(y, 'iloc') else y[train], + X.iloc[train] if hasattr(X, "iloc") else X[train], + y.iloc[train] if hasattr(y, "iloc") else y[train], ) else: kNN = OneVsRestClassifier(kNN) kNN.fit( - X.iloc[train] if hasattr(X, 'iloc') else X[train], - y.iloc[train] if hasattr(y, 'iloc') else y[train], + X.iloc[train] if hasattr(X, "iloc") else X[train], + y.iloc[train] if hasattr(y, "iloc") else y[train], ) predictions = kNN.predict( - X.iloc[test] if hasattr(X, 'iloc') else X[test], + X.iloc[test] if hasattr(X, "iloc") else X[test], ) accuracy += sklearn.metrics.accuracy_score( predictions, - y.iloc[test] if hasattr(y, 'iloc') else y[test], + y.iloc[test] if hasattr(y, "iloc") else y[test], ) return accuracy / 5 @@ -945,6 +976,7 @@ def _calculate(self, X, y, logger, categorical): class PCA(HelperFunction): def _calculate(self, X, y, logger, categorical): import sklearn.decomposition + pca = sklearn.decomposition.PCA(copy=True) rs = np.random.RandomState(42) indices = np.arange(X.shape[0]) @@ -952,7 +984,7 @@ def _calculate(self, X, y, logger, categorical): try: rs.shuffle(indices) pca.fit( - X.iloc[indices] if hasattr(X, 'iloc') else X[indices], + X.iloc[indices] if hasattr(X, "iloc") else X[indices], ) return pca except LinAlgError: @@ -962,6 +994,7 @@ def _calculate(self, X, y, logger, categorical): def _calculate_sparse(self, X, y, logger, categorical): import sklearn.decomposition + rs = np.random.RandomState(42) indices = np.arange(X.shape[0]) # This is expensive, but necessary with scikit-learn 0.15 @@ -970,8 +1003,8 @@ def _calculate_sparse(self, X, y, logger, categorical): try: rs.shuffle(indices) truncated_svd = sklearn.decomposition.TruncatedSVD( - n_components=X.shape[1]-1, random_state=i, - algorithm="randomized") + n_components=X.shape[1] - 1, random_state=i, algorithm="randomized" + ) truncated_svd.fit(Xt[indices]) return truncated_svd except LinAlgError: @@ -987,12 +1020,12 @@ def _calculate(self, X, y, logger, categorical): pca_ = helper_functions.get_value("PCA") if pca_ is None: return np.NaN - 
sum_ = 0. + sum_ = 0.0 idx = 0 while sum_ < 0.95 and idx < len(pca_.explained_variance_ratio_): sum_ += pca_.explained_variance_ratio_[idx] idx += 1 - return float(idx)/float(X.shape[1]) + return float(idx) / float(X.shape[1]) # Kurtosis of first PC @@ -1027,8 +1060,9 @@ def _calculate(self, X, y, logger, categorical): return skewness[0] -def calculate_all_metafeatures_encoded_labels(X, y, categorical, dataset_name, logger, - calculate=None, dont_calculate=None): +def calculate_all_metafeatures_encoded_labels( + X, y, categorical, dataset_name, logger, calculate=None, dont_calculate=None +): """ Calculate only metafeatures for which a 1HotEncoded feature matrix is necessery. """ @@ -1036,25 +1070,46 @@ def calculate_all_metafeatures_encoded_labels(X, y, categorical, dataset_name, l calculate = set() calculate.update(npy_metafeatures) - return calculate_all_metafeatures(X, y, categorical, dataset_name, - calculate=calculate, - dont_calculate=dont_calculate, logger=logger) + return calculate_all_metafeatures( + X, + y, + categorical, + dataset_name, + calculate=calculate, + dont_calculate=dont_calculate, + logger=logger, + ) -def calculate_all_metafeatures_with_labels(X, y, categorical, dataset_name, logger, - calculate=None, dont_calculate=None): +def calculate_all_metafeatures_with_labels( + X, y, categorical, dataset_name, logger, calculate=None, dont_calculate=None +): if dont_calculate is None: dont_calculate = set() else: dont_calculate = copy.deepcopy(dont_calculate) dont_calculate.update(npy_metafeatures) - return calculate_all_metafeatures(X, y, categorical, dataset_name, - calculate=calculate, - dont_calculate=dont_calculate, logger=logger) - - -def calculate_all_metafeatures(X, y, categorical, dataset_name, logger, - calculate=None, dont_calculate=None, densify_threshold=1000): + return calculate_all_metafeatures( + X, + y, + categorical, + dataset_name, + calculate=calculate, + dont_calculate=dont_calculate, + logger=logger, + ) + + +def calculate_all_metafeatures( + X, + y, + categorical, + dataset_name, + logger, + calculate=None, + dont_calculate=None, + densify_threshold=1000, +): """Calculate all metafeatures.""" helper_functions.clear() @@ -1083,8 +1138,10 @@ def calculate_all_metafeatures(X, y, categorical, dataset_name, logger, # sparse matrices because of wrong sparse format) sparse = scipy.sparse.issparse(X) - feat_type = {key: 'categorical' if value else 'numerical' - for key, value in categorical.items()} + feat_type = { + key: "categorical" if value else "numerical" + for key, value in categorical.items() + } # TODO make this more cohesive to the overall structure (quick bug fix) if isinstance(X, pd.DataFrame): @@ -1095,9 +1152,12 @@ def calculate_all_metafeatures(X, y, categorical, dataset_name, logger, # The difference between feat_type and categorical, is that # categorical has True/False instead of categorical/numerical feat_type=feat_type, - force_sparse_output=True) + force_sparse_output=True, + ) X_transformed = DPP.fit_transform(X) - categorical_transformed = {i: False for i in range(X_transformed.shape[1])} + categorical_transformed = { + i: False for i in range(X_transformed.shape[1]) + } # Densify the transformed matrix if not sparse and scipy.sparse.issparse(X_transformed): @@ -1111,9 +1171,9 @@ def calculate_all_metafeatures(X, y, categorical, dataset_name, logger, # sorted in a strange way, but also prevents lda from failing in # some cases. # Because this is advanced indexing, a copy of the data is returned!!! 
- X_transformed = check_array(X_transformed, - force_all_finite=True, - accept_sparse='csr') + X_transformed = check_array( + X_transformed, force_all_finite=True, accept_sparse="csr" + ) indices = np.arange(X_transformed.shape[0]) rs = np.random.RandomState(42) @@ -1143,17 +1203,15 @@ def calculate_all_metafeatures(X, y, categorical, dataset_name, logger, elif is_metafeature and not metafeatures.is_calculated(dependency): to_visit.appendleft(name) continue - elif is_helper_function and not helper_functions.is_calculated( - dependency): - logger.info("%s: Going to calculate: %s", dataset_name, - dependency) + elif is_helper_function and not helper_functions.is_calculated(dependency): + logger.info("%s: Going to calculate: %s", dataset_name, dependency) value = helper_functions[dependency]( - X_, y_, categorical=categorical_, logger=logger) + X_, y_, categorical=categorical_, logger=logger + ) helper_functions.set_value(dependency, value) mf_[dependency] = value - logger.info("%s: Going to calculate: %s", dataset_name, - name) + logger.info("%s: Going to calculate: %s", dataset_name, name) value = metafeatures[name](X_, y_, logger, categorical_) metafeatures.set_value(name, value) @@ -1164,40 +1222,48 @@ def calculate_all_metafeatures(X, y, categorical, dataset_name, logger, return mf_ -npy_metafeatures = set(["LandmarkLDA", - "LandmarkNaiveBayes", - "LandmarkDecisionTree", - "LandmarkDecisionNodeLearner", - "LandmarkRandomNodeLearner", - "LandmarkWorstNodeLearner", - "Landmark1NN", - "PCAFractionOfComponentsFor95PercentVariance", - "PCAKurtosisFirstPC", - "PCASkewnessFirstPC", - "Skewnesses", - "SkewnessMin", - "SkewnessMax", - "SkewnessMean", - "SkewnessSTD", - "Kurtosisses", - "KurtosisMin", - "KurtosisMax", - "KurtosisMean", - "KurtosisSTD"]) +npy_metafeatures = set( + [ + "LandmarkLDA", + "LandmarkNaiveBayes", + "LandmarkDecisionTree", + "LandmarkDecisionNodeLearner", + "LandmarkRandomNodeLearner", + "LandmarkWorstNodeLearner", + "Landmark1NN", + "PCAFractionOfComponentsFor95PercentVariance", + "PCAKurtosisFirstPC", + "PCASkewnessFirstPC", + "Skewnesses", + "SkewnessMin", + "SkewnessMax", + "SkewnessMean", + "SkewnessSTD", + "Kurtosisses", + "KurtosisMin", + "KurtosisMax", + "KurtosisMean", + "KurtosisSTD", + ] +) subsets = dict() # All implemented metafeatures subsets["all"] = set(metafeatures.functions.keys()) # Metafeatures used by Pfahringer et al. (2000) in the first experiment -subsets["pfahringer_2000_experiment1"] = set(["number_of_features", - "number_of_numeric_features", - "number_of_categorical_features", - "number_of_classes", - "class_probability_max", - "landmark_lda", - "landmark_naive_bayes", - "landmark_decision_tree"]) +subsets["pfahringer_2000_experiment1"] = set( + [ + "number_of_features", + "number_of_numeric_features", + "number_of_categorical_features", + "number_of_classes", + "class_probability_max", + "landmark_lda", + "landmark_naive_bayes", + "landmark_decision_tree", + ] +) # Metafeatures used by Pfahringer et al. (2000) in the second experiment # worst node learner not implemented yet @@ -1209,19 +1275,27 @@ def calculate_all_metafeatures(X, y, categorical, dataset_name, logger, """ # Metafeatures used by Yogatama and Mann (2014) -subsets["yogotama_2014"] = set(["log_number_of_features", - "log_number_of_instances", - "number_of_classes"]) +subsets["yogotama_2014"] = set( + ["log_number_of_features", "log_number_of_instances", "number_of_classes"] +) # Metafeatures used by Bardenet et al. 
(2013) for the AdaBoost.MH experiment -subsets["bardenet_2013_boost"] = set(["number_of_classes", - "log_number_of_features", - "log_inverse_dataset_ratio", - "pca_95percent"]) +subsets["bardenet_2013_boost"] = set( + [ + "number_of_classes", + "log_number_of_features", + "log_inverse_dataset_ratio", + "pca_95percent", + ] +) # Metafeatures used by Bardenet et al. (2013) for the Neural Net experiment -subsets["bardenet_2013_nn"] = set(["number_of_classes", - "log_number_of_features", - "log_inverse_dataset_ratio", - "pca_kurtosis_first_pc", - "pca_skewness_first_pc"]) +subsets["bardenet_2013_nn"] = set( + [ + "number_of_classes", + "log_number_of_features", + "log_inverse_dataset_ratio", + "pca_kurtosis_first_pc", + "pca_skewness_first_pc", + ] +) diff --git a/autosklearn/metalearning/metalearning/__init__.py b/autosklearn/metalearning/metalearning/__init__.py index 8f0ce6cb7c..92bf78f389 100644 --- a/autosklearn/metalearning/metalearning/__init__.py +++ b/autosklearn/metalearning/metalearning/__init__.py @@ -1 +1 @@ -__author__ = 'feurerm' +__author__ = "feurerm" diff --git a/autosklearn/metalearning/metalearning/clustering/gmeans.py b/autosklearn/metalearning/metalearning/clustering/gmeans.py index 573537e446..5c5233f284 100644 --- a/autosklearn/metalearning/metalearning/clustering/gmeans.py +++ b/autosklearn/metalearning/metalearning/clustering/gmeans.py @@ -9,8 +9,14 @@ class GMeans(object): - def __init__(self, minimum_samples_per_cluster=2, n_init=10, significance=4, - restarts=10, random_state=None, ): + def __init__( + self, + minimum_samples_per_cluster=2, + n_init=10, + significance=4, + restarts=10, + random_state=None, + ): self.minimum_samples_per_cluster = minimum_samples_per_cluster self.n_init = n_init self.significance = significance @@ -21,8 +27,9 @@ def fit(self, X): self.inertia_ = np.inf for i in range(self.restarts): - KMeans = sklearn.cluster.KMeans(n_clusters=1, n_init=1, - random_state=self.random_state) + KMeans = sklearn.cluster.KMeans( + n_clusters=1, n_init=1, random_state=self.random_state + ) KMeans.fit(X) while True: @@ -34,14 +41,16 @@ def fit(self, X): indices = KMeans.labels_ == i X_ = X[indices] - if np.sum(indices) < self.minimum_samples_per_cluster*2: + if np.sum(indices) < self.minimum_samples_per_cluster * 2: cluster_centers.append(cluster_center) continue for i in range(10): - KMeans_ = sklearn.cluster.KMeans(n_clusters=2, - n_init=self.n_init, - random_state=self.random_state) + KMeans_ = sklearn.cluster.KMeans( + n_clusters=2, + n_init=self.n_init, + random_state=self.random_state, + ) predictions = KMeans_.fit_predict(X_) bins = np.bincount(predictions) minimum = np.min(bins) @@ -73,9 +82,12 @@ def fit(self, X): break # Refinement - KMeans = sklearn.cluster.KMeans(n_clusters=len(cluster_centers), n_init=1, - init=np.array(cluster_centers), - random_state=self.random_state) + KMeans = sklearn.cluster.KMeans( + n_clusters=len(cluster_centers), + n_init=1, + init=np.array(cluster_centers), + random_state=self.random_state, + ) KMeans.fit(X) if KMeans.inertia_ < self.inertia_: diff --git a/autosklearn/metalearning/metalearning/create_datasets.py b/autosklearn/metalearning/metalearning/create_datasets.py index a65b7840ed..1a60bebb7a 100644 --- a/autosklearn/metalearning/metalearning/create_datasets.py +++ b/autosklearn/metalearning/metalearning/create_datasets.py @@ -1,5 +1,6 @@ import itertools import logging + import numpy as np import pandas as pd import scipy.stats @@ -13,11 +14,13 @@ def create_regression_dataset(metafeatures, experiments): 
experiment = experiments[dataset_name] mf = metafeatures.loc[dataset_name] for i, run in enumerate(experiment): - x1 = pd.Series(data=[run.params[param] for param in run.params], - index=run.params.keys()) + x1 = pd.Series( + data=[run.params[param] for param in run.params], + index=run.params.keys(), + ) x2 = mf X.append(x1.append(x2)) - X_indices.append('%s_%d' % (dataset_name, i)) + X_indices.append("%s_%d" % (dataset_name, i)) Y.append(run.result) X = pd.DataFrame(X, index=X_indices) Y = pd.DataFrame(Y, index=X_indices) @@ -67,14 +70,19 @@ def create_predict_spearman_rank(metafeatures, experiments, iterator): responses_1 = np.zeros((len(experiments_1)), dtype=np.float64) responses_2 = np.zeros((len(experiments_1)), dtype=np.float64) - for idx, zipped in enumerate(zip( + for idx, zipped in enumerate( + zip( sorted(experiments_1, key=lambda t: str(t.configuration)), - sorted(experiments_2, key=lambda t: str(t.configuration)))): + sorted(experiments_2, key=lambda t: str(t.configuration)), + ) + ): # Test if the order of the params is the same exp_1, exp_2 = zipped print(exp_1.configuration, exp_2.configuration) - assert exp_1.configuration == exp_2.configuration,\ - (experiments_1, experiments_2) + assert exp_1.configuration == exp_2.configuration, ( + experiments_1, + experiments_2, + ) responses_1[idx] = exp_1.result if np.isfinite(exp_1.result) else 1 responses_2[idx] = exp_2.result if np.isfinite(exp_2.result) else 1 @@ -91,9 +99,11 @@ def create_predict_spearman_rank(metafeatures, experiments, iterator): logging.info("Metafeatures %s", metafeatures.shape) logging.info("X.shape %s", X.shape) logging.info("Y.shape %s", Y.shape) - assert X.shape == (len(cross_product), metafeatures.shape[1] * 2), \ - (X.shape, (len(cross), metafeatures.shape[1] * 2)) - assert Y.shape == (len(cross_product), ) + assert X.shape == (len(cross_product), metafeatures.shape[1] * 2), ( + X.shape, + (len(cross), metafeatures.shape[1] * 2), + ) + assert Y.shape == (len(cross_product),) # train sklearn regressor (tree) with 10fold CV indices = range(len(X)) np_rs = np.random.RandomState(42) @@ -103,8 +113,7 @@ def create_predict_spearman_rank(metafeatures, experiments, iterator): return X, Y -def create_predict_spearman_rank_with_cv(cv_metafeatures, cv_experiments, - iterator): +def create_predict_spearman_rank_with_cv(cv_metafeatures, cv_experiments, iterator): X = [] Y = [] Y_names = [] @@ -128,13 +137,18 @@ def create_predict_spearman_rank_with_cv(cv_metafeatures, cv_experiments, logging.info("Create spearman rank dataset with CV data %s", iterator) logging.info("Using %d datasets", len(dataset_names)) - logging.info("This will results in %d training points", len(cross_product) * len(folds_product)) + logging.info( + "This will results in %d training points", + len(cross_product) * len(folds_product), + ) logging.info("Length of dataset crossproduct %s", len(cross_product)) logging.info("Length of folds crossproduct %s", len(folds_product)) # Create inputs and targets for i, cross in enumerate(cross_product): - print("%d/%d: %s" % (i, len(cross_product), cross),) + print( + "%d/%d: %s" % (i, len(cross_product), cross), + ) for folds in folds_product: name = "%s-%d_%s-%d" % (cross[0], folds[0], cross[1], folds[1]) mf_1 = cv_metafeatures[cross[0]][folds[0]] @@ -266,7 +280,7 @@ def create_smac_files_file(cv_metafeatures, cv_experiments, dataset, train_instances_file.seek(0) for line in train_instances_file: fh.write(line) -""" +""" # noqa: E501 if __name__ == "__main__": diff --git 
a/autosklearn/metalearning/metalearning/kNearestDatasets/__init__.py b/autosklearn/metalearning/metalearning/kNearestDatasets/__init__.py index 8f0ce6cb7c..92bf78f389 100644 --- a/autosklearn/metalearning/metalearning/kNearestDatasets/__init__.py +++ b/autosklearn/metalearning/metalearning/kNearestDatasets/__init__.py @@ -1 +1 @@ -__author__ = 'feurerm' +__author__ = "feurerm" diff --git a/autosklearn/metalearning/metalearning/kNearestDatasets/kND.py b/autosklearn/metalearning/metalearning/kNearestDatasets/kND.py index 336d3b6bb3..f6c10c95d2 100644 --- a/autosklearn/metalearning/metalearning/kNearestDatasets/kND.py +++ b/autosklearn/metalearning/metalearning/kNearestDatasets/kND.py @@ -1,13 +1,12 @@ import numpy as np import pandas as pd - +import sklearn.utils from sklearn.neighbors import NearestNeighbors from sklearn.preprocessing import MinMaxScaler -import sklearn.utils class KNearestDatasets(object): - def __init__(self, logger, metric='l1', random_state=None, metric_params=None): + def __init__(self, logger, metric="l1", random_state=None, metric_params=None): self.logger = logger self.metric = metric @@ -37,8 +36,10 @@ def fit(self, metafeatures, runs): assert metafeatures.values.dtype in (np.float32, np.float64) assert np.isfinite(metafeatures.values).all() assert isinstance(runs, pd.DataFrame) - assert runs.shape[1] == metafeatures.shape[0], \ - (runs.shape[1], metafeatures.shape[0]) + assert runs.shape[1] == metafeatures.shape[0], ( + runs.shape[1], + metafeatures.shape[0], + ) self.metafeatures = metafeatures self.runs = runs @@ -54,7 +55,8 @@ def fit(self, metafeatures, runs): best_configuration_per_dataset[dataset_name] = None else: configuration_idx = runs[dataset_name].index[ - np.nanargmin(runs[dataset_name].values)] + np.nanargmin(runs[dataset_name].values) + ] best_configuration_per_dataset[dataset_name] = configuration_idx self.best_configuration_per_dataset = best_configuration_per_dataset @@ -72,9 +74,14 @@ def fit(self, metafeatures, runs): raise ValueError(self.metric) self._nearest_neighbors = NearestNeighbors( - n_neighbors=self.num_datasets, radius=None, algorithm="brute", - leaf_size=30, metric=self._metric, p=self._p, - metric_params=self.metric_params) + n_neighbors=self.num_datasets, + radius=None, + algorithm="brute", + leaf_size=30, + metric=self._metric, + p=self._p, + metric_params=self.metric_params, + ) def kNearestDatasets(self, x, k=1, return_distance=False): """Return the k most similar datasets with respect to self.metric @@ -101,7 +108,7 @@ def kNearestDatasets(self, x, k=1, return_distance=False): """ assert type(x) == pd.Series if k < -1 or k == 0: - raise ValueError('Number of neighbors k cannot be zero or negative.') + raise ValueError("Number of neighbors k cannot be zero or negative.") elif k == -1: k = self.num_datasets @@ -110,14 +117,17 @@ def kNearestDatasets(self, x, k=1, return_distance=False): x = self.scaler.transform(x) self._nearest_neighbors.fit(X_train) distances, neighbor_indices = self._nearest_neighbors.kneighbors( - x, n_neighbors=k, return_distance=True) + x, n_neighbors=k, return_distance=True + ) assert k == neighbor_indices.shape[1] - rval = [self.metafeatures.index[i] - # Neighbor indices is 2d, each row is the indices for one - # dataset in x. - for i in neighbor_indices[0]] + rval = [ + self.metafeatures.index[i] + # Neighbor indices is 2d, each row is the indices for one + # dataset in x. 
+ for i in neighbor_indices[0] + ] if return_distance is False: return rval @@ -127,19 +137,19 @@ def kNearestDatasets(self, x, k=1, return_distance=False): def kBestSuggestions(self, x, k=1, exclude_double_configurations=True): assert type(x) == pd.Series if k < -1 or k == 0: - raise ValueError('Number of neighbors k cannot be zero or negative.') - nearest_datasets, distances = self.kNearestDatasets(x, -1, - return_distance=True) + raise ValueError("Number of neighbors k cannot be zero or negative.") + nearest_datasets, distances = self.kNearestDatasets(x, -1, return_distance=True) kbest = [] added_configurations = set() for dataset_name, distance in zip(nearest_datasets, distances): - best_configuration = self.best_configuration_per_dataset[ - dataset_name] + best_configuration = self.best_configuration_per_dataset[dataset_name] if best_configuration is None: - self.logger.info("Found no best configuration for instance %s" % dataset_name) + self.logger.info( + "Found no best configuration for instance %s" % dataset_name + ) continue if exclude_double_configurations: diff --git a/autosklearn/metalearning/metalearning/meta_base.py b/autosklearn/metalearning/metalearning/meta_base.py index 13653de528..f193a61fef 100644 --- a/autosklearn/metalearning/metalearning/meta_base.py +++ b/autosklearn/metalearning/metalearning/meta_base.py @@ -2,10 +2,10 @@ import numpy as np import pandas as pd +from ConfigSpace.configuration_space import Configuration from ..input import aslib_simple from ..metafeatures.metafeature import DatasetMetafeatures -from ConfigSpace.configuration_space import Configuration class Run(object): @@ -15,8 +15,11 @@ def __init__(self, configuration, result, runtime): self.runtime = runtime def __repr__(self): - return "Run:\nresult: %3.3f\nruntime: %3.3f\n%s" % \ - (self.result, self.runtime, str(self.configuration)) + return "Run:\nresult: %3.3f\nruntime: %3.3f\n%s" % ( + self.result, + self.runtime, + str(self.configuration), + ) class Instance(object): @@ -41,15 +44,18 @@ def __init__(self, configuration_space, aslib_directory, logger): aslib_reader = aslib_simple.AlgorithmSelectionProblem(self.aslib_directory) self.metafeatures = aslib_reader.metafeatures - self.algorithm_runs: OrderedDict[str, pd.DataFrame] = aslib_reader.algorithm_runs + self.algorithm_runs: OrderedDict[ + str, pd.DataFrame + ] = aslib_reader.algorithm_runs self.configurations = aslib_reader.configurations configurations = dict() for algorithm_id in self.configurations: configuration = self.configurations[algorithm_id] try: - configurations[str(algorithm_id)] = \ - (Configuration(configuration_space, values=configuration)) + configurations[str(algorithm_id)] = Configuration( + configuration_space, values=configuration + ) except (ValueError, KeyError) as e: self.logger.debug("Error reading configurations: %s", e) @@ -58,11 +64,13 @@ def __init__(self, configuration_space, aslib_directory, logger): def add_dataset(self, name, metafeatures): metafeatures.name = name if isinstance(metafeatures, DatasetMetafeatures): - data_ = {mf.name: mf.value for mf in metafeatures.metafeature_values.values()} + data_ = { + mf.name: mf.value for mf in metafeatures.metafeature_values.values() + } metafeatures = pd.Series(name=name, data=data_, dtype=np.float64) if name.lower() in self.metafeatures.index: self.logger.warning( - 'Dataset %s already in meta-data. Removing occurence.', name.lower() + "Dataset %s already in meta-data. 
Removing occurence.", name.lower() ) self.metafeatures.drop(name.lower(), inplace=True) self.metafeatures = self.metafeatures.append(metafeatures) @@ -97,8 +105,7 @@ def _get_metafeatures(self, features): """This is inside an extra function for testing purpose""" # Load the task - self.logger.info("Going to use the following metafeature subset: %s", - features) + self.logger.info("Going to use the following metafeature subset: %s", features) all_metafeatures = self.metafeatures all_metafeatures = all_metafeatures.loc[:, features] diff --git a/autosklearn/metalearning/mismbo.py b/autosklearn/metalearning/mismbo.py index 8a4f2e2bed..7b2956c489 100644 --- a/autosklearn/metalearning/mismbo.py +++ b/autosklearn/metalearning/mismbo.py @@ -2,15 +2,19 @@ import time -from autosklearn.metalearning.optimizers.metalearn_optimizer.metalearner \ - import MetaLearningOptimizer -from autosklearn.constants \ - import MULTILABEL_CLASSIFICATION, MULTICLASS_CLASSIFICATION, TASK_TYPES_TO_STRING +from autosklearn.constants import ( + MULTICLASS_CLASSIFICATION, + MULTILABEL_CLASSIFICATION, + TASK_TYPES_TO_STRING, +) +from autosklearn.metalearning.optimizers.metalearn_optimizer.metalearner import ( + MetaLearningOptimizer, +) def suggest_via_metalearning( - meta_base, dataset_name, metric, task, sparse, - num_initial_configurations, logger): + meta_base, dataset_name, metric, task, sparse, num_initial_configurations, logger +): if task == MULTILABEL_CLASSIFICATION: task = MULTICLASS_CLASSIFICATION @@ -24,10 +28,10 @@ def suggest_via_metalearning( dataset_name=dataset_name, configuration_space=meta_base.configuration_space, meta_base=meta_base, - distance='l1', + distance="l1", seed=1, - logger=logger,) - logger.info('Reading meta-data took %5.2f seconds', - time.time() - start) + logger=logger, + ) + logger.info("Reading meta-data took %5.2f seconds", time.time() - start) runs = ml.metalearning_suggest_all(exclude_double_configurations=True) return runs[:num_initial_configurations] diff --git a/autosklearn/metalearning/optimizers/__init__.py b/autosklearn/metalearning/optimizers/__init__.py index 8f0ce6cb7c..92bf78f389 100644 --- a/autosklearn/metalearning/optimizers/__init__.py +++ b/autosklearn/metalearning/optimizers/__init__.py @@ -1 +1 @@ -__author__ = 'feurerm' +__author__ = "feurerm" diff --git a/autosklearn/metalearning/optimizers/metalearn_optimizer/metalearner.py b/autosklearn/metalearning/optimizers/metalearn_optimizer/metalearner.py index 987f40b0f7..eff2956b9b 100644 --- a/autosklearn/metalearning/optimizers/metalearn_optimizer/metalearner.py +++ b/autosklearn/metalearning/optimizers/metalearn_optimizer/metalearner.py @@ -1,6 +1,7 @@ import ast -import pandas as pd + import numpy as np +import pandas as pd import sklearn.utils from autosklearn.metalearning.metalearning.kNearestDatasets.kND import KNearestDatasets @@ -11,9 +12,17 @@ def test_function(params): class MetaLearningOptimizer(object): - def __init__(self, dataset_name, configuration_space, - meta_base, logger, distance='l1', seed=None, use_features=None, - distance_kwargs=None): + def __init__( + self, + dataset_name, + configuration_space, + meta_base, + logger, + distance="l1", + seed=None, + use_features=None, + distance_kwargs=None, + ): self.dataset_name = dataset_name self.configuration_space = configuration_space self.meta_base = meta_base @@ -21,7 +30,7 @@ def __init__(self, dataset_name, configuration_space, self.seed = seed self.use_features = use_features self.distance_kwargs = distance_kwargs - self.kND = None # For caching, 
makes things faster... + self.kND = None # For caching, makes things faster... self.logger = logger @@ -32,7 +41,9 @@ def metalearning_suggest_all(self, exclude_double_configurations=True): hp_list = [] for neighbor in neighbors: try: - configuration = self.meta_base.get_configuration_from_algorithm_index(neighbor[2]) + configuration = self.meta_base.get_configuration_from_algorithm_index( + neighbor[2] + ) self.logger.info("%s %s %s" % (neighbor[0], neighbor[1], configuration)) except (KeyError): self.logger.warning("Configuration %s not found" % neighbor[2]) @@ -42,14 +53,16 @@ def metalearning_suggest_all(self, exclude_double_configurations=True): return hp_list def metalearning_suggest(self, history): - """Suggest the next most promising hyperparameters which were not yet evaluated""" + """Suggest the next promosing hyperparameters which were not yet evaluated""" # TODO test the object in the history! neighbors = self._learn() # Iterate over all datasets which are sorted ascending by distance history_with_indices = [] for run in history: - history_with_indices.append(self.meta_base.get_algorithm_index_from_configuration(run)) + history_with_indices.append( + self.meta_base.get_algorithm_index_from_configuration(run) + ) for idx, neighbor in enumerate(neighbors): already_evaluated = False @@ -62,16 +75,18 @@ def metalearning_suggest(self, history): break if not already_evaluated: - self.logger.info("Nearest dataset with hyperparameters of best value " - "not evaluated yet is %s with a distance of %f" % - (neighbor[0], neighbor[1])) + self.logger.info( + "Nearest dataset with hyperparameters of best value " + "not evaluated yet is %s with a distance of %f" + % (neighbor[0], neighbor[1]) + ) return self.meta_base.get_configuration_from_algorithm_index( - neighbor[2]) + neighbor[2] + ) raise StopIteration("No more values available.") def _learn(self, exclude_double_configurations=True): - dataset_metafeatures, all_other_metafeatures = \ - self._split_metafeature_array() + dataset_metafeatures, all_other_metafeatures = self._split_metafeature_array() # Remove metafeatures which could not be calculated for the target # dataset @@ -85,7 +100,8 @@ def _learn(self, exclude_double_configurations=True): # Do mean imputation of all other metafeatures all_other_metafeatures = all_other_metafeatures.fillna( - all_other_metafeatures.mean()) + all_other_metafeatures.mean() + ) if self.kND is None: # In case that we learn our distance function, get_value the parameters for @@ -98,10 +114,12 @@ def _learn(self, exclude_double_configurations=True): # To keep the distance the same in every iteration, we create a new # random state random_state = sklearn.utils.check_random_state(self.seed) - kND = KNearestDatasets(metric=self.distance, - random_state=random_state, - logger=self.logger, - metric_params=rf_params) + kND = KNearestDatasets( + metric=self.distance, + random_state=random_state, + logger=self.logger, + metric_params=rf_params, + ) runs = dict() # TODO move this code to the metabase @@ -121,13 +139,15 @@ def _learn(self, exclude_double_configurations=True): dataset_metafeatures, k=-1, exclude_double_configurations=exclude_double_configurations, - ) + ) def _split_metafeature_array(self): dataset_metafeatures = self.meta_base.get_metafeatures( - self.dataset_name, self.use_features) + self.dataset_name, self.use_features + ) all_other_datasets = self.meta_base.get_all_dataset_names() all_other_datasets.remove(self.dataset_name) all_other_metafeatures = self.meta_base.get_metafeatures( - 
all_other_datasets, self.use_features) + all_other_datasets, self.use_features + ) return dataset_metafeatures, all_other_metafeatures diff --git a/autosklearn/metalearning/optimizers/optimizer_base.py b/autosklearn/metalearning/optimizers/optimizer_base.py index dd336fa4a8..e437f1bb64 100644 --- a/autosklearn/metalearning/optimizers/optimizer_base.py +++ b/autosklearn/metalearning/optimizers/optimizer_base.py @@ -1,7 +1,7 @@ +import subprocess from collections import OrderedDict -from itertools import product from io import StringIO -import subprocess +from itertools import product def _parse_categorical(line): @@ -26,7 +26,7 @@ def _parse_categorical(line): first_bracket = line.find("{") second_bracket = line.find("}") - domain_values = line[first_bracket + 1:second_bracket] + domain_values = line[first_bracket + 1 : second_bracket] cat_values = domain_values.split(",") if len(cat_values) < 1: raise ValueError("Expected at least one value in %s" % line) diff --git a/autosklearn/metrics/__init__.py b/autosklearn/metrics/__init__.py index cb6920979f..3234329658 100644 --- a/autosklearn/metrics/__init__.py +++ b/autosklearn/metrics/__init__.py @@ -1,18 +1,22 @@ from abc import ABCMeta, abstractmethod +from typing import Any, Callable, Dict, List, Optional, Union, cast + from functools import partial from itertools import product -from typing import Any, Callable, Dict, List, Optional, Union, cast import numpy as np - import sklearn.metrics from sklearn.utils.multiclass import type_of_target - from smac.utils.constants import MAXINT from autosklearn.constants import ( - BINARY_CLASSIFICATION, MULTICLASS_CLASSIFICATION, MULTILABEL_CLASSIFICATION, - MULTIOUTPUT_REGRESSION, REGRESSION, REGRESSION_TASKS, TASK_TYPES, + BINARY_CLASSIFICATION, + MULTICLASS_CLASSIFICATION, + MULTILABEL_CLASSIFICATION, + MULTIOUTPUT_REGRESSION, + REGRESSION, + REGRESSION_TASKS, + TASK_TYPES, ) from .util import sanitize_array @@ -26,7 +30,7 @@ def __init__( optimum: float, worst_possible_result: float, sign: float, - kwargs: Any + kwargs: Any, ) -> None: self.name = name self._kwargs = kwargs @@ -40,7 +44,7 @@ def __call__( self, y_true: np.ndarray, y_pred: np.ndarray, - sample_weight: Optional[List[float]] = None + sample_weight: Optional[List[float]] = None, ) -> float: pass @@ -53,7 +57,7 @@ def __call__( self, y_true: np.ndarray, y_pred: np.ndarray, - sample_weight: Optional[List[float]] = None + sample_weight: Optional[List[float]] = None, ) -> float: """Evaluate predicted target values for X relative to y_true. @@ -74,34 +78,37 @@ def __call__( Score function applied to prediction of estimator on X. 
""" type_true = type_of_target(y_true) - if type_true == 'binary' and type_of_target(y_pred) == 'continuous' and \ - len(y_pred.shape) == 1: + if ( + type_true == "binary" + and type_of_target(y_pred) == "continuous" + and len(y_pred.shape) == 1 + ): # For a pred scorer, no threshold, nor probability is required # If y_true is binary, and y_pred is continuous # it means that a rounding is necessary to obtain the binary class y_pred = np.around(y_pred, decimals=0) - elif len(y_pred.shape) == 1 or y_pred.shape[1] == 1 or \ - type_true == 'continuous': + elif ( + len(y_pred.shape) == 1 or y_pred.shape[1] == 1 or type_true == "continuous" + ): # must be regression, all other task types would return at least # two probabilities pass - elif type_true in ['binary', 'multiclass']: + elif type_true in ["binary", "multiclass"]: y_pred = np.argmax(y_pred, axis=1) - elif type_true == 'multilabel-indicator': + elif type_true == "multilabel-indicator": y_pred[y_pred > 0.5] = 1.0 y_pred[y_pred <= 0.5] = 0.0 - elif type_true == 'continuous-multioutput': + elif type_true == "continuous-multioutput": pass else: raise ValueError(type_true) if sample_weight is not None: - return self._sign * self._score_func(y_true, y_pred, - sample_weight=sample_weight, - **self._kwargs) + return self._sign * self._score_func( + y_true, y_pred, sample_weight=sample_weight, **self._kwargs + ) else: - return self._sign * self._score_func(y_true, y_pred, - **self._kwargs) + return self._sign * self._score_func(y_true, y_pred, **self._kwargs) class _ProbaScorer(Scorer): @@ -109,7 +116,7 @@ def __call__( self, y_true: np.ndarray, y_pred: np.ndarray, - sample_weight: Optional[List[float]] = None + sample_weight: Optional[List[float]] = None, ) -> float: """Evaluate predicted probabilities for X relative to y_true. Parameters @@ -136,21 +143,24 @@ def __call__( if n_labels_pred != n_labels_test: labels = list(range(n_labels_pred)) if sample_weight is not None: - return self._sign * self._score_func(y_true, y_pred, - sample_weight=sample_weight, - labels=labels, - **self._kwargs) + return self._sign * self._score_func( + y_true, + y_pred, + sample_weight=sample_weight, + labels=labels, + **self._kwargs, + ) else: - return self._sign * self._score_func(y_true, y_pred, - labels=labels, **self._kwargs) + return self._sign * self._score_func( + y_true, y_pred, labels=labels, **self._kwargs + ) if sample_weight is not None: - return self._sign * self._score_func(y_true, y_pred, - sample_weight=sample_weight, - **self._kwargs) + return self._sign * self._score_func( + y_true, y_pred, sample_weight=sample_weight, **self._kwargs + ) else: - return self._sign * self._score_func(y_true, y_pred, - **self._kwargs) + return self._sign * self._score_func(y_true, y_pred, **self._kwargs) class _ThresholdScorer(Scorer): @@ -158,7 +168,7 @@ def __call__( self, y_true: np.ndarray, y_pred: np.ndarray, - sample_weight: Optional[List[float]] = None + sample_weight: Optional[List[float]] = None, ) -> float: """Evaluate decision function output for X relative to y_true. 
Parameters @@ -189,9 +199,9 @@ def __call__( y_pred = np.vstack([p[:, -1] for p in y_pred]).T if sample_weight is not None: - return self._sign * self._score_func(y_true, y_pred, - sample_weight=sample_weight, - **self._kwargs) + return self._sign * self._score_func( + y_true, y_pred, sample_weight=sample_weight, **self._kwargs + ) else: return self._sign * self._score_func(y_true, y_pred, **self._kwargs) @@ -204,7 +214,7 @@ def make_scorer( greater_is_better: bool = True, needs_proba: bool = False, needs_threshold: bool = False, - **kwargs: Any + **kwargs: Any, ) -> Scorer: """Make a scorer from a performance metric or loss function. @@ -244,96 +254,114 @@ def make_scorer( """ sign = 1 if greater_is_better else -1 if needs_proba: - return _ProbaScorer(name, score_func, optimum, worst_possible_result, sign, kwargs) + return _ProbaScorer( + name, score_func, optimum, worst_possible_result, sign, kwargs + ) elif needs_threshold: - return _ThresholdScorer(name, score_func, optimum, worst_possible_result, sign, kwargs) + return _ThresholdScorer( + name, score_func, optimum, worst_possible_result, sign, kwargs + ) else: - return _PredictScorer(name, score_func, optimum, worst_possible_result, sign, kwargs) + return _PredictScorer( + name, score_func, optimum, worst_possible_result, sign, kwargs + ) # Standard regression scores -mean_absolute_error = make_scorer('mean_absolute_error', - sklearn.metrics.mean_absolute_error, - optimum=0, - worst_possible_result=MAXINT, - greater_is_better=False) -mean_squared_error = make_scorer('mean_squared_error', - sklearn.metrics.mean_squared_error, - optimum=0, - worst_possible_result=MAXINT, - greater_is_better=False, - squared=True) -root_mean_squared_error = make_scorer('root_mean_squared_error', - sklearn.metrics.mean_squared_error, - optimum=0, - worst_possible_result=MAXINT, - greater_is_better=False, - squared=False) -mean_squared_log_error = make_scorer('mean_squared_log_error', - sklearn.metrics.mean_squared_log_error, - optimum=0, - worst_possible_result=MAXINT, - greater_is_better=False,) -median_absolute_error = make_scorer('median_absolute_error', - sklearn.metrics.median_absolute_error, - optimum=0, - worst_possible_result=MAXINT, - greater_is_better=False) - -r2 = make_scorer('r2', sklearn.metrics.r2_score) +mean_absolute_error = make_scorer( + "mean_absolute_error", + sklearn.metrics.mean_absolute_error, + optimum=0, + worst_possible_result=MAXINT, + greater_is_better=False, +) +mean_squared_error = make_scorer( + "mean_squared_error", + sklearn.metrics.mean_squared_error, + optimum=0, + worst_possible_result=MAXINT, + greater_is_better=False, + squared=True, +) +root_mean_squared_error = make_scorer( + "root_mean_squared_error", + sklearn.metrics.mean_squared_error, + optimum=0, + worst_possible_result=MAXINT, + greater_is_better=False, + squared=False, +) +mean_squared_log_error = make_scorer( + "mean_squared_log_error", + sklearn.metrics.mean_squared_log_error, + optimum=0, + worst_possible_result=MAXINT, + greater_is_better=False, +) +median_absolute_error = make_scorer( + "median_absolute_error", + sklearn.metrics.median_absolute_error, + optimum=0, + worst_possible_result=MAXINT, + greater_is_better=False, +) + +r2 = make_scorer("r2", sklearn.metrics.r2_score) # Standard Classification Scores -accuracy = make_scorer('accuracy', - sklearn.metrics.accuracy_score) -balanced_accuracy = make_scorer('balanced_accuracy', - sklearn.metrics.balanced_accuracy_score) +accuracy = make_scorer("accuracy", sklearn.metrics.accuracy_score) 
+balanced_accuracy = make_scorer( + "balanced_accuracy", sklearn.metrics.balanced_accuracy_score +) # Score functions that need decision values -roc_auc = make_scorer('roc_auc', - sklearn.metrics.roc_auc_score, - greater_is_better=True, - needs_threshold=True) -average_precision = make_scorer('average_precision', - sklearn.metrics.average_precision_score, - needs_threshold=True) +roc_auc = make_scorer( + "roc_auc", + sklearn.metrics.roc_auc_score, + greater_is_better=True, + needs_threshold=True, +) +average_precision = make_scorer( + "average_precision", sklearn.metrics.average_precision_score, needs_threshold=True +) # NOTE: zero_division # # Specified as the explicit default, see sklearn docs: # https://scikit-learn.org/stable/modules/generated/sklearn.metrics.precision_score.html#sklearn-metrics-precision-score precision = make_scorer( - 'precision', partial(sklearn.metrics.precision_score, zero_division=0) -) -recall = make_scorer( - 'recall', partial(sklearn.metrics.recall_score, zero_division=0) -) -f1 = make_scorer( - 'f1', partial(sklearn.metrics.f1_score, zero_division=0) + "precision", partial(sklearn.metrics.precision_score, zero_division=0) ) +recall = make_scorer("recall", partial(sklearn.metrics.recall_score, zero_division=0)) +f1 = make_scorer("f1", partial(sklearn.metrics.f1_score, zero_division=0)) # Score function for probabilistic classification -log_loss = make_scorer('log_loss', - sklearn.metrics.log_loss, - optimum=0, - worst_possible_result=MAXINT, - greater_is_better=False, - needs_proba=True) +log_loss = make_scorer( + "log_loss", + sklearn.metrics.log_loss, + optimum=0, + worst_possible_result=MAXINT, + greater_is_better=False, + needs_proba=True, +) # TODO what about mathews correlation coefficient etc? REGRESSION_METRICS = { scorer.name: scorer for scorer in [ - mean_absolute_error, mean_squared_error, root_mean_squared_error, - mean_squared_log_error, median_absolute_error, r2 + mean_absolute_error, + mean_squared_error, + root_mean_squared_error, + mean_squared_log_error, + median_absolute_error, + r2, ] } CLASSIFICATION_METRICS = { scorer.name: scorer - for scorer in [ - accuracy, balanced_accuracy, roc_auc, average_precision, log_loss - ] + for scorer in [accuracy, balanced_accuracy, roc_auc, average_precision, log_loss] } # NOTE: zero_division @@ -342,13 +370,13 @@ def make_scorer( # https://scikit-learn.org/stable/modules/generated/sklearn.metrics.precision_score.html#sklearn-metrics-precision-score for (base_name, sklearn_metric), average in product( [ - ('precision', sklearn.metrics.precision_score), - ('recall', sklearn.metrics.recall_score), - ('f1', sklearn.metrics.f1_score), + ("precision", sklearn.metrics.precision_score), + ("recall", sklearn.metrics.recall_score), + ("f1", sklearn.metrics.f1_score), ], - ['macro', 'micro', 'samples', 'weighted'] + ["macro", "micro", "samples", "weighted"], ): - name = f'{base_name}_{average}' + name = f"{base_name}_{average}" scorer = make_scorer( name, partial(sklearn_metric, pos_label=None, average=average, zero_division=0) ) @@ -361,7 +389,7 @@ def calculate_score( prediction: np.ndarray, task_type: int, metric: Scorer, - scoring_functions: Optional[List[Scorer]] = None + scoring_functions: Optional[List[Scorer]] = None, ) -> Union[float, Dict[str, float]]: """ Returns a score (a magnitude that allows casting the @@ -396,11 +424,15 @@ def calculate_score( try: score_dict[metric_.name] = _compute_scorer( - metric_, prediction, solution, task_type) + metric_, prediction, solution, task_type + ) except ValueError 
as e: print(e, e.args[0]) - if e.args[0] == "Mean Squared Logarithmic Error cannot be used when " \ - "targets contain negative values.": + if ( + e.args[0] + == "Mean Squared Logarithmic Error cannot be used when " + "targets contain negative values." + ): continue else: raise e @@ -413,16 +445,21 @@ def calculate_score( try: score_dict[metric_.name] = _compute_scorer( - metric_, prediction, solution, task_type) + metric_, prediction, solution, task_type + ) except ValueError as e: - if e.args[0] == 'multiclass format is not supported': + if e.args[0] == "multiclass format is not supported": continue - elif e.args[0] == "Samplewise metrics are not available "\ - "outside of multilabel classification.": + elif ( + e.args[0] == "Samplewise metrics are not available " + "outside of multilabel classification." + ): continue - elif e.args[0] == "Target is multiclass but "\ - "average='binary'. Please choose another average "\ - "setting, one of [None, 'micro', 'macro', 'weighted'].": + elif ( + e.args[0] == "Target is multiclass but " + "average='binary'. Please choose another average " + "setting, one of [None, 'micro', 'macro', 'weighted']." + ): continue else: raise e @@ -438,7 +475,7 @@ def calculate_loss( prediction: np.ndarray, task_type: int, metric: Scorer, - scoring_functions: Optional[List[Scorer]] = None + scoring_functions: Optional[List[Scorer]] = None, ) -> Union[float, Dict[str, float]]: """ Returns a loss (a magnitude that allows casting the @@ -493,10 +530,7 @@ def calculate_loss( def calculate_metric( - metric: Scorer, - prediction: np.ndarray, - solution: np.ndarray, - task_type: int + metric: Scorer, prediction: np.ndarray, solution: np.ndarray, task_type: int ) -> float: """ Returns a metric for the given Auto-Sklearn Scorer object. 
@@ -529,10 +563,7 @@ def calculate_metric( def _compute_scorer( - metric: Scorer, - prediction: np.ndarray, - solution: np.ndarray, - task_type: int + metric: Scorer, prediction: np.ndarray, solution: np.ndarray, task_type: int ) -> float: """ Returns a score (a magnitude that allows casting the @@ -566,9 +597,9 @@ def _compute_scorer( # Must be at bottom so all metrics are defined default_metric_for_task: Dict[int, Scorer] = { - BINARY_CLASSIFICATION: CLASSIFICATION_METRICS['accuracy'], - MULTICLASS_CLASSIFICATION: CLASSIFICATION_METRICS['accuracy'], - MULTILABEL_CLASSIFICATION: CLASSIFICATION_METRICS['f1_macro'], - REGRESSION: REGRESSION_METRICS['r2'], - MULTIOUTPUT_REGRESSION: REGRESSION_METRICS['r2'], + BINARY_CLASSIFICATION: CLASSIFICATION_METRICS["accuracy"], + MULTICLASS_CLASSIFICATION: CLASSIFICATION_METRICS["accuracy"], + MULTILABEL_CLASSIFICATION: CLASSIFICATION_METRICS["f1_macro"], + REGRESSION: REGRESSION_METRICS["r2"], + MULTIOUTPUT_REGRESSION: REGRESSION_METRICS["r2"], } diff --git a/autosklearn/metrics/util.py b/autosklearn/metrics/util.py index b4537d13aa..c25a25b4dc 100644 --- a/autosklearn/metrics/util.py +++ b/autosklearn/metrics/util.py @@ -11,8 +11,8 @@ def sanitize_array(array: np.ndarray) -> np.ndarray: a = np.ravel(array) maxi = np.nanmax(a[np.isfinite(a)]) mini = np.nanmin(a[np.isfinite(a)]) - array[array == float('inf')] = maxi - array[array == float('-inf')] = mini + array[array == float("inf")] = maxi + array[array == float("-inf")] = mini mid = (maxi + mini) / 2 array[np.isnan(array)] = mid return array diff --git a/autosklearn/pipeline/base.py b/autosklearn/pipeline/base.py index 467e7b26c1..93c73b4716 100644 --- a/autosklearn/pipeline/base.py +++ b/autosklearn/pipeline/base.py @@ -1,17 +1,15 @@ from abc import ABCMeta from typing import Dict, Union -from ConfigSpace import Configuration - import numpy as np - import scipy.sparse - +from ConfigSpace import Configuration from sklearn.pipeline import Pipeline -from .components.base import AutoSklearnChoice, AutoSklearnComponent import autosklearn.pipeline.create_searchspace_util +from .components.base import AutoSklearnChoice, AutoSklearnComponent + DATASET_PROPERTIES_TYPE = Dict[str, Union[str, int, bool]] PIPELINE_DATA_DTYPE = Union[ np.ndarray, @@ -31,17 +29,26 @@ class BasePipeline(Pipeline): Notes ----- This class should not be instantiated, only subclassed.""" + __metaclass__ = ABCMeta - def __init__(self, config=None, steps=None, dataset_properties=None, - include=None, exclude=None, random_state=None, - init_params=None): + def __init__( + self, + config=None, + steps=None, + dataset_properties=None, + include=None, + exclude=None, + random_state=None, + init_params=None, + ): self.init_params = init_params if init_params is not None else {} self.include = include if include is not None else {} self.exclude = exclude if exclude is not None else {} - self.dataset_properties = dataset_properties if \ - dataset_properties is not None else {} + self.dataset_properties = ( + dataset_properties if dataset_properties is not None else {} + ) self.random_state = random_state if steps is None: @@ -62,13 +69,17 @@ def __init__(self, config=None, steps=None, dataset_properties=None, print(self.config_space._children) print(config.configuration_space._children) import difflib + diff = difflib.unified_diff( str(self.config_space).splitlines(), - str(config.configuration_space).splitlines()) - diff = '\n'.join(diff) - raise ValueError('Configuration passed does not come from the ' - 'same configuration space. 
Differences are: ' - '%s' % diff) + str(config.configuration_space).splitlines(), + ) + diff = "\n".join(diff) + raise ValueError( + "Configuration passed does not come from the " + "same configuration space. Differences are: " + "%s" % diff + ) self.config = config self.set_hyperparameters(self.config, init_params=init_params) @@ -111,21 +122,22 @@ def fit_transformer(self, X, y, fit_params=None): self.num_targets = 1 if len(y.shape) == 1 else y.shape[1] if fit_params is None: fit_params = {} - fit_params = {key.replace(":", "__"): value for key, value in - fit_params.items()} + fit_params = { + key.replace(":", "__"): value for key, value in fit_params.items() + } fit_params_steps = self._check_fit_params(**fit_params) Xt = self._fit(X, y, **fit_params_steps) return Xt, fit_params_steps[self.steps[-1][0]] def fit_estimator(self, X, y, **fit_params): - fit_params = {key.replace(":", "__"): value for key, value in - fit_params.items()} + fit_params = { + key.replace(":", "__"): value for key, value in fit_params.items() + } self._final_estimator.fit(X, y, **fit_params) return self def iterative_fit(self, X, y, n_iter=1, **fit_params): - self._final_estimator.iterative_fit(X, y, n_iter=n_iter, - **fit_params) + self._final_estimator.iterative_fit(X, y, n_iter=n_iter, **fit_params) def estimator_supports_iterative_fit(self): return self._final_estimator.estimator_supports_iterative_fit() @@ -163,26 +175,30 @@ def predict(self, X, batch_size=None): return super().predict(X).astype(self._output_dtype) else: if not isinstance(batch_size, int): - raise ValueError("Argument 'batch_size' must be of type int, " - "but is '%s'" % type(batch_size)) + raise ValueError( + "Argument 'batch_size' must be of type int, " + "but is '%s'" % type(batch_size) + ) if batch_size <= 0: - raise ValueError("Argument 'batch_size' must be positive, " - "but is %d" % batch_size) + raise ValueError( + "Argument 'batch_size' must be positive, " "but is %d" % batch_size + ) else: if self.num_targets == 1: y = np.zeros((X.shape[0],), dtype=self._output_dtype) else: - y = np.zeros((X.shape[0], self.num_targets), - dtype=self._output_dtype) + y = np.zeros( + (X.shape[0], self.num_targets), dtype=self._output_dtype + ) # Copied and adapted from the scikit-learn GP code - for k in range(max(1, int(np.ceil(float(X.shape[0]) / - batch_size)))): + for k in range(max(1, int(np.ceil(float(X.shape[0]) / batch_size)))): batch_from = k * batch_size batch_to = min([(k + 1) * batch_size, X.shape[0]]) - y[batch_from:batch_to] = \ - self.predict(X[batch_from:batch_to], batch_size=None) + y[batch_from:batch_to] = self.predict( + X[batch_from:batch_to], batch_size=None + ) return y @@ -197,29 +213,33 @@ def set_hyperparameters(self, configuration, init_params=None): ) sub_config_dict = {} for param in configuration: - if param.startswith('%s:' % node_name): + if param.startswith("%s:" % node_name): value = configuration[param] - new_name = param.replace('%s:' % node_name, '', 1) + new_name = param.replace("%s:" % node_name, "", 1) sub_config_dict[new_name] = value - sub_configuration = Configuration(sub_configuration_space, - values=sub_config_dict) + sub_configuration = Configuration( + sub_configuration_space, values=sub_config_dict + ) if init_params is not None: sub_init_params_dict = {} for param in init_params: - if param.startswith('%s:' % node_name): + if param.startswith("%s:" % node_name): value = init_params[param] - new_name = param.replace('%s:' % node_name, '', 1) + new_name = param.replace("%s:" % node_name, "", 1) 
sub_init_params_dict[new_name] = value else: sub_init_params_dict = None - if isinstance(node, (AutoSklearnChoice, AutoSklearnComponent, BasePipeline)): - node.set_hyperparameters(configuration=sub_configuration, - init_params=sub_init_params_dict) + if isinstance( + node, (AutoSklearnChoice, AutoSklearnComponent, BasePipeline) + ): + node.set_hyperparameters( + configuration=sub_configuration, init_params=sub_init_params_dict + ) else: - raise NotImplementedError('Not supported yet!') + raise NotImplementedError("Not supported yet!") # In-code check to make sure init params # is checked after pipeline creation @@ -236,14 +256,17 @@ def get_hyperparameter_search_space(self, dataset_properties=None): The configuration space describing the AutoSklearnClassifier. """ - if not hasattr(self, 'config_space') or self.config_space is None: + if not hasattr(self, "config_space") or self.config_space is None: self.config_space = self._get_hyperparameter_search_space( - include=self.include, exclude=self.exclude, - dataset_properties=self.dataset_properties) + include=self.include, + exclude=self.exclude, + dataset_properties=self.dataset_properties, + ) return self.config_space - def _get_hyperparameter_search_space(self, include=None, exclude=None, - dataset_properties=None): + def _get_hyperparameter_search_space( + self, include=None, exclude=None, dataset_properties=None + ): """Return the configuration space for the CASH problem. This method should be called by the method @@ -283,8 +306,9 @@ def _get_hyperparameter_search_space(self, include=None, exclude=None, """ raise NotImplementedError() - def _get_base_search_space(self, cs, dataset_properties, exclude, - include, pipeline): + def _get_base_search_space( + self, cs, dataset_properties, exclude, include, pipeline + ): if include is None: if self.include is None: include = {} @@ -294,8 +318,9 @@ def _get_base_search_space(self, cs, dataset_properties, exclude, keys = [pair[0] for pair in pipeline] for key in include: if key not in keys: - raise ValueError('Invalid key in include: %s; should be one ' - 'of %s' % (key, keys)) + raise ValueError( + "Invalid key in include: %s; should be one " "of %s" % (key, keys) + ) if exclude is None: if self.exclude is None: @@ -306,26 +331,32 @@ def _get_base_search_space(self, cs, dataset_properties, exclude, keys = [pair[0] for pair in pipeline] for key in exclude: if key not in keys: - raise ValueError('Invalid key in exclude: %s; should be one ' - 'of %s' % (key, keys)) + raise ValueError( + "Invalid key in exclude: %s; should be one " "of %s" % (key, keys) + ) - if 'sparse' not in dataset_properties: + if "sparse" not in dataset_properties: # This dataset is probably dense - dataset_properties['sparse'] = False - if 'signed' not in dataset_properties: + dataset_properties["sparse"] = False + if "signed" not in dataset_properties: # This dataset probably contains unsigned data - dataset_properties['signed'] = False + dataset_properties["signed"] = False matches = autosklearn.pipeline.create_searchspace_util.get_match_array( - pipeline, dataset_properties, include=include, exclude=exclude) + pipeline, dataset_properties, include=include, exclude=exclude + ) # Now we have only legal combinations at this step of the pipeline # Simple sanity checks assert np.sum(matches) != 0, "No valid pipeline found." 
- assert np.sum(matches) <= np.size(matches), \ - "'matches' is not binary; %s <= %d, %s" % \ - (str(np.sum(matches)), np.size(matches), str(matches.shape)) + assert np.sum(matches) <= np.size( + matches + ), "'matches' is not binary; %s <= %d, %s" % ( + str(np.sum(matches)), + np.size(matches), + str(matches.shape), + ) # Iterate each dimension of the matches array (each step of the # pipeline) to see if we can add a hyperparameter for that step @@ -340,26 +371,36 @@ def _get_base_search_space(self, cs, dataset_properties, exclude, cs.add_configuration_space( node_name, node.get_hyperparameter_search_space(dataset_properties), - ) + ) # If the node is a choice, we have to figure out which of its # choices are actually legal choices else: - choices_list = autosklearn.pipeline.create_searchspace_util.\ - find_active_choices(matches, node, node_idx, - dataset_properties, - include.get(node_name), - exclude.get(node_name)) + choices_list = ( + autosklearn.pipeline.create_searchspace_util.find_active_choices( + matches, + node, + node_idx, + dataset_properties, + include.get(node_name), + exclude.get(node_name), + ) + ) sub_config_space = node.get_hyperparameter_search_space( - dataset_properties, include=choices_list) + dataset_properties, include=choices_list + ) cs.add_configuration_space(node_name, sub_config_space) # And now add forbidden parameter configurations # According to matches if np.sum(matches) < np.size(matches): cs = autosklearn.pipeline.create_searchspace_util.add_forbidden( - conf_space=cs, pipeline=pipeline, matches=matches, - dataset_properties=dataset_properties, include=include, - exclude=exclude) + conf_space=cs, + pipeline=pipeline, + matches=matches, + dataset_properties=dataset_properties, + include=include, + exclude=exclude, + ) return cs @@ -371,36 +412,35 @@ def _check_init_params_honored(self, init_params): # None/empty dict, so no further check required return - # There is the scenario, where instance is passed as an argument to the init_params - # 'instance': '{"task_id": "73543c4a360aa24498c0967fbc2f926b"}'} + # There is the scenario, where instance is passed as an argument to the + # init_params 'instance': '{"task_id": "73543c4a360aa24498c0967fbc2f926b"}'} # coming from smac instance. Remove this key to make the testing stricter - init_params.pop('instance', None) + init_params.pop("instance", None) for key, value in init_params.items(): - if ':' not in key: - raise ValueError("Unsupported argument to init_params {}." - "When using init_params, a hierarchical format like " - "node_name:parameter must be provided.".format(key) - ) - node_name = key.split(':', 1)[0] + if ":" not in key: + raise ValueError( + "Unsupported argument to init_params {}." + "When using init_params, a hierarchical format like " + "node_name:parameter must be provided.".format(key) + ) + node_name = key.split(":", 1)[0] if node_name not in self.named_steps.keys(): - raise ValueError("The current node name specified via key={} of init_params " - "is not valid. Valid node names are {}".format( - key, - self.named_steps.keys() - ) - ) + raise ValueError( + "The current node name specified via key={} of init_params " + "is not valid. 
Valid node names are {}".format( + key, self.named_steps.keys() + ) + ) continue - variable_name = key.split(':')[-1] + variable_name = key.split(":")[-1] node = self.named_steps[node_name] if isinstance(node, BasePipeline): # If dealing with a sub pipe, # Call the child _check_init_params_honored with the updated config node._check_init_params_honored( - { - key.replace('%s:' % node_name, '', 1): value - } + {key.replace("%s:" % node_name, "", 1): value} ) continue @@ -412,8 +452,10 @@ def _check_init_params_honored(self, init_params): raise ValueError("Unsupported node type {}".format(type(node))) if variable_name not in node_dict or node_dict[variable_name] != value: - raise ValueError("Cannot properly set the pair {}->{} via init_params" - "".format(key, value)) + raise ValueError( + "Cannot properly set the pair {}->{} via init_params" + "".format(key, value) + ) def __repr__(self): class_name = self.__class__.__name__ @@ -424,34 +466,42 @@ def __repr__(self): if self.config[hp_name] is not None: configuration[hp_name] = self.config[hp_name] - configuration_string = ''.join( - ['configuration={\n ', - ',\n '.join(["'%s': %s" % (hp_name, repr(configuration[hp_name])) - for hp_name in sorted(configuration)]), - '}']) + configuration_string = "".join( + [ + "configuration={\n ", + ",\n ".join( + [ + "'%s': %s" % (hp_name, repr(configuration[hp_name])) + for hp_name in sorted(configuration) + ] + ), + "}", + ] + ) if len(self.dataset_properties) > 0: dataset_properties_string = [] - dataset_properties_string.append('dataset_properties={') + dataset_properties_string.append("dataset_properties={") for i, item in enumerate(self.dataset_properties.items()): if i != 0: - dataset_properties_string.append(',\n ') + dataset_properties_string.append(",\n ") else: - dataset_properties_string.append('\n ') + dataset_properties_string.append("\n ") if isinstance(item[1], str): - dataset_properties_string.append("'%s': '%s'" % (item[0], - item[1])) + dataset_properties_string.append("'%s': '%s'" % (item[0], item[1])) else: - dataset_properties_string.append("'%s': %s" % (item[0], - item[1])) - dataset_properties_string.append('}') - dataset_properties_string = ''.join(dataset_properties_string) - - rval = '%s(%s,\n%s)' % (class_name, configuration, - dataset_properties_string) + dataset_properties_string.append("'%s': %s" % (item[0], item[1])) + dataset_properties_string.append("}") + dataset_properties_string = "".join(dataset_properties_string) + + rval = "%s(%s,\n%s)" % ( + class_name, + configuration, + dataset_properties_string, + ) else: - rval = '%s(%s)' % (class_name, configuration_string) + rval = "%s(%s)" % (class_name, configuration_string) return rval @@ -473,32 +523,54 @@ def _validate_include_exclude_params(self): if self.include is not None and self.exclude is not None: for key in self.include.keys(): if key in self.exclude.keys(): - raise ValueError("Cannot specify include and exclude for same step '{}'." 
- .format(key)) + raise ValueError( + "Cannot specify include and exclude for same step '{}'.".format( + key + ) + ) - supported_steps = {step[0]: step[1] for step in self.steps - if isinstance(step[1], AutoSklearnChoice)} - for arg in ['include', 'exclude']: + supported_steps = { + step[0]: step[1] + for step in self.steps + if isinstance(step[1], AutoSklearnChoice) + } + for arg in ["include", "exclude"]: argument = getattr(self, arg) if not argument: continue for key in list(argument.keys()): if key not in supported_steps: - raise ValueError("The provided key '{}' in the '{}' argument is not valid. The" - " only supported keys for this task are {}" - .format(key, arg, list(supported_steps.keys()))) + raise ValueError( + "The provided key '{}' in the '{}' argument is not valid. The" + " only supported keys for this task are {}".format( + key, arg, list(supported_steps.keys()) + ) + ) candidate_components = argument[key] - if not (isinstance(candidate_components, list) and candidate_components): - raise ValueError("The provided value of the key '{}' in the '{}' argument is " - "not valid. The value must be a non-empty list." - .format(key, arg)) + if not ( + isinstance(candidate_components, list) and candidate_components + ): + raise ValueError( + "The provided value of the key '{}' in the '{}' argument is " + "not valid. The value must be a non-empty list.".format( + key, arg + ) + ) - available_components = list(supported_steps[key].get_available_components( - dataset_properties=self.dataset_properties).keys()) + available_components = list( + supported_steps[key] + .get_available_components( + dataset_properties=self.dataset_properties + ) + .keys() + ) for component in candidate_components: if component not in available_components: - raise ValueError("The provided component '{}' for the key '{}' in the '{}'" - " argument is not valid. The supported components for the" - " step '{}' for this task are {}" - .format(component, key, arg, key, available_components)) + raise ValueError( + "The provided component '{}' for the key '{}' in the '{}'" + " argument is not valid. 
The supported components for the" + " step '{}' for this task are {}".format( + component, key, arg, key, available_components + ) + ) diff --git a/autosklearn/pipeline/classification.py b/autosklearn/pipeline/classification.py index 44ae129f4d..1686e02809 100644 --- a/autosklearn/pipeline/classification.py +++ b/autosklearn/pipeline/classification.py @@ -1,21 +1,22 @@ +from typing import Optional, Union + import copy from itertools import product -from typing import Optional, Union import numpy as np - +from ConfigSpace.configuration_space import Configuration, ConfigurationSpace +from ConfigSpace.forbidden import ForbiddenAndConjunction, ForbiddenEqualsClause from sklearn.base import ClassifierMixin -from ConfigSpace.configuration_space import ConfigurationSpace, Configuration -from ConfigSpace.forbidden import ForbiddenEqualsClause, ForbiddenAndConjunction - -from autosklearn.pipeline.components.data_preprocessing import DataPreprocessorChoice - -from autosklearn.pipeline.components.classification import ClassifierChoice -from autosklearn.pipeline.components.data_preprocessing.balancing.balancing import \ - Balancing -from autosklearn.pipeline.components.feature_preprocessing import FeaturePreprocessorChoice from autosklearn.pipeline.base import BasePipeline +from autosklearn.pipeline.components.classification import ClassifierChoice +from autosklearn.pipeline.components.data_preprocessing import DataPreprocessorChoice +from autosklearn.pipeline.components.data_preprocessing.balancing.balancing import ( + Balancing, +) +from autosklearn.pipeline.components.feature_preprocessing import ( + FeaturePreprocessorChoice, +) from autosklearn.pipeline.constants import SPARSE @@ -75,13 +76,13 @@ def __init__( include=None, exclude=None, random_state: Optional[Union[int, np.random.RandomState]] = None, - init_params=None + init_params=None, ): self._output_dtype = np.int32 if dataset_properties is None: dataset_properties = dict() - if 'target_type' not in dataset_properties: - dataset_properties['target_type'] = 'classification' + if "target_type" not in dataset_properties: + dataset_properties["target_type"] = "classification" super().__init__( config=config, steps=steps, @@ -89,7 +90,7 @@ def __init__( include=include, exclude=exclude, random_state=random_state, - init_params=init_params + init_params=init_params, ) def fit_transformer(self, X, y, fit_params=None): @@ -97,21 +98,24 @@ def fit_transformer(self, X, y, fit_params=None): if fit_params is None: fit_params = {} - if self.config['balancing:strategy'] == 'weighting': - balancing = Balancing(strategy='weighting') + if self.config["balancing:strategy"] == "weighting": + balancing = Balancing(strategy="weighting") _init_params, _fit_params = balancing.get_weights( - y, self.config['classifier:__choice__'], - self.config['feature_preprocessor:__choice__'], - {}, {}) + y, + self.config["classifier:__choice__"], + self.config["feature_preprocessor:__choice__"], + {}, + {}, + ) _init_params.update(self.init_params) - self.set_hyperparameters(configuration=self.config, - init_params=_init_params) + self.set_hyperparameters( + configuration=self.config, init_params=_init_params + ) if _fit_params is not None: fit_params.update(_fit_params) - X, fit_params = super().fit_transformer( - X, y, fit_params=fit_params) + X, fit_params = super().fit_transformer(X, y, fit_params=fit_params) return X, fit_params @@ -136,29 +140,34 @@ def predict_proba(self, X, batch_size=None): else: if not isinstance(batch_size, int): - raise ValueError("Argument 
'batch_size' must be of type int, " - "but is '%s'" % type(batch_size)) + raise ValueError( + "Argument 'batch_size' must be of type int, " + "but is '%s'" % type(batch_size) + ) if batch_size <= 0: - raise ValueError("Argument 'batch_size' must be positive, " - "but is %d" % batch_size) + raise ValueError( + "Argument 'batch_size' must be positive, " "but is %d" % batch_size + ) else: # Probe for the target array dimensions target = self.predict_proba(X[0:2].copy()) - y = np.zeros((X.shape[0], target.shape[1]), - dtype=np.float32) + y = np.zeros((X.shape[0], target.shape[1]), dtype=np.float32) for k in range(max(1, int(np.ceil(float(X.shape[0]) / batch_size)))): batch_from = k * batch_size batch_to = min([(k + 1) * batch_size, X.shape[0]]) - pred_prob = self.predict_proba(X[batch_from:batch_to], batch_size=None) + pred_prob = self.predict_proba( + X[batch_from:batch_to], batch_size=None + ) y[batch_from:batch_to] = pred_prob.astype(np.float32) return y - def _get_hyperparameter_search_space(self, include=None, exclude=None, - dataset_properties=None): + def _get_hyperparameter_search_space( + self, include=None, exclude=None, dataset_properties=None + ): """Create the hyperparameter configuration space. Parameters @@ -174,42 +183,52 @@ def _get_hyperparameter_search_space(self, include=None, exclude=None, if dataset_properties is None or not isinstance(dataset_properties, dict): dataset_properties = dict() - if 'target_type' not in dataset_properties: - dataset_properties['target_type'] = 'classification' - if dataset_properties['target_type'] != 'classification': - dataset_properties['target_type'] = 'classification' + if "target_type" not in dataset_properties: + dataset_properties["target_type"] = "classification" + if dataset_properties["target_type"] != "classification": + dataset_properties["target_type"] = "classification" - if 'sparse' not in dataset_properties: + if "sparse" not in dataset_properties: # This dataset is probably dense - dataset_properties['sparse'] = False + dataset_properties["sparse"] = False cs = self._get_base_search_space( - cs=cs, dataset_properties=dataset_properties, - exclude=exclude, include=include, pipeline=self.steps) + cs=cs, + dataset_properties=dataset_properties, + exclude=exclude, + include=include, + pipeline=self.steps, + ) - classifiers = cs.get_hyperparameter('classifier:__choice__').choices - preprocessors = cs.get_hyperparameter('feature_preprocessor:__choice__').choices + classifiers = cs.get_hyperparameter("classifier:__choice__").choices + preprocessors = cs.get_hyperparameter("feature_preprocessor:__choice__").choices available_classifiers = self._final_estimator.get_available_components( - dataset_properties) + dataset_properties + ) - possible_default_classifier = copy.copy(list( - available_classifiers.keys())) - default = cs.get_hyperparameter('classifier:__choice__').default_value + possible_default_classifier = copy.copy(list(available_classifiers.keys())) + default = cs.get_hyperparameter("classifier:__choice__").default_value del possible_default_classifier[possible_default_classifier.index(default)] # A classifier which can handle sparse data after the densifier is # forbidden for memory issues for key in classifiers: - if SPARSE in available_classifiers[key].get_properties()['input']: - if 'densifier' in preprocessors: + if SPARSE in available_classifiers[key].get_properties()["input"]: + if "densifier" in preprocessors: while True: try: forb_cls = ForbiddenEqualsClause( - cs.get_hyperparameter('classifier:__choice__'), key) - 
forb_fpp = ForbiddenEqualsClause(cs.get_hyperparameter( - 'feature_preprocessor:__choice__'), 'densifier') + cs.get_hyperparameter("classifier:__choice__"), key + ) + forb_fpp = ForbiddenEqualsClause( + cs.get_hyperparameter( + "feature_preprocessor:__choice__" + ), + "densifier", + ) cs.add_forbidden_clause( - ForbiddenAndConjunction(forb_cls, forb_fpp)) + ForbiddenAndConjunction(forb_cls, forb_fpp) + ) # Success break except ValueError: @@ -218,20 +237,29 @@ def _get_hyperparameter_search_space(self, include=None, exclude=None, default = possible_default_classifier.pop() except IndexError: raise ValueError( - "Cannot find a legal default configuration.") + "Cannot find a legal default configuration." + ) cs.get_hyperparameter( - 'classifier:__choice__').default_value = default + "classifier:__choice__" + ).default_value = default # which would take too long # Combinations of non-linear models with feature learning: classifiers_ = [ - "adaboost", "decision_tree", "extra_trees", - "gradient_boosting", "k_nearest_neighbors", - "libsvm_svc", "mlp", "random_forest", + "adaboost", + "decision_tree", + "extra_trees", + "gradient_boosting", + "k_nearest_neighbors", + "libsvm_svc", + "mlp", + "random_forest", "gaussian_nb", ] feature_learning = [ - "kernel_pca", "kitchen_sinks", "nystroem_sampler", + "kernel_pca", + "kitchen_sinks", + "nystroem_sampler", ] for c, f in product(classifiers_, feature_learning): @@ -241,11 +269,19 @@ def _get_hyperparameter_search_space(self, include=None, exclude=None, continue while True: try: - cs.add_forbidden_clause(ForbiddenAndConjunction( - ForbiddenEqualsClause(cs.get_hyperparameter( - "classifier:__choice__"), c), - ForbiddenEqualsClause(cs.get_hyperparameter( - "feature_preprocessor:__choice__"), f))) + cs.add_forbidden_clause( + ForbiddenAndConjunction( + ForbiddenEqualsClause( + cs.get_hyperparameter("classifier:__choice__"), c + ), + ForbiddenEqualsClause( + cs.get_hyperparameter( + "feature_preprocessor:__choice__" + ), + f, + ), + ) + ) break except KeyError: break @@ -254,16 +290,22 @@ def _get_hyperparameter_search_space(self, include=None, exclude=None, try: default = possible_default_classifier.pop() except IndexError: - raise ValueError( - "Cannot find a legal default configuration.") + raise ValueError("Cannot find a legal default configuration.") cs.get_hyperparameter( - 'classifier:__choice__').default_value = default + "classifier:__choice__" + ).default_value = default # Won't work # Multinomial NB etc don't use with features learning, pca etc classifiers_ = ["multinomial_nb"] - preproc_with_negative_X = ["kitchen_sinks", "pca", "truncatedSVD", - "fast_ica", "kernel_pca", "nystroem_sampler"] + preproc_with_negative_X = [ + "kitchen_sinks", + "pca", + "truncatedSVD", + "fast_ica", + "kernel_pca", + "nystroem_sampler", + ] for c, f in product(classifiers_, preproc_with_negative_X): if c not in classifiers: @@ -272,11 +314,19 @@ def _get_hyperparameter_search_space(self, include=None, exclude=None, continue while True: try: - cs.add_forbidden_clause(ForbiddenAndConjunction( - ForbiddenEqualsClause(cs.get_hyperparameter( - "feature_preprocessor:__choice__"), f), - ForbiddenEqualsClause(cs.get_hyperparameter( - "classifier:__choice__"), c))) + cs.add_forbidden_clause( + ForbiddenAndConjunction( + ForbiddenEqualsClause( + cs.get_hyperparameter( + "feature_preprocessor:__choice__" + ), + f, + ), + ForbiddenEqualsClause( + cs.get_hyperparameter("classifier:__choice__"), c + ), + ) + ) break except KeyError: break @@ -285,10 +335,10 @@ def 
_get_hyperparameter_search_space(self, include=None, exclude=None, try: default = possible_default_classifier.pop() except IndexError: - raise ValueError( - "Cannot find a legal default configuration.") + raise ValueError("Cannot find a legal default configuration.") cs.get_hyperparameter( - 'classifier:__choice__').default_value = default + "classifier:__choice__" + ).default_value = default self.configuration_space = cs self.dataset_properties = dataset_properties @@ -297,30 +347,36 @@ def _get_hyperparameter_search_space(self, include=None, exclude=None, def _get_pipeline_steps(self, dataset_properties): steps = [] - default_dataset_properties = {'target_type': 'classification'} + default_dataset_properties = {"target_type": "classification"} if dataset_properties is not None and isinstance(dataset_properties, dict): default_dataset_properties.update(dataset_properties) - steps.extend([ - [ - "data_preprocessor", DataPreprocessorChoice( - dataset_properties=default_dataset_properties, - random_state=self.random_state) - ], - [ - "balancing", Balancing(random_state=self.random_state) - ], + steps.extend( [ - "feature_preprocessor", FeaturePreprocessorChoice( - dataset_properties=default_dataset_properties, - random_state=self.random_state) - ], - [ - 'classifier', ClassifierChoice( - dataset_properties=default_dataset_properties, - random_state=self.random_state) + [ + "data_preprocessor", + DataPreprocessorChoice( + dataset_properties=default_dataset_properties, + random_state=self.random_state, + ), + ], + ["balancing", Balancing(random_state=self.random_state)], + [ + "feature_preprocessor", + FeaturePreprocessorChoice( + dataset_properties=default_dataset_properties, + random_state=self.random_state, + ), + ], + [ + "classifier", + ClassifierChoice( + dataset_properties=default_dataset_properties, + random_state=self.random_state, + ), + ], ] - ]) + ) return steps diff --git a/autosklearn/pipeline/components/base.py b/autosklearn/pipeline/components/base.py index 5864a2a5d6..c4a95df08c 100644 --- a/autosklearn/pipeline/components/base.py +++ b/autosklearn/pipeline/components/base.py @@ -1,9 +1,10 @@ -from collections import OrderedDict +from typing import Dict + import importlib import inspect import pkgutil import sys -from typing import Dict +from collections import OrderedDict from sklearn.base import BaseEstimator, TransformerMixin @@ -21,8 +22,11 @@ def find_components(package, directory, base_class): module = importlib.import_module(full_module_name) for member_name, obj in inspect.getmembers(module): - if inspect.isclass(obj) and issubclass(obj, base_class) and \ - obj != base_class: + if ( + inspect.isclass(obj) + and issubclass(obj, base_class) + and obj != base_class + ): # TODO test if the obj implements the interface # Keep in mind that this only instantiates the ensemble_wrapper, # but not the real target classifier @@ -42,24 +46,35 @@ def add_component(self, obj): name = obj.__name__ classifier = obj else: - raise TypeError('add_component works only with a subclass of %s' % - str(self.base_class)) + raise TypeError( + "add_component works only with a subclass of %s" % str(self.base_class) + ) properties = set(classifier.get_properties()) - should_be_there = {'shortname', 'name', 'handles_regression', - 'handles_classification', 'handles_multiclass', - 'handles_multilabel', 'handles_multioutput', - 'is_deterministic', 'input', 'output'} + should_be_there = { + "shortname", + "name", + "handles_regression", + "handles_classification", + "handles_multiclass", + 
"handles_multilabel", + "handles_multioutput", + "is_deterministic", + "input", + "output", + } for property in properties: if property not in should_be_there: - raise ValueError('Property %s must not be specified for ' - 'algorithm %s. Only the following properties ' - 'can be specified: %s' % - (property, name, str(should_be_there))) + raise ValueError( + "Property %s must not be specified for " + "algorithm %s. Only the following properties " + "can be specified: %s" % (property, name, str(should_be_there)) + ) for property in should_be_there: if property not in properties: - raise ValueError('Property %s not specified for algorithm %s' % - (property, name)) + raise ValueError( + "Property %s not specified for algorithm %s" % (property, name) + ) self.components[name] = classifier @@ -126,34 +141,35 @@ def set_hyperparameters(self, configuration, init_params=None): for param, value in params.items(): if not hasattr(self, param): - raise ValueError('Cannot set hyperparameter %s for %s because ' - 'the hyperparameter does not exist.' % - (param, str(self))) + raise ValueError( + "Cannot set hyperparameter %s for %s because " + "the hyperparameter does not exist." % (param, str(self)) + ) setattr(self, param, value) if init_params is not None: for param, value in init_params.items(): if not hasattr(self, param): - raise ValueError('Cannot set init param %s for %s because ' - 'the init param does not exist.' % - (param, str(self))) + raise ValueError( + "Cannot set init param %s for %s because " + "the init param does not exist." % (param, str(self)) + ) setattr(self, param, value) return self def __str__(self): - name = self.get_properties()['name'] + name = self.get_properties()["name"] return "autosklearn.pipeline %s" % name class IterativeComponent(AutoSklearnComponent): - def fit(self, X, y, sample_weight=None): self.iterative_fit(X, y, n_iter=2, refit=True) iteration = 2 while not self.configuration_fully_fitted(): - n_iter = int(2 ** iteration / 2) + n_iter = int(2**iteration / 2) self.iterative_fit(X, y, n_iter=n_iter, refit=False) iteration += 1 @@ -168,14 +184,15 @@ def get_current_iter(self): class IterativeComponentWithSampleWeight(AutoSklearnComponent): - def fit(self, X, y, sample_weight=None): self.iterative_fit(X, y, n_iter=2, refit=True, sample_weight=sample_weight) iteration = 2 while not self.configuration_fully_fitted(): - n_iter = int(2 ** iteration / 2) - self.iterative_fit(X, y, n_iter=n_iter, refit=False, sample_weight=sample_weight) + n_iter = int(2**iteration / 2) + self.iterative_fit( + X, y, n_iter=n_iter, refit=False, sample_weight=sample_weight + ) iteration += 1 return self @@ -356,23 +373,25 @@ def __init__(self, dataset_properties, random_state=None): def get_components(cls): raise NotImplementedError() - def get_available_components(self, dataset_properties=None, - include=None, - exclude=None): + def get_available_components( + self, dataset_properties=None, include=None, exclude=None + ): if dataset_properties is None: dataset_properties = {} if include is not None and exclude is not None: raise ValueError( - "The argument include and exclude cannot be used together.") + "The argument include and exclude cannot be used together." 
+ ) available_comp = self.get_components() if include is not None: for incl in include: if incl not in available_comp: - raise ValueError("Trying to include unknown component: " - "%s" % incl) + raise ValueError( + "Trying to include unknown component: " "%s" % incl + ) components_dict = OrderedDict() for name in available_comp: @@ -381,14 +400,14 @@ def get_available_components(self, dataset_properties=None, elif exclude is not None and name in exclude: continue - if 'sparse' in dataset_properties and dataset_properties['sparse']: + if "sparse" in dataset_properties and dataset_properties["sparse"]: # In case the dataset is sparse, ignore # components that do not handle sparse data # Auto-sklearn uses SPARSE constant as a mechanism # to indicate whether a component can handle sparse data. # If SPARSE is not in the input properties of the component, it # means SPARSE is not a valid input to this component, so filter it out - if SPARSE not in available_comp[name].get_properties()['input']: + if SPARSE not in available_comp[name].get_properties()["input"]: continue components_dict[name] = available_comp[name] @@ -399,29 +418,28 @@ def set_hyperparameters(self, configuration, init_params=None): new_params = {} params = configuration.get_dictionary() - choice = params['__choice__'] - del params['__choice__'] + choice = params["__choice__"] + del params["__choice__"] for param, value in params.items(): - param = param.replace(choice, '').replace(':', '') + param = param.replace(choice, "").replace(":", "") new_params[param] = value if init_params is not None: for param, value in init_params.items(): - param = param.replace(choice, '').replace(':', '') + param = param.replace(choice, "").replace(":", "") new_params[param] = value - new_params['random_state'] = self.random_state + new_params["random_state"] = self.random_state self.new_params = new_params self.choice = self.get_components()[choice](**new_params) return self - def get_hyperparameter_search_space(self, dataset_properties=None, - default=None, - include=None, - exclude=None): + def get_hyperparameter_search_space( + self, dataset_properties=None, default=None, include=None, exclude=None + ): raise NotImplementedError() def fit(self, X, y, **kwargs): diff --git a/autosklearn/pipeline/components/classification/__init__.py b/autosklearn/pipeline/components/classification/__init__.py index 2dca6623ae..c95334273a 100644 --- a/autosklearn/pipeline/components/classification/__init__.py +++ b/autosklearn/pipeline/components/classification/__init__.py @@ -1,20 +1,27 @@ -__author__ = 'feurerm' +__author__ = "feurerm" -from collections import OrderedDict from typing import Type + import os +from collections import OrderedDict -from ..base import AutoSklearnClassificationAlgorithm, find_components, \ - ThirdPartyComponents, AutoSklearnChoice, _addons from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import CategoricalHyperparameter +from ..base import ( + AutoSklearnChoice, + AutoSklearnClassificationAlgorithm, + ThirdPartyComponents, + _addons, + find_components, +) + classifier_directory = os.path.split(__file__)[0] -_classifiers = find_components(__package__, - classifier_directory, - AutoSklearnClassificationAlgorithm) +_classifiers = find_components( + __package__, classifier_directory, AutoSklearnClassificationAlgorithm +) additional_components = ThirdPartyComponents(AutoSklearnClassificationAlgorithm) -_addons['classification'] = additional_components +_addons["classification"] = 
additional_components def add_classifier(classifier: Type[AutoSklearnClassificationAlgorithm]) -> None: @@ -22,7 +29,6 @@ def add_classifier(classifier: Type[AutoSklearnClassificationAlgorithm]) -> None class ClassifierChoice(AutoSklearnChoice): - @classmethod def get_components(cls): components = OrderedDict() @@ -30,9 +36,9 @@ def get_components(cls): components.update(additional_components.components) return components - def get_available_components(cls, dataset_properties=None, - include=None, - exclude=None): + def get_available_components( + cls, dataset_properties=None, include=None, exclude=None + ): if dataset_properties is None: dataset_properties = {} @@ -40,13 +46,16 @@ def get_available_components(cls, dataset_properties=None, components_dict = OrderedDict() if include is not None and exclude is not None: - raise ValueError("The argument include and exclude cannot be used together.") + raise ValueError( + "The argument include and exclude cannot be used together." + ) if include is not None: for incl in include: if incl not in available_comp: - raise ValueError("Trying to include unknown component: " - "%s" % incl) + raise ValueError( + "Trying to include unknown component: " "%s" % incl + ) for name in available_comp: if include is not None and name not in include: @@ -60,43 +69,47 @@ def get_available_components(cls, dataset_properties=None, if entry == ClassifierChoice: continue - if entry.get_properties()['handles_classification'] is False: + if entry.get_properties()["handles_classification"] is False: continue - if dataset_properties.get('multiclass') is True and \ - entry.get_properties()['handles_multiclass'] is False: + if ( + dataset_properties.get("multiclass") is True + and entry.get_properties()["handles_multiclass"] is False + ): continue - if dataset_properties.get('multilabel') is True and \ - available_comp[name].get_properties()['handles_multilabel'] is False: + if ( + dataset_properties.get("multilabel") is True + and available_comp[name].get_properties()["handles_multilabel"] is False + ): continue components_dict[name] = entry return components_dict - def get_hyperparameter_search_space(self, dataset_properties=None, - default=None, - include=None, - exclude=None): + def get_hyperparameter_search_space( + self, dataset_properties=None, default=None, include=None, exclude=None + ): if dataset_properties is None: dataset_properties = {} if include is not None and exclude is not None: - raise ValueError("The arguments include and " - "exclude cannot be used together.") + raise ValueError( + "The arguments include and " "exclude cannot be used together." 
+ ) cs = ConfigurationSpace() # Compile a list of all estimator objects for this problem available_estimators = self.get_available_components( - dataset_properties=dataset_properties, - include=include, - exclude=exclude) + dataset_properties=dataset_properties, include=include, exclude=exclude + ) if len(available_estimators) == 0: raise ValueError("No classifiers found") if default is None: - defaults = ['random_forest', 'liblinear_svc', 'sgd', - 'libsvm_svc'] + list(available_estimators.keys()) + defaults = ["random_forest", "liblinear_svc", "sgd", "libsvm_svc"] + list( + available_estimators.keys() + ) for default_ in defaults: if default_ in available_estimators: if include is not None and default_ not in include: @@ -106,18 +119,20 @@ def get_hyperparameter_search_space(self, dataset_properties=None, default = default_ break - estimator = CategoricalHyperparameter('__choice__', - list(available_estimators.keys()), - default_value=default) + estimator = CategoricalHyperparameter( + "__choice__", list(available_estimators.keys()), default_value=default + ) cs.add_hyperparameter(estimator) for estimator_name in available_estimators.keys(): - estimator_configuration_space = available_estimators[estimator_name].\ - get_hyperparameter_search_space(dataset_properties) - parent_hyperparameter = {'parent': estimator, - 'value': estimator_name} - cs.add_configuration_space(estimator_name, - estimator_configuration_space, - parent_hyperparameter=parent_hyperparameter) + estimator_configuration_space = available_estimators[ + estimator_name + ].get_hyperparameter_search_space(dataset_properties) + parent_hyperparameter = {"parent": estimator, "value": estimator_name} + cs.add_configuration_space( + estimator_name, + estimator_configuration_space, + parent_hyperparameter=parent_hyperparameter, + ) self.configuration_space = cs self.dataset_properties = dataset_properties @@ -127,7 +142,7 @@ def predict_proba(self, X): return self.choice.predict_proba(X) def estimator_supports_iterative_fit(self): - return hasattr(self.choice, 'iterative_fit') + return hasattr(self.choice, "iterative_fit") def get_max_iter(self): if self.estimator_supports_iterative_fit(): diff --git a/autosklearn/pipeline/components/classification/adaboost.py b/autosklearn/pipeline/components/classification/adaboost.py index 31567aaeae..3634f53956 100644 --- a/autosklearn/pipeline/components/classification/adaboost.py +++ b/autosklearn/pipeline/components/classification/adaboost.py @@ -1,15 +1,18 @@ from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ - UniformIntegerHyperparameter, CategoricalHyperparameter +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformFloatHyperparameter, + UniformIntegerHyperparameter, +) from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm -from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA, PREDICTIONS, SPARSE +from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA class AdaboostClassifier(AutoSklearnClassificationAlgorithm): - - def __init__(self, n_estimators, learning_rate, algorithm, max_depth, - random_state=None): + def __init__( + self, n_estimators, learning_rate, algorithm, max_depth, random_state=None + ): self.n_estimators = n_estimators self.learning_rate = learning_rate self.algorithm = algorithm @@ -31,7 +34,7 @@ def fit(self, X, Y, sample_weight=None): n_estimators=self.n_estimators, 
learning_rate=self.learning_rate, algorithm=self.algorithm, - random_state=self.random_state + random_state=self.random_state, ) estimator.fit(X, Y, sample_weight=sample_weight) @@ -51,29 +54,35 @@ def predict_proba(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'AB', - 'name': 'AdaBoost Classifier', - 'handles_regression': False, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': False, - 'handles_multioutput': False, - 'is_deterministic': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (PREDICTIONS,)} + return { + "shortname": "AB", + "name": "AdaBoost Classifier", + "handles_regression": False, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": False, + "handles_multioutput": False, + "is_deterministic": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (PREDICTIONS,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): cs = ConfigurationSpace() n_estimators = UniformIntegerHyperparameter( - name="n_estimators", lower=50, upper=500, default_value=50, log=False) + name="n_estimators", lower=50, upper=500, default_value=50, log=False + ) learning_rate = UniformFloatHyperparameter( - name="learning_rate", lower=0.01, upper=2, default_value=0.1, log=True) + name="learning_rate", lower=0.01, upper=2, default_value=0.1, log=True + ) algorithm = CategoricalHyperparameter( - name="algorithm", choices=["SAMME.R", "SAMME"], default_value="SAMME.R") + name="algorithm", choices=["SAMME.R", "SAMME"], default_value="SAMME.R" + ) max_depth = UniformIntegerHyperparameter( - name="max_depth", lower=1, upper=10, default_value=1, log=False) + name="max_depth", lower=1, upper=10, default_value=1, log=False + ) cs.add_hyperparameters([n_estimators, learning_rate, algorithm, max_depth]) return cs diff --git a/autosklearn/pipeline/components/classification/bernoulli_nb.py b/autosklearn/pipeline/components/classification/bernoulli_nb.py index 9bb2f8c590..8271c5f602 100644 --- a/autosklearn/pipeline/components/classification/bernoulli_nb.py +++ b/autosklearn/pipeline/components/classification/bernoulli_nb.py @@ -1,13 +1,12 @@ import numpy as np - from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ - CategoricalHyperparameter - -from autosklearn.pipeline.components.base import ( - AutoSklearnClassificationAlgorithm, +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformFloatHyperparameter, ) -from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA, PREDICTIONS, SPARSE + +from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm +from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA from autosklearn.util.common import check_for_bool @@ -23,14 +22,18 @@ def fit(self, X, y): import sklearn.naive_bayes self.fit_prior = check_for_bool(self.fit_prior) - self.estimator = sklearn.naive_bayes.BernoulliNB(alpha=self.alpha, fit_prior=self.fit_prior) + self.estimator = sklearn.naive_bayes.BernoulliNB( + alpha=self.alpha, fit_prior=self.fit_prior + ) self.classes_ = np.unique(y.astype(int)) # Fallback for multilabel classification if len(y.shape) > 1 and y.shape[1] > 1: import sklearn.multiclass + self.estimator = sklearn.multiclass.OneVsRestClassifier( - self.estimator, n_jobs=1) + self.estimator, n_jobs=1 + ) self.estimator.fit(X, y) return self @@ -47,16 +50,18 @@ def 
predict_proba(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'BernoulliNB', - 'name': 'Bernoulli Naive Bayes classifier', - 'handles_regression': False, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': False, - 'is_deterministic': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (PREDICTIONS,)} + return { + "shortname": "BernoulliNB", + "name": "Bernoulli Naive Bayes classifier", + "handles_regression": False, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": False, + "is_deterministic": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (PREDICTIONS,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): @@ -65,12 +70,13 @@ def get_hyperparameter_search_space(dataset_properties=None): # the smoothing parameter is a non-negative float # I will limit it to 1000 and put it on a logarithmic scale. (SF) # Please adjust that, if you know a proper range, this is just a guess. - alpha = UniformFloatHyperparameter(name="alpha", lower=1e-2, upper=100, - default_value=1, log=True) + alpha = UniformFloatHyperparameter( + name="alpha", lower=1e-2, upper=100, default_value=1, log=True + ) - fit_prior = CategoricalHyperparameter(name="fit_prior", - choices=["True", "False"], - default_value="True") + fit_prior = CategoricalHyperparameter( + name="fit_prior", choices=["True", "False"], default_value="True" + ) cs.add_hyperparameters([alpha, fit_prior]) diff --git a/autosklearn/pipeline/components/classification/decision_tree.py b/autosklearn/pipeline/components/classification/decision_tree.py index 045e5c3e44..fbfc6b7c6a 100644 --- a/autosklearn/pipeline/components/classification/decision_tree.py +++ b/autosklearn/pipeline/components/classification/decision_tree.py @@ -1,22 +1,35 @@ import numpy as np - from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ - UniformIntegerHyperparameter, CategoricalHyperparameter, \ - UnParametrizedHyperparameter, Constant +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + Constant, + UniformFloatHyperparameter, + UniformIntegerHyperparameter, + UnParametrizedHyperparameter, +) -from autosklearn.pipeline.components.base import \ - AutoSklearnClassificationAlgorithm -from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA, PREDICTIONS, SPARSE -from autosklearn.pipeline.implementations.util import convert_multioutput_multiclass_to_multilabel +from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm +from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA +from autosklearn.pipeline.implementations.util import ( + convert_multioutput_multiclass_to_multilabel, +) from autosklearn.util.common import check_none class DecisionTree(AutoSklearnClassificationAlgorithm): - def __init__(self, criterion, max_features, max_depth_factor, - min_samples_split, min_samples_leaf, min_weight_fraction_leaf, - max_leaf_nodes, min_impurity_decrease, class_weight=None, - random_state=None): + def __init__( + self, + criterion, + max_features, + max_depth_factor, + min_samples_split, + min_samples_leaf, + min_weight_fraction_leaf, + max_leaf_nodes, + min_impurity_decrease, + class_weight=None, + random_state=None, + ): self.criterion = criterion self.max_features = max_features self.max_depth_factor = 
max_depth_factor @@ -40,8 +53,8 @@ def fit(self, X, y, sample_weight=None): num_features = X.shape[1] self.max_depth_factor = int(self.max_depth_factor) max_depth_factor = max( - 1, - int(np.round(self.max_depth_factor * num_features, 0))) + 1, int(np.round(self.max_depth_factor * num_features, 0)) + ) self.min_samples_split = int(self.min_samples_split) self.min_samples_leaf = int(self.min_samples_leaf) if check_none(self.max_leaf_nodes): @@ -60,7 +73,8 @@ def fit(self, X, y, sample_weight=None): min_weight_fraction_leaf=self.min_weight_fraction_leaf, min_impurity_decrease=self.min_impurity_decrease, class_weight=self.class_weight, - random_state=self.random_state) + random_state=self.random_state, + ) self.estimator.fit(X, y, sample_weight=sample_weight) return self @@ -78,37 +92,53 @@ def predict_proba(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'DT', - 'name': 'Decision Tree Classifier', - 'handles_regression': False, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': False, - 'is_deterministic': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (PREDICTIONS,)} + return { + "shortname": "DT", + "name": "Decision Tree Classifier", + "handles_regression": False, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": False, + "is_deterministic": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (PREDICTIONS,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): cs = ConfigurationSpace() criterion = CategoricalHyperparameter( - "criterion", ["gini", "entropy"], default_value="gini") + "criterion", ["gini", "entropy"], default_value="gini" + ) max_depth_factor = UniformFloatHyperparameter( - 'max_depth_factor', 0., 2., default_value=0.5) + "max_depth_factor", 0.0, 2.0, default_value=0.5 + ) min_samples_split = UniformIntegerHyperparameter( - "min_samples_split", 2, 20, default_value=2) + "min_samples_split", 2, 20, default_value=2 + ) min_samples_leaf = UniformIntegerHyperparameter( - "min_samples_leaf", 1, 20, default_value=1) + "min_samples_leaf", 1, 20, default_value=1 + ) min_weight_fraction_leaf = Constant("min_weight_fraction_leaf", 0.0) - max_features = UnParametrizedHyperparameter('max_features', 1.0) + max_features = UnParametrizedHyperparameter("max_features", 1.0) max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None") - min_impurity_decrease = UnParametrizedHyperparameter('min_impurity_decrease', 0.0) + min_impurity_decrease = UnParametrizedHyperparameter( + "min_impurity_decrease", 0.0 + ) - cs.add_hyperparameters([criterion, max_features, max_depth_factor, - min_samples_split, min_samples_leaf, - min_weight_fraction_leaf, max_leaf_nodes, - min_impurity_decrease]) + cs.add_hyperparameters( + [ + criterion, + max_features, + max_depth_factor, + min_samples_split, + min_samples_leaf, + min_weight_fraction_leaf, + max_leaf_nodes, + min_impurity_decrease, + ] + ) return cs diff --git a/autosklearn/pipeline/components/classification/extra_trees.py b/autosklearn/pipeline/components/classification/extra_trees.py index bc3eb5e4d7..5c7ce1879a 100644 --- a/autosklearn/pipeline/components/classification/extra_trees.py +++ b/autosklearn/pipeline/components/classification/extra_trees.py @@ -1,13 +1,19 @@ from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ - 
UniformIntegerHyperparameter, CategoricalHyperparameter, UnParametrizedHyperparameter +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformFloatHyperparameter, + UniformIntegerHyperparameter, + UnParametrizedHyperparameter, +) from autosklearn.pipeline.components.base import ( AutoSklearnClassificationAlgorithm, IterativeComponentWithSampleWeight, ) -from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA, PREDICTIONS, SPARSE -from autosklearn.pipeline.implementations.util import convert_multioutput_multiclass_to_multilabel +from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA +from autosklearn.pipeline.implementations.util import ( + convert_multioutput_multiclass_to_multilabel, +) from autosklearn.util.common import check_for_bool, check_none @@ -15,12 +21,23 @@ class ExtraTreesClassifier( IterativeComponentWithSampleWeight, AutoSklearnClassificationAlgorithm, ): - - def __init__(self, criterion, min_samples_leaf, - min_samples_split, max_features, bootstrap, max_leaf_nodes, - max_depth, min_weight_fraction_leaf, min_impurity_decrease, - oob_score=False, n_jobs=1, random_state=None, verbose=0, - class_weight=None): + def __init__( + self, + criterion, + min_samples_leaf, + min_samples_split, + max_features, + bootstrap, + max_leaf_nodes, + max_depth, + min_weight_fraction_leaf, + min_impurity_decrease, + oob_score=False, + n_jobs=1, + random_state=None, + verbose=0, + class_weight=None, + ): self.n_estimators = self.get_max_iter() self.criterion = criterion @@ -55,8 +72,9 @@ def iterative_fit(self, X, y, sample_weight=None, n_iter=1, refit=False): if self.estimator is None: max_features = int(X.shape[1] ** float(self.max_features)) if self.criterion not in ("gini", "entropy"): - raise ValueError("'criterion' is not in ('gini', 'entropy'): " - "%s" % self.criterion) + raise ValueError( + "'criterion' is not in ('gini', 'entropy'): " "%s" % self.criterion + ) if check_none(self.max_depth): self.max_depth = None @@ -77,27 +95,30 @@ def iterative_fit(self, X, y, sample_weight=None, n_iter=1, refit=False): self.n_jobs = int(self.n_jobs) self.verbose = int(self.verbose) - self.estimator = ETC(n_estimators=n_iter, - criterion=self.criterion, - max_depth=self.max_depth, - min_samples_split=self.min_samples_split, - min_samples_leaf=self.min_samples_leaf, - bootstrap=self.bootstrap, - max_features=max_features, - max_leaf_nodes=self.max_leaf_nodes, - min_weight_fraction_leaf=self.min_weight_fraction_leaf, - min_impurity_decrease=self.min_impurity_decrease, - oob_score=self.oob_score, - n_jobs=self.n_jobs, - verbose=self.verbose, - random_state=self.random_state, - class_weight=self.class_weight, - warm_start=True) + self.estimator = ETC( + n_estimators=n_iter, + criterion=self.criterion, + max_depth=self.max_depth, + min_samples_split=self.min_samples_split, + min_samples_leaf=self.min_samples_leaf, + bootstrap=self.bootstrap, + max_features=max_features, + max_leaf_nodes=self.max_leaf_nodes, + min_weight_fraction_leaf=self.min_weight_fraction_leaf, + min_impurity_decrease=self.min_impurity_decrease, + oob_score=self.oob_score, + n_jobs=self.n_jobs, + verbose=self.verbose, + random_state=self.random_state, + class_weight=self.class_weight, + warm_start=True, + ) else: self.estimator.n_estimators += n_iter - self.estimator.n_estimators = min(self.estimator.n_estimators, - self.n_estimators) + self.estimator.n_estimators = min( + self.estimator.n_estimators, self.n_estimators + ) self.estimator.fit(X, y, 
sample_weight=sample_weight) return self @@ -121,46 +142,67 @@ def predict_proba(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'ET', - 'name': 'Extra Trees Classifier', - 'handles_regression': False, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': False, - 'is_deterministic': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (PREDICTIONS,)} + return { + "shortname": "ET", + "name": "Extra Trees Classifier", + "handles_regression": False, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": False, + "is_deterministic": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (PREDICTIONS,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): cs = ConfigurationSpace() criterion = CategoricalHyperparameter( - "criterion", ["gini", "entropy"], default_value="gini") - - # The maximum number of features used in the forest is calculated as m^max_features, where - # m is the total number of features, and max_features is the hyperparameter specified below. - # The default is 0.5, which yields sqrt(m) features as max_features in the estimator. This - # corresponds with Geurts' heuristic. + "criterion", ["gini", "entropy"], default_value="gini" + ) + + # The maximum number of features used in the forest is calculated as + # m^max_features, where m is the total number of features, + # and max_features is the hyperparameter specified below. + # The default is 0.5, which yields sqrt(m) features as max_features + # in the estimator. This corresponds with Geurts' heuristic. max_features = UniformFloatHyperparameter( - "max_features", 0., 1., default_value=0.5) + "max_features", 0.0, 1.0, default_value=0.5 + ) max_depth = UnParametrizedHyperparameter(name="max_depth", value="None") min_samples_split = UniformIntegerHyperparameter( - "min_samples_split", 2, 20, default_value=2) + "min_samples_split", 2, 20, default_value=2 + ) min_samples_leaf = UniformIntegerHyperparameter( - "min_samples_leaf", 1, 20, default_value=1) - min_weight_fraction_leaf = UnParametrizedHyperparameter('min_weight_fraction_leaf', 0.) 
+ "min_samples_leaf", 1, 20, default_value=1 + ) + min_weight_fraction_leaf = UnParametrizedHyperparameter( + "min_weight_fraction_leaf", 0.0 + ) max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None") - min_impurity_decrease = UnParametrizedHyperparameter('min_impurity_decrease', 0.0) + min_impurity_decrease = UnParametrizedHyperparameter( + "min_impurity_decrease", 0.0 + ) bootstrap = CategoricalHyperparameter( - "bootstrap", ["True", "False"], default_value="False") - cs.add_hyperparameters([criterion, max_features, - max_depth, min_samples_split, min_samples_leaf, - min_weight_fraction_leaf, max_leaf_nodes, - min_impurity_decrease, bootstrap]) + "bootstrap", ["True", "False"], default_value="False" + ) + cs.add_hyperparameters( + [ + criterion, + max_features, + max_depth, + min_samples_split, + min_samples_leaf, + min_weight_fraction_leaf, + max_leaf_nodes, + min_impurity_decrease, + bootstrap, + ] + ) return cs diff --git a/autosklearn/pipeline/components/classification/gaussian_nb.py b/autosklearn/pipeline/components/classification/gaussian_nb.py index cae1733baf..8e978e9631 100644 --- a/autosklearn/pipeline/components/classification/gaussian_nb.py +++ b/autosklearn/pipeline/components/classification/gaussian_nb.py @@ -1,15 +1,11 @@ import numpy as np - from ConfigSpace.configuration_space import ConfigurationSpace -from autosklearn.pipeline.components.base import ( - AutoSklearnClassificationAlgorithm, -) -from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA, PREDICTIONS +from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm +from autosklearn.pipeline.constants import DENSE, PREDICTIONS, UNSIGNED_DATA class GaussianNB(AutoSklearnClassificationAlgorithm): - def __init__(self, random_state=None, verbose=0): self.random_state = random_state @@ -25,8 +21,10 @@ def fit(self, X, y): # Fallback for multilabel classification if len(y.shape) > 1 and y.shape[1] > 1: import sklearn.multiclass + self.estimator = sklearn.multiclass.OneVsRestClassifier( - self.estimator, n_jobs=1) + self.estimator, n_jobs=1 + ) self.estimator.fit(X, y) return self @@ -43,16 +41,18 @@ def predict_proba(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'GaussianNB', - 'name': 'Gaussian Naive Bayes classifier', - 'handles_regression': False, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': False, - 'is_deterministic': True, - 'input': (DENSE, UNSIGNED_DATA), - 'output': (PREDICTIONS,)} + return { + "shortname": "GaussianNB", + "name": "Gaussian Naive Bayes classifier", + "handles_regression": False, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": False, + "is_deterministic": True, + "input": (DENSE, UNSIGNED_DATA), + "output": (PREDICTIONS,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): diff --git a/autosklearn/pipeline/components/classification/gradient_boosting.py b/autosklearn/pipeline/components/classification/gradient_boosting.py index 0faca0faa2..50b0b284bd 100644 --- a/autosklearn/pipeline/components/classification/gradient_boosting.py +++ b/autosklearn/pipeline/components/classification/gradient_boosting.py @@ -1,26 +1,42 @@ import numpy as np - -from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ - UniformIntegerHyperparameter, 
UnParametrizedHyperparameter, Constant, \ - CategoricalHyperparameter from ConfigSpace.conditions import EqualsCondition, InCondition +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + Constant, + UniformFloatHyperparameter, + UniformIntegerHyperparameter, + UnParametrizedHyperparameter, +) from autosklearn.pipeline.components.base import ( AutoSklearnClassificationAlgorithm, - IterativeComponentWithSampleWeight) -from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA, PREDICTIONS + IterativeComponentWithSampleWeight, +) +from autosklearn.pipeline.constants import DENSE, PREDICTIONS, UNSIGNED_DATA from autosklearn.util.common import check_none class GradientBoostingClassifier( - IterativeComponentWithSampleWeight, - AutoSklearnClassificationAlgorithm + IterativeComponentWithSampleWeight, AutoSklearnClassificationAlgorithm ): - def __init__(self, loss, learning_rate, min_samples_leaf, max_depth, - max_leaf_nodes, max_bins, l2_regularization, early_stop, tol, scoring, - n_iter_no_change=0, validation_fraction=None, random_state=None, - verbose=0): + def __init__( + self, + loss, + learning_rate, + min_samples_leaf, + max_depth, + max_leaf_nodes, + max_bins, + l2_regularization, + early_stop, + tol, + scoring, + n_iter_no_change=0, + validation_fraction=None, + random_state=None, + verbose=0, + ): self.loss = loss self.learning_rate = learning_rate self.max_iter = self.get_max_iter() @@ -119,13 +135,14 @@ def iterative_fit(self, X, y, n_iter=2, refit=False, sample_weight=None): ) else: self.estimator.max_iter += n_iter - self.estimator.max_iter = min(self.estimator.max_iter, - self.max_iter) + self.estimator.max_iter = min(self.estimator.max_iter, self.max_iter) self.estimator.fit(X, y, sample_weight=sample_weight) - if self.estimator.max_iter >= self.max_iter \ - or self.estimator.max_iter > self.estimator.n_iter_: + if ( + self.estimator.max_iter >= self.max_iter + or self.estimator.max_iter > self.estimator.n_iter_ + ): self.fully_fit_ = True @@ -134,7 +151,7 @@ def iterative_fit(self, X, y, n_iter=2, refit=False, sample_weight=None): def configuration_fully_fitted(self): if self.estimator is None: return False - elif not hasattr(self, 'fully_fit_'): + elif not hasattr(self, "fully_fit_"): return False else: return self.fully_fit_ @@ -151,53 +168,77 @@ def predict_proba(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'GB', - 'name': 'Gradient Boosting Classifier', - 'handles_regression': False, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': False, - 'handles_multioutput': False, - 'is_deterministic': True, - 'input': (DENSE, UNSIGNED_DATA), - 'output': (PREDICTIONS,)} + return { + "shortname": "GB", + "name": "Gradient Boosting Classifier", + "handles_regression": False, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": False, + "handles_multioutput": False, + "is_deterministic": True, + "input": (DENSE, UNSIGNED_DATA), + "output": (PREDICTIONS,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): cs = ConfigurationSpace() loss = Constant("loss", "auto") learning_rate = UniformFloatHyperparameter( - name="learning_rate", lower=0.01, upper=1, default_value=0.1, log=True) + name="learning_rate", lower=0.01, upper=1, default_value=0.1, log=True + ) min_samples_leaf = UniformIntegerHyperparameter( - name="min_samples_leaf", lower=1, upper=200, 
default_value=20, log=True) - max_depth = UnParametrizedHyperparameter( - name="max_depth", value="None") + name="min_samples_leaf", lower=1, upper=200, default_value=20, log=True + ) + max_depth = UnParametrizedHyperparameter(name="max_depth", value="None") max_leaf_nodes = UniformIntegerHyperparameter( - name="max_leaf_nodes", lower=3, upper=2047, default_value=31, log=True) + name="max_leaf_nodes", lower=3, upper=2047, default_value=31, log=True + ) max_bins = Constant("max_bins", 255) l2_regularization = UniformFloatHyperparameter( - name="l2_regularization", lower=1E-10, upper=1, default_value=1E-10, log=True) + name="l2_regularization", + lower=1e-10, + upper=1, + default_value=1e-10, + log=True, + ) early_stop = CategoricalHyperparameter( - name="early_stop", choices=["off", "valid", "train"], default_value="off") - tol = UnParametrizedHyperparameter( - name="tol", value=1e-7) - scoring = UnParametrizedHyperparameter( - name="scoring", value="loss") + name="early_stop", choices=["off", "valid", "train"], default_value="off" + ) + tol = UnParametrizedHyperparameter(name="tol", value=1e-7) + scoring = UnParametrizedHyperparameter(name="scoring", value="loss") n_iter_no_change = UniformIntegerHyperparameter( - name="n_iter_no_change", lower=1, upper=20, default_value=10) + name="n_iter_no_change", lower=1, upper=20, default_value=10 + ) validation_fraction = UniformFloatHyperparameter( - name="validation_fraction", lower=0.01, upper=0.4, default_value=0.1) - - cs.add_hyperparameters([loss, learning_rate, min_samples_leaf, - max_depth, max_leaf_nodes, max_bins, l2_regularization, - early_stop, tol, scoring, n_iter_no_change, - validation_fraction]) + name="validation_fraction", lower=0.01, upper=0.4, default_value=0.1 + ) + + cs.add_hyperparameters( + [ + loss, + learning_rate, + min_samples_leaf, + max_depth, + max_leaf_nodes, + max_bins, + l2_regularization, + early_stop, + tol, + scoring, + n_iter_no_change, + validation_fraction, + ] + ) n_iter_no_change_cond = InCondition( - n_iter_no_change, early_stop, ["valid", "train"]) + n_iter_no_change, early_stop, ["valid", "train"] + ) validation_fraction_cond = EqualsCondition( - validation_fraction, early_stop, "valid") + validation_fraction, early_stop, "valid" + ) cs.add_conditions([n_iter_no_change_cond, validation_fraction_cond]) diff --git a/autosklearn/pipeline/components/classification/k_nearest_neighbors.py b/autosklearn/pipeline/components/classification/k_nearest_neighbors.py index 6901451f11..fe55e0783d 100644 --- a/autosklearn/pipeline/components/classification/k_nearest_neighbors.py +++ b/autosklearn/pipeline/components/classification/k_nearest_neighbors.py @@ -1,12 +1,14 @@ from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import CategoricalHyperparameter, UniformIntegerHyperparameter +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformIntegerHyperparameter, +) from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm -from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA, PREDICTIONS, SPARSE +from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA class KNearestNeighborsClassifier(AutoSklearnClassificationAlgorithm): - def __init__(self, n_neighbors, weights, p, random_state=None): self.n_neighbors = n_neighbors self.weights = weights @@ -14,13 +16,12 @@ def __init__(self, n_neighbors, weights, p, random_state=None): self.random_state = random_state def fit(self, X, Y): - import 
sklearn.neighbors import sklearn.multiclass + import sklearn.neighbors - estimator = \ - sklearn.neighbors.KNeighborsClassifier(n_neighbors=self.n_neighbors, - weights=self.weights, - p=self.p) + estimator = sklearn.neighbors.KNeighborsClassifier( + n_neighbors=self.n_neighbors, weights=self.weights, p=self.p + ) if len(Y.shape) == 2 and Y.shape[1] > 1: self.estimator = sklearn.multiclass.OneVsRestClassifier(estimator, n_jobs=1) @@ -42,25 +43,29 @@ def predict_proba(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'KNN', - 'name': 'K-Nearest Neighbor Classification', - 'handles_regression': False, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': False, - 'is_deterministic': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (PREDICTIONS,)} + return { + "shortname": "KNN", + "name": "K-Nearest Neighbor Classification", + "handles_regression": False, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": False, + "is_deterministic": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (PREDICTIONS,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): cs = ConfigurationSpace() n_neighbors = UniformIntegerHyperparameter( - name="n_neighbors", lower=1, upper=100, log=True, default_value=1) + name="n_neighbors", lower=1, upper=100, log=True, default_value=1 + ) weights = CategoricalHyperparameter( - name="weights", choices=["uniform", "distance"], default_value="uniform") + name="weights", choices=["uniform", "distance"], default_value="uniform" + ) p = CategoricalHyperparameter(name="p", choices=[1, 2], default_value=2) cs.add_hyperparameters([n_neighbors, weights, p]) diff --git a/autosklearn/pipeline/components/classification/lda.py b/autosklearn/pipeline/components/classification/lda.py index 1897db78ca..29a08f80b5 100644 --- a/autosklearn/pipeline/components/classification/lda.py +++ b/autosklearn/pipeline/components/classification/lda.py @@ -1,17 +1,18 @@ -from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import UniformFloatHyperparameter, CategoricalHyperparameter from ConfigSpace.conditions import EqualsCondition +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformFloatHyperparameter, +) -from autosklearn.pipeline.components.base import \ - AutoSklearnClassificationAlgorithm -from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA, PREDICTIONS +from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm +from autosklearn.pipeline.constants import DENSE, PREDICTIONS, UNSIGNED_DATA from autosklearn.pipeline.implementations.util import softmax from autosklearn.util.common import check_none class LDA(AutoSklearnClassificationAlgorithm): - def __init__(self, shrinkage, tol, shrinkage_factor=0.5, - random_state=None): + def __init__(self, shrinkage, tol, shrinkage_factor=0.5, random_state=None): self.shrinkage = shrinkage self.tol = tol self.shrinkage_factor = shrinkage_factor @@ -23,20 +24,21 @@ def fit(self, X, Y): if check_none(self.shrinkage): self.shrinkage_ = None - solver = 'svd' + solver = "svd" elif self.shrinkage == "auto": - self.shrinkage_ = 'auto' - solver = 'lsqr' + self.shrinkage_ = "auto" + solver = "lsqr" elif self.shrinkage == "manual": self.shrinkage_ = float(self.shrinkage_factor) - 
solver = 'lsqr' + solver = "lsqr" else: raise ValueError(self.shrinkage) self.tol = float(self.tol) estimator = sklearn.discriminant_analysis.LinearDiscriminantAnalysis( - shrinkage=self.shrinkage_, tol=self.tol, solver=solver) + shrinkage=self.shrinkage_, tol=self.tol, solver=solver + ) if len(Y.shape) == 2 and Y.shape[1] > 1: self.estimator = sklearn.multiclass.OneVsRestClassifier(estimator, n_jobs=1) @@ -60,25 +62,29 @@ def predict_proba(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'LDA', - 'name': 'Linear Discriminant Analysis', - 'handles_regression': False, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': False, - 'is_deterministic': True, - 'input': (DENSE, UNSIGNED_DATA), - 'output': (PREDICTIONS,)} + return { + "shortname": "LDA", + "name": "Linear Discriminant Analysis", + "handles_regression": False, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": False, + "is_deterministic": True, + "input": (DENSE, UNSIGNED_DATA), + "output": (PREDICTIONS,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): cs = ConfigurationSpace() shrinkage = CategoricalHyperparameter( - "shrinkage", ["None", "auto", "manual"], default_value="None") - shrinkage_factor = UniformFloatHyperparameter( - "shrinkage_factor", 0., 1., 0.5) - tol = UniformFloatHyperparameter("tol", 1e-5, 1e-1, default_value=1e-4, log=True) + "shrinkage", ["None", "auto", "manual"], default_value="None" + ) + shrinkage_factor = UniformFloatHyperparameter("shrinkage_factor", 0.0, 1.0, 0.5) + tol = UniformFloatHyperparameter( + "tol", 1e-5, 1e-1, default_value=1e-4, log=True + ) cs.add_hyperparameters([shrinkage, shrinkage_factor, tol]) cs.add_condition(EqualsCondition(shrinkage_factor, shrinkage, "manual")) diff --git a/autosklearn/pipeline/components/classification/liblinear_svc.py b/autosklearn/pipeline/components/classification/liblinear_svc.py index 9c625139f5..3f57ef8f94 100644 --- a/autosklearn/pipeline/components/classification/liblinear_svc.py +++ b/autosklearn/pipeline/components/classification/liblinear_svc.py @@ -1,20 +1,32 @@ from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ - CategoricalHyperparameter, Constant -from ConfigSpace.forbidden import ForbiddenEqualsClause, \ - ForbiddenAndConjunction +from ConfigSpace.forbidden import ForbiddenAndConjunction, ForbiddenEqualsClause +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + Constant, + UniformFloatHyperparameter, +) from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm +from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA from autosklearn.pipeline.implementations.util import softmax -from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA, PREDICTIONS, SPARSE from autosklearn.util.common import check_for_bool, check_none class LibLinear_SVC(AutoSklearnClassificationAlgorithm): # Liblinear is not deterministic as it uses a RNG inside - def __init__(self, penalty, loss, dual, tol, C, multi_class, - fit_intercept, intercept_scaling, class_weight=None, - random_state=None): + def __init__( + self, + penalty, + loss, + dual, + tol, + C, + multi_class, + fit_intercept, + intercept_scaling, + class_weight=None, + random_state=None, + ): self.penalty = penalty self.loss = loss self.dual = 
dual @@ -28,8 +40,8 @@ def __init__(self, penalty, loss, dual, tol, C, multi_class, self.estimator = None def fit(self, X, Y): - import sklearn.svm import sklearn.multiclass + import sklearn.svm self.C = float(self.C) self.tol = float(self.tol) @@ -43,16 +55,18 @@ def fit(self, X, Y): if check_none(self.class_weight): self.class_weight = None - estimator = sklearn.svm.LinearSVC(penalty=self.penalty, - loss=self.loss, - dual=self.dual, - tol=self.tol, - C=self.C, - class_weight=self.class_weight, - fit_intercept=self.fit_intercept, - intercept_scaling=self.intercept_scaling, - multi_class=self.multi_class, - random_state=self.random_state) + estimator = sklearn.svm.LinearSVC( + penalty=self.penalty, + loss=self.loss, + dual=self.dual, + tol=self.tol, + C=self.C, + class_weight=self.class_weight, + fit_intercept=self.fit_intercept, + intercept_scaling=self.intercept_scaling, + multi_class=self.multi_class, + random_state=self.random_state, + ) if len(Y.shape) == 2 and Y.shape[1] > 1: self.estimator = sklearn.multiclass.OneVsRestClassifier(estimator, n_jobs=1) @@ -76,50 +90,51 @@ def predict_proba(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'Liblinear-SVC', - 'name': 'Liblinear Support Vector Classification', - 'handles_regression': False, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': False, - 'is_deterministic': False, - 'input': (SPARSE, DENSE, UNSIGNED_DATA), - 'output': (PREDICTIONS,)} + return { + "shortname": "Liblinear-SVC", + "name": "Liblinear Support Vector Classification", + "handles_regression": False, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": False, + "is_deterministic": False, + "input": (SPARSE, DENSE, UNSIGNED_DATA), + "output": (PREDICTIONS,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): cs = ConfigurationSpace() - penalty = CategoricalHyperparameter( - "penalty", ["l1", "l2"], default_value="l2") + penalty = CategoricalHyperparameter("penalty", ["l1", "l2"], default_value="l2") loss = CategoricalHyperparameter( - "loss", ["hinge", "squared_hinge"], default_value="squared_hinge") + "loss", ["hinge", "squared_hinge"], default_value="squared_hinge" + ) dual = Constant("dual", "False") # This is set ad-hoc tol = UniformFloatHyperparameter( - "tol", 1e-5, 1e-1, default_value=1e-4, log=True) - C = UniformFloatHyperparameter( - "C", 0.03125, 32768, log=True, default_value=1.0) + "tol", 1e-5, 1e-1, default_value=1e-4, log=True + ) + C = UniformFloatHyperparameter("C", 0.03125, 32768, log=True, default_value=1.0) multi_class = Constant("multi_class", "ovr") # These are set ad-hoc fit_intercept = Constant("fit_intercept", "True") intercept_scaling = Constant("intercept_scaling", 1) - cs.add_hyperparameters([penalty, loss, dual, tol, C, multi_class, - fit_intercept, intercept_scaling]) + cs.add_hyperparameters( + [penalty, loss, dual, tol, C, multi_class, fit_intercept, intercept_scaling] + ) penalty_and_loss = ForbiddenAndConjunction( - ForbiddenEqualsClause(penalty, "l1"), - ForbiddenEqualsClause(loss, "hinge") + ForbiddenEqualsClause(penalty, "l1"), ForbiddenEqualsClause(loss, "hinge") ) constant_penalty_and_loss = ForbiddenAndConjunction( ForbiddenEqualsClause(dual, "False"), ForbiddenEqualsClause(penalty, "l2"), - ForbiddenEqualsClause(loss, "hinge") + ForbiddenEqualsClause(loss, "hinge"), ) penalty_and_dual = ForbiddenAndConjunction( - 
ForbiddenEqualsClause(dual, "False"), - ForbiddenEqualsClause(penalty, "l1") + ForbiddenEqualsClause(dual, "False"), ForbiddenEqualsClause(penalty, "l1") ) cs.add_forbidden_clause(penalty_and_loss) cs.add_forbidden_clause(constant_penalty_and_loss) diff --git a/autosklearn/pipeline/components/classification/libsvm_svc.py b/autosklearn/pipeline/components/classification/libsvm_svc.py index 97c55be49d..ba423161c1 100644 --- a/autosklearn/pipeline/components/classification/libsvm_svc.py +++ b/autosklearn/pipeline/components/classification/libsvm_svc.py @@ -1,21 +1,35 @@ import resource import sys -from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.conditions import EqualsCondition, InCondition -from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ - UniformIntegerHyperparameter, CategoricalHyperparameter, \ - UnParametrizedHyperparameter +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformFloatHyperparameter, + UniformIntegerHyperparameter, + UnParametrizedHyperparameter, +) from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm -from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA, PREDICTIONS, SPARSE +from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA from autosklearn.pipeline.implementations.util import softmax from autosklearn.util.common import check_for_bool, check_none class LibSVM_SVC(AutoSklearnClassificationAlgorithm): - def __init__(self, C, kernel, gamma, shrinking, tol, max_iter, - class_weight=None, degree=3, coef0=0, random_state=None): + def __init__( + self, + C, + kernel, + gamma, + shrinking, + tol, + max_iter, + class_weight=None, + degree=3, + coef0=0, + random_state=None, + ): self.C = C self.kernel = kernel self.degree = degree @@ -31,9 +45,9 @@ def __init__(self, C, kernel, gamma, shrinking, tol, max_iter, def fit(self, X, Y): import sklearn.svm - # Calculate the size of the kernel cache (in MB) for sklearn's LibSVM. The cache size is - # calculated as 2/3 of the available memory (which is calculated as the memory limit minus - # the used memory) + # Calculate the size of the kernel cache (in MB) for sklearn's LibSVM. 
+ # The cache size is calculated as 2/3 of the available memory + # (which is calculated as the memory limit minus the used memory) try: # Retrieve memory limits imposed on the process soft, hard = resource.getrlimit(resource.RLIMIT_AS) @@ -45,9 +59,9 @@ def fit(self, X, Y): # Retrieve memory used by this process maxrss = resource.getrusage(resource.RUSAGE_SELF)[2] / 1024 - # In MacOS, the MaxRSS output of resource.getrusage in bytes; on other platforms, - # it's in kilobytes - if sys.platform == 'darwin': + # In MacOS, the MaxRSS output of resource.getrusage in bytes; + # on other platforms, it's in kilobytes + if sys.platform == "darwin": maxrss = maxrss / 1024 cache_size = (soft - maxrss) / 1.5 @@ -80,18 +94,20 @@ def fit(self, X, Y): if check_none(self.class_weight): self.class_weight = None - self.estimator = sklearn.svm.SVC(C=self.C, - kernel=self.kernel, - degree=self.degree, - gamma=self.gamma, - coef0=self.coef0, - shrinking=self.shrinking, - tol=self.tol, - class_weight=self.class_weight, - max_iter=self.max_iter, - random_state=self.random_state, - cache_size=cache_size, - decision_function_shape='ovr') + self.estimator = sklearn.svm.SVC( + C=self.C, + kernel=self.kernel, + degree=self.degree, + gamma=self.gamma, + coef0=self.coef0, + shrinking=self.shrinking, + tol=self.tol, + class_weight=self.class_weight, + max_iter=self.max_iter, + random_state=self.random_state, + cache_size=cache_size, + decision_function_shape="ovr", + ) self.estimator.fit(X, Y) return self @@ -109,41 +125,45 @@ def predict_proba(self, X): @staticmethod def get_properties(dataset_properties=None): return { - 'shortname': 'LibSVM-SVC', - 'name': 'LibSVM Support Vector Classification', - 'handles_regression': False, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': False, - 'handles_multioutput': False, - 'is_deterministic': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (PREDICTIONS,)} + "shortname": "LibSVM-SVC", + "name": "LibSVM Support Vector Classification", + "handles_regression": False, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": False, + "handles_multioutput": False, + "is_deterministic": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (PREDICTIONS,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): - C = UniformFloatHyperparameter("C", 0.03125, 32768, log=True, - default_value=1.0) + C = UniformFloatHyperparameter("C", 0.03125, 32768, log=True, default_value=1.0) # No linear kernel here, because we have liblinear - kernel = CategoricalHyperparameter(name="kernel", - choices=["rbf", "poly", "sigmoid"], - default_value="rbf") + kernel = CategoricalHyperparameter( + name="kernel", choices=["rbf", "poly", "sigmoid"], default_value="rbf" + ) degree = UniformIntegerHyperparameter("degree", 2, 5, default_value=3) - gamma = UniformFloatHyperparameter("gamma", 3.0517578125e-05, 8, - log=True, default_value=0.1) + gamma = UniformFloatHyperparameter( + "gamma", 3.0517578125e-05, 8, log=True, default_value=0.1 + ) # TODO this is totally ad-hoc coef0 = UniformFloatHyperparameter("coef0", -1, 1, default_value=0) # probability is no hyperparameter, but an argument to the SVM algo - shrinking = CategoricalHyperparameter("shrinking", ["True", "False"], - default_value="True") - tol = UniformFloatHyperparameter("tol", 1e-5, 1e-1, default_value=1e-3, - log=True) + shrinking = CategoricalHyperparameter( + "shrinking", ["True", "False"], default_value="True" + ) + tol = 
UniformFloatHyperparameter( + "tol", 1e-5, 1e-1, default_value=1e-3, log=True + ) # cache size is not a hyperparameter, but an argument to the program! max_iter = UnParametrizedHyperparameter("max_iter", -1) cs = ConfigurationSpace() - cs.add_hyperparameters([C, kernel, degree, gamma, coef0, shrinking, - tol, max_iter]) + cs.add_hyperparameters( + [C, kernel, degree, gamma, coef0, shrinking, tol, max_iter] + ) degree_depends_on_poly = EqualsCondition(degree, kernel, "poly") coef0_condition = InCondition(coef0, kernel, ["poly", "sigmoid"]) diff --git a/autosklearn/pipeline/components/classification/mlp.py b/autosklearn/pipeline/components/classification/mlp.py index e26f2318cf..f7001d7bc1 100644 --- a/autosklearn/pipeline/components/classification/mlp.py +++ b/autosklearn/pipeline/components/classification/mlp.py @@ -1,30 +1,45 @@ import copy -import numpy as np -from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ - UniformIntegerHyperparameter, UnParametrizedHyperparameter, Constant, \ - CategoricalHyperparameter +import numpy as np from ConfigSpace.conditions import InCondition +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + Constant, + UniformFloatHyperparameter, + UniformIntegerHyperparameter, + UnParametrizedHyperparameter, +) from autosklearn.pipeline.components.base import ( AutoSklearnClassificationAlgorithm, IterativeComponent, ) -from autosklearn.pipeline.constants import SPARSE, DENSE, UNSIGNED_DATA, PREDICTIONS +from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA from autosklearn.util.common import check_for_bool -class MLPClassifier( - IterativeComponent, - AutoSklearnClassificationAlgorithm -): - def __init__(self, hidden_layer_depth, num_nodes_per_layer, activation, alpha, - learning_rate_init, early_stopping, solver, batch_size, - n_iter_no_change, tol, - shuffle, beta_1, beta_2, epsilon, - validation_fraction=None, - random_state=None, verbose=0): +class MLPClassifier(IterativeComponent, AutoSklearnClassificationAlgorithm): + def __init__( + self, + hidden_layer_depth, + num_nodes_per_layer, + activation, + alpha, + learning_rate_init, + early_stopping, + solver, + batch_size, + n_iter_no_change, + tol, + shuffle, + beta_1, + beta_2, + epsilon, + validation_fraction=None, + random_state=None, + verbose=0, + ): self.hidden_layer_depth = hidden_layer_depth self.num_nodes_per_layer = num_nodes_per_layer self.max_iter = self.get_max_iter() @@ -60,6 +75,7 @@ def iterative_fit(self, X, y, n_iter=2, refit=False): Set n_iter=2 for the same reason as for SGD """ from sklearn.neural_network import MLPClassifier + n_iter = max(n_iter, 2) if refit: @@ -71,8 +87,9 @@ def iterative_fit(self, X, y, n_iter=2, refit=False): self.max_iter = int(self.max_iter) self.hidden_layer_depth = int(self.hidden_layer_depth) self.num_nodes_per_layer = int(self.num_nodes_per_layer) - self.hidden_layer_sizes = tuple(self.num_nodes_per_layer - for i in range(self.hidden_layer_depth)) + self.hidden_layer_sizes = tuple( + self.num_nodes_per_layer for i in range(self.hidden_layer_depth) + ) self.activation = str(self.activation) self.alpha = float(self.alpha) self.learning_rate_init = float(self.learning_rate_init) @@ -88,7 +105,9 @@ def iterative_fit(self, X, y, n_iter=2, refit=False): self.n_iter_no_change = int(self.n_iter_no_change) self.early_stopping_val = True else: - raise ValueError("Set early 
stopping to unknown value %s" % self.early_stopping) + raise ValueError( + "Set early stopping to unknown value %s" % self.early_stopping + ) # elif self.early_stopping == "off": # self.validation_fraction = 0 # self.tol = 10000
@@ -142,8 +161,10 @@ def iterative_fit(self, X, y, n_iter=2, refit=False): new_max_iter = min(self.max_iter - self.estimator.n_iter_, n_iter) self.estimator.max_iter = new_max_iter self.estimator.fit(X, y) - if self.estimator.n_iter_ >= self.max_iter or \ - self.estimator._no_improvement_count > self.n_iter_no_change: + if ( + self.estimator.n_iter_ >= self.max_iter + or self.estimator._no_improvement_count > self.n_iter_no_change + ): self._fully_fit = True return self
@@ -151,7 +172,7 @@ def iterative_fit(self, X, y, n_iter=2, refit=False): def configuration_fully_fitted(self): if self.estimator is None: return False - elif not hasattr(self, '_fully_fit'): + elif not hasattr(self, "_fully_fit"): return False else: return self._fully_fit
@@ -168,42 +189,55 @@ def predict_proba(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'MLP', - 'name': 'Multilayer Percepton', - 'handles_regression': False, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': False, - 'is_deterministic': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (PREDICTIONS,)} + return { + "shortname": "MLP", + "name": "Multilayer Perceptron", + "handles_regression": False, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": False, + "is_deterministic": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (PREDICTIONS,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): cs = ConfigurationSpace() - hidden_layer_depth = UniformIntegerHyperparameter(name="hidden_layer_depth", - lower=1, upper=3, default_value=1) - num_nodes_per_layer = UniformIntegerHyperparameter(name="num_nodes_per_layer", - lower=16, upper=264, default_value=32, - log=True) - activation = CategoricalHyperparameter(name="activation", choices=['tanh', 'relu'], - default_value='relu') - alpha = UniformFloatHyperparameter(name="alpha", lower=1e-7, upper=1e-1, default_value=1e-4, - log=True) - - learning_rate_init = UniformFloatHyperparameter(name="learning_rate_init", - lower=1e-4, upper=0.5, default_value=1e-3, - log=True) + hidden_layer_depth = UniformIntegerHyperparameter( + name="hidden_layer_depth", lower=1, upper=3, default_value=1 + ) + num_nodes_per_layer = UniformIntegerHyperparameter( + name="num_nodes_per_layer", lower=16, upper=264, default_value=32, log=True + ) + activation = CategoricalHyperparameter( + name="activation", choices=["tanh", "relu"], default_value="relu" + ) + alpha = UniformFloatHyperparameter( + name="alpha", lower=1e-7, upper=1e-1, default_value=1e-4, log=True + ) + + learning_rate_init = UniformFloatHyperparameter( + name="learning_rate_init", + lower=1e-4, + upper=0.5, + default_value=1e-3, + log=True, + ) # Not allowing to turn off early stopping - early_stopping = CategoricalHyperparameter(name="early_stopping", - choices=["valid", "train"], # , "off"], - default_value="valid") + early_stopping = CategoricalHyperparameter( + name="early_stopping", + choices=["valid", "train"], # , "off"], + default_value="valid", + ) # Constants - n_iter_no_change = Constant(name="n_iter_no_change", value=32) # default=10 is too low + n_iter_no_change = Constant( + name="n_iter_no_change", value=32
+ ) # default=10 is too low validation_fraction = Constant(name="validation_fraction", value=0.1) tol = UnParametrizedHyperparameter(name="tol", value=1e-4) - solver = Constant(name="solver", value='adam') + solver = Constant(name="solver", value="adam") # Relying on sklearn defaults for now batch_size = UnParametrizedHyperparameter(name="batch_size", value="auto") @@ -221,17 +255,33 @@ def get_hyperparameter_search_space(dataset_properties=None): # max_fun --> only used when solver=lbfgs # activation=["identity", "logistic"] --> not useful for classification - cs.add_hyperparameters([hidden_layer_depth, num_nodes_per_layer, - activation, alpha, - learning_rate_init, early_stopping, - n_iter_no_change, validation_fraction, tol, - solver, batch_size, shuffle, - beta_1, beta_2, epsilon]) + cs.add_hyperparameters( + [ + hidden_layer_depth, + num_nodes_per_layer, + activation, + alpha, + learning_rate_init, + early_stopping, + n_iter_no_change, + validation_fraction, + tol, + solver, + batch_size, + shuffle, + beta_1, + beta_2, + epsilon, + ] + ) - validation_fraction_cond = InCondition(validation_fraction, early_stopping, ["valid"]) + validation_fraction_cond = InCondition( + validation_fraction, early_stopping, ["valid"] + ) cs.add_conditions([validation_fraction_cond]) # We always use early stopping - # n_iter_no_change_cond = InCondition(n_iter_no_change, early_stopping, ["valid", "train"]) + # n_iter_no_change_cond = \ + # InCondition(n_iter_no_change, early_stopping, ["valid", "train"]) # tol_cond = InCondition(n_iter_no_change, early_stopping, ["valid", "train"]) # cs.add_conditions([n_iter_no_change_cond, tol_cond]) return cs diff --git a/autosklearn/pipeline/components/classification/multinomial_nb.py b/autosklearn/pipeline/components/classification/multinomial_nb.py index e678bd4c77..7b65be8a5c 100644 --- a/autosklearn/pipeline/components/classification/multinomial_nb.py +++ b/autosklearn/pipeline/components/classification/multinomial_nb.py @@ -1,18 +1,16 @@ import numpy as np - from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ - CategoricalHyperparameter - -from autosklearn.pipeline.components.base import ( - AutoSklearnClassificationAlgorithm, +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformFloatHyperparameter, ) -from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, SIGNED_DATA + +from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm +from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SIGNED_DATA, SPARSE from autosklearn.util.common import check_for_bool class MultinomialNB(AutoSklearnClassificationAlgorithm): - def __init__(self, alpha, fit_prior, random_state=None, verbose=0): self.alpha = alpha self.fit_prior = fit_prior @@ -21,8 +19,8 @@ def __init__(self, alpha, fit_prior, random_state=None, verbose=0): self.estimator = None def fit(self, X, y): - import sklearn.naive_bayes import scipy.sparse + import sklearn.naive_bayes self.fit_prior = check_for_bool(self.fit_prior) self.alpha = float(self.alpha) @@ -31,7 +29,7 @@ def fit(self, X, y): self.estimator = sklearn.naive_bayes.MultinomialNB( alpha=self.alpha, fit_prior=self.fit_prior, - ) + ) self.classes_ = np.unique(y.astype(int)) # Because the pipeline guarantees that each feature is positive, @@ -44,8 +42,10 @@ def fit(self, X, y): # Fallback for multilabel classification if len(y.shape) > 1 and y.shape[1] > 1: import sklearn.multiclass + 
self.estimator = sklearn.multiclass.OneVsRestClassifier( - self.estimator, n_jobs=1) + self.estimator, n_jobs=1 + ) self.estimator.fit(X, y) return self @@ -62,16 +62,18 @@ def predict_proba(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'MultinomialNB', - 'name': 'Multinomial Naive Bayes classifier', - 'handles_regression': False, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': False, - 'is_deterministic': True, - 'input': (DENSE, SPARSE, SIGNED_DATA), - 'output': (PREDICTIONS,)} + return { + "shortname": "MultinomialNB", + "name": "Multinomial Naive Bayes classifier", + "handles_regression": False, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": False, + "is_deterministic": True, + "input": (DENSE, SPARSE, SIGNED_DATA), + "output": (PREDICTIONS,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): @@ -80,12 +82,13 @@ def get_hyperparameter_search_space(dataset_properties=None): # the smoothing parameter is a non-negative float # I will limit it to 100 and put it on a logarithmic scale. (SF) # Please adjust that, if you know a proper range, this is just a guess. - alpha = UniformFloatHyperparameter(name="alpha", lower=1e-2, upper=100, - default_value=1, log=True) + alpha = UniformFloatHyperparameter( + name="alpha", lower=1e-2, upper=100, default_value=1, log=True + ) - fit_prior = CategoricalHyperparameter(name="fit_prior", - choices=["True", "False"], - default_value="True") + fit_prior = CategoricalHyperparameter( + name="fit_prior", choices=["True", "False"], default_value="True" + ) cs.add_hyperparameters([alpha, fit_prior]) diff --git a/autosklearn/pipeline/components/classification/passive_aggressive.py b/autosklearn/pipeline/components/classification/passive_aggressive.py index 5fb1f1bbf7..494ea7db06 100644 --- a/autosklearn/pipeline/components/classification/passive_aggressive.py +++ b/autosklearn/pipeline/components/classification/passive_aggressive.py @@ -1,14 +1,16 @@ import numpy as np - from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ - CategoricalHyperparameter, UnParametrizedHyperparameter +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformFloatHyperparameter, + UnParametrizedHyperparameter, +) from autosklearn.pipeline.components.base import ( AutoSklearnClassificationAlgorithm, IterativeComponentWithSampleWeight, ) -from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA, PREDICTIONS +from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA from autosklearn.pipeline.implementations.util import softmax from autosklearn.util.common import check_for_bool @@ -76,9 +78,11 @@ def iterative_fit(self, X, y, n_iter=2, refit=False, sample_weight=None): # Fallback for multilabel classification if len(y.shape) > 1 and y.shape[1] > 1: import sklearn.multiclass + self.estimator.max_iter = self.get_max_iter() self.estimator = sklearn.multiclass.OneVsRestClassifier( - self.estimator, n_jobs=1) + self.estimator, n_jobs=1 + ) self.estimator.fit(X, y) self.fully_fit_ = True else: @@ -91,7 +95,8 @@ def iterative_fit(self, X, y, n_iter=2, refit=False, sample_weight=None): self.estimator._validate_params() lr = "pa1" if self.estimator.loss == "hinge" else "pa2" self.estimator._partial_fit( - X, y, + X, + y, alpha=1.0, 
C=self.estimator.C, loss="hinge", @@ -100,12 +105,12 @@ def iterative_fit(self, X, y, n_iter=2, refit=False, sample_weight=None): classes=None, sample_weight=sample_weight, coef_init=None, - intercept_init=None + intercept_init=None, ) self.n_iter_ += self.estimator.n_iter_ if ( self.estimator.max_iter >= self.max_iter - or self.estimator.max_iter > self.n_iter_ + or self.estimator.max_iter > self.n_iter_ ): self.fully_fit_ = True @@ -114,7 +119,7 @@ def iterative_fit(self, X, y, n_iter=2, refit=False, sample_weight=None): def configuration_fully_fitted(self): if self.estimator is None: return False - elif not hasattr(self, 'fully_fit_'): + elif not hasattr(self, "fully_fit_"): return False else: return self.fully_fit_ @@ -133,16 +138,18 @@ def predict_proba(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'PassiveAggressive Classifier', - 'name': 'Passive Aggressive Classifier', - 'handles_regression': False, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': False, - 'is_deterministic': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (PREDICTIONS,)} + return { + "shortname": "PassiveAggressive Classifier", + "name": "Passive Aggressive Classifier", + "handles_regression": False, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": False, + "is_deterministic": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (PREDICTIONS,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): @@ -152,11 +159,13 @@ def get_hyperparameter_search_space(dataset_properties=None): "loss", ["hinge", "squared_hinge"], default_value="hinge" ) - tol = UniformFloatHyperparameter("tol", 1e-5, 1e-1, default_value=1e-4, - log=True) + tol = UniformFloatHyperparameter( + "tol", 1e-5, 1e-1, default_value=1e-4, log=True + ) # Note: Average could also be an Integer if > 1 - average = CategoricalHyperparameter('average', ['False', 'True'], - default_value='False') + average = CategoricalHyperparameter( + "average", ["False", "True"], default_value="False" + ) cs = ConfigurationSpace() cs.add_hyperparameters([loss, fit_intercept, tol, C, average]) diff --git a/autosklearn/pipeline/components/classification/qda.py b/autosklearn/pipeline/components/classification/qda.py index 7405b21fae..7b25858392 100644 --- a/autosklearn/pipeline/components/classification/qda.py +++ b/autosklearn/pipeline/components/classification/qda.py @@ -1,16 +1,13 @@ +import numpy as np from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import UniformFloatHyperparameter -from autosklearn.pipeline.components.base import \ - AutoSklearnClassificationAlgorithm -from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA, PREDICTIONS +from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm +from autosklearn.pipeline.constants import DENSE, PREDICTIONS, UNSIGNED_DATA from autosklearn.pipeline.implementations.util import softmax -import numpy as np - class QDA(AutoSklearnClassificationAlgorithm): - def __init__(self, reg_param, random_state=None): self.reg_param = float(reg_param) self.estimator = None @@ -18,11 +15,13 @@ def __init__(self, reg_param, random_state=None): def fit(self, X, Y): import sklearn.discriminant_analysis - estimator = sklearn.discriminant_analysis.\ - QuadraticDiscriminantAnalysis(reg_param=self.reg_param) + estimator = 
sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis( + reg_param=self.reg_param + ) if len(Y.shape) == 2 and Y.shape[1] > 1: import sklearn.multiclass + self.estimator = sklearn.multiclass.OneVsRestClassifier(estimator, n_jobs=1) else: self.estimator = estimator @@ -32,16 +31,17 @@ def fit(self, X, Y): if len(Y.shape) == 2 and Y.shape[1] > 1: problems = [] for est in self.estimator.estimators_: - problem = np.any(np.any([np.any(s <= 0.0) for s in - est.scalings_])) + problem = np.any(np.any([np.any(s <= 0.0) for s in est.scalings_])) problems.append(problem) problem = np.any(problems) else: - problem = np.any(np.any([np.any(s <= 0.0) for s in - self.estimator.scalings_])) + problem = np.any( + np.any([np.any(s <= 0.0) for s in self.estimator.scalings_]) + ) if problem: - raise ValueError('Numerical problems in QDA. QDA.scalings_ ' - 'contains values <= 0.0') + raise ValueError( + "Numerical problems in QDA. QDA.scalings_ " "contains values <= 0.0" + ) return self def predict(self, X): @@ -58,21 +58,22 @@ def predict_proba(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'QDA', - 'name': 'Quadratic Discriminant Analysis', - 'handles_regression': False, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': False, - 'is_deterministic': True, - 'input': (DENSE, UNSIGNED_DATA), - 'output': (PREDICTIONS,)} + return { + "shortname": "QDA", + "name": "Quadratic Discriminant Analysis", + "handles_regression": False, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": False, + "is_deterministic": True, + "input": (DENSE, UNSIGNED_DATA), + "output": (PREDICTIONS,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): - reg_param = UniformFloatHyperparameter('reg_param', 0.0, 1.0, - default_value=0.0) + reg_param = UniformFloatHyperparameter("reg_param", 0.0, 1.0, default_value=0.0) cs = ConfigurationSpace() cs.add_hyperparameter(reg_param) return cs diff --git a/autosklearn/pipeline/components/classification/random_forest.py b/autosklearn/pipeline/components/classification/random_forest.py index c2f4e9779a..6ccd720b3a 100644 --- a/autosklearn/pipeline/components/classification/random_forest.py +++ b/autosklearn/pipeline/components/classification/random_forest.py @@ -1,13 +1,19 @@ from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ - UniformIntegerHyperparameter, CategoricalHyperparameter, UnParametrizedHyperparameter +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformFloatHyperparameter, + UniformIntegerHyperparameter, + UnParametrizedHyperparameter, +) from autosklearn.pipeline.components.base import ( AutoSklearnClassificationAlgorithm, IterativeComponentWithSampleWeight, ) -from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA, PREDICTIONS, SPARSE -from autosklearn.pipeline.implementations.util import convert_multioutput_multiclass_to_multilabel +from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA +from autosklearn.pipeline.implementations.util import ( + convert_multioutput_multiclass_to_multilabel, +) from autosklearn.util.common import check_for_bool, check_none @@ -15,11 +21,21 @@ class RandomForest( IterativeComponentWithSampleWeight, AutoSklearnClassificationAlgorithm, ): - def __init__(self, criterion, max_features, - max_depth, 
min_samples_split, min_samples_leaf, - min_weight_fraction_leaf, bootstrap, max_leaf_nodes, - min_impurity_decrease, random_state=None, n_jobs=1, - class_weight=None): + def __init__( + self, + criterion, + max_features, + max_depth, + min_samples_split, + min_samples_leaf, + min_weight_fraction_leaf, + bootstrap, + max_leaf_nodes, + min_impurity_decrease, + random_state=None, + n_jobs=1, + class_weight=None, + ): self.n_estimators = self.get_max_iter() self.criterion = criterion self.max_features = max_features @@ -88,11 +104,13 @@ def iterative_fit(self, X, y, sample_weight=None, n_iter=1, refit=False): random_state=self.random_state, n_jobs=self.n_jobs, class_weight=self.class_weight, - warm_start=True) + warm_start=True, + ) else: self.estimator.n_estimators += n_iter - self.estimator.n_estimators = min(self.estimator.n_estimators, - self.n_estimators) + self.estimator.n_estimators = min( + self.estimator.n_estimators, self.n_estimators + ) self.estimator.fit(X, y, sample_weight=sample_weight) return self @@ -117,42 +135,63 @@ def predict_proba(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'RF', - 'name': 'Random Forest Classifier', - 'handles_regression': False, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': False, - 'is_deterministic': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (PREDICTIONS,)} + return { + "shortname": "RF", + "name": "Random Forest Classifier", + "handles_regression": False, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": False, + "is_deterministic": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (PREDICTIONS,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): cs = ConfigurationSpace() criterion = CategoricalHyperparameter( - "criterion", ["gini", "entropy"], default_value="gini") - - # The maximum number of features used in the forest is calculated as m^max_features, where - # m is the total number of features, and max_features is the hyperparameter specified below. - # The default is 0.5, which yields sqrt(m) features as max_features in the estimator. This - # corresponds with Geurts' heuristic. + "criterion", ["gini", "entropy"], default_value="gini" + ) + + # The maximum number of features used in the forest is calculated as + # m^max_features, where m is the total number of features, and max_features + # is the hyperparameter specified below. The default is 0.5, which yields + # sqrt(m) features as max_features in the estimator. + # This corresponds with Geurts' heuristic. max_features = UniformFloatHyperparameter( - "max_features", 0., 1., default_value=0.5) + "max_features", 0.0, 1.0, default_value=0.5 + ) max_depth = UnParametrizedHyperparameter("max_depth", "None") min_samples_split = UniformIntegerHyperparameter( - "min_samples_split", 2, 20, default_value=2) + "min_samples_split", 2, 20, default_value=2 + ) min_samples_leaf = UniformIntegerHyperparameter( - "min_samples_leaf", 1, 20, default_value=1) - min_weight_fraction_leaf = UnParametrizedHyperparameter("min_weight_fraction_leaf", 0.) 
+ "min_samples_leaf", 1, 20, default_value=1 + ) + min_weight_fraction_leaf = UnParametrizedHyperparameter( + "min_weight_fraction_leaf", 0.0 + ) max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None") - min_impurity_decrease = UnParametrizedHyperparameter('min_impurity_decrease', 0.0) + min_impurity_decrease = UnParametrizedHyperparameter( + "min_impurity_decrease", 0.0 + ) bootstrap = CategoricalHyperparameter( - "bootstrap", ["True", "False"], default_value="True") - cs.add_hyperparameters([criterion, max_features, - max_depth, min_samples_split, min_samples_leaf, - min_weight_fraction_leaf, max_leaf_nodes, - bootstrap, min_impurity_decrease]) + "bootstrap", ["True", "False"], default_value="True" + ) + cs.add_hyperparameters( + [ + criterion, + max_features, + max_depth, + min_samples_split, + min_samples_leaf, + min_weight_fraction_leaf, + max_leaf_nodes, + bootstrap, + min_impurity_decrease, + ] + ) return cs diff --git a/autosklearn/pipeline/components/classification/sgd.py b/autosklearn/pipeline/components/classification/sgd.py index 6875541824..469c2605dd 100644 --- a/autosklearn/pipeline/components/classification/sgd.py +++ b/autosklearn/pipeline/components/classification/sgd.py @@ -1,13 +1,16 @@ -from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ - CategoricalHyperparameter, UnParametrizedHyperparameter from ConfigSpace.conditions import EqualsCondition, InCondition +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformFloatHyperparameter, + UnParametrizedHyperparameter, +) from autosklearn.pipeline.components.base import ( AutoSklearnClassificationAlgorithm, IterativeComponentWithSampleWeight, ) -from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA, PREDICTIONS, SPARSE +from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA from autosklearn.pipeline.implementations.util import softmax from autosklearn.util.common import check_for_bool @@ -16,9 +19,21 @@ class SGD( IterativeComponentWithSampleWeight, AutoSklearnClassificationAlgorithm, ): - def __init__(self, loss, penalty, alpha, fit_intercept, tol, - learning_rate, l1_ratio=0.15, epsilon=0.1, - eta0=0.01, power_t=0.5, average=False, random_state=None): + def __init__( + self, + loss, + penalty, + alpha, + fit_intercept, + tol, + learning_rate, + l1_ratio=0.15, + epsilon=0.1, + eta0=0.01, + power_t=0.5, + average=False, + random_state=None, + ): self.max_iter = self.get_max_iter() self.loss = loss self.penalty = penalty @@ -61,32 +76,31 @@ def iterative_fit(self, X, y, n_iter=2, refit=False, sample_weight=None): self.fully_fit_ = False self.alpha = float(self.alpha) - self.l1_ratio = float(self.l1_ratio) if self.l1_ratio is not None \ - else 0.15 - self.epsilon = float(self.epsilon) if self.epsilon is not None \ - else 0.1 + self.l1_ratio = float(self.l1_ratio) if self.l1_ratio is not None else 0.15 + self.epsilon = float(self.epsilon) if self.epsilon is not None else 0.1 self.eta0 = float(self.eta0) - self.power_t = float(self.power_t) if self.power_t is not None \ - else 0.5 + self.power_t = float(self.power_t) if self.power_t is not None else 0.5 self.average = check_for_bool(self.average) self.fit_intercept = check_for_bool(self.fit_intercept) self.tol = float(self.tol) - self.estimator = SGDClassifier(loss=self.loss, - penalty=self.penalty, - alpha=self.alpha, - fit_intercept=self.fit_intercept, 
- max_iter=n_iter, - tol=self.tol, - learning_rate=self.learning_rate, - l1_ratio=self.l1_ratio, - epsilon=self.epsilon, - eta0=self.eta0, - power_t=self.power_t, - shuffle=True, - average=self.average, - random_state=self.random_state, - warm_start=True) + self.estimator = SGDClassifier( + loss=self.loss, + penalty=self.penalty, + alpha=self.alpha, + fit_intercept=self.fit_intercept, + max_iter=n_iter, + tol=self.tol, + learning_rate=self.learning_rate, + l1_ratio=self.l1_ratio, + epsilon=self.epsilon, + eta0=self.eta0, + power_t=self.power_t, + shuffle=True, + average=self.average, + random_state=self.random_state, + warm_start=True, + ) self.estimator.fit(X, y, sample_weight=sample_weight) self.n_iter_ = self.estimator.n_iter_ else: @@ -94,7 +108,8 @@ def iterative_fit(self, X, y, n_iter=2, refit=False, sample_weight=None): self.estimator.max_iter = min(self.estimator.max_iter, self.max_iter) self.estimator._validate_params() self.estimator._partial_fit( - X, y, + X, + y, alpha=self.estimator.alpha, C=1.0, loss=self.estimator.loss, @@ -103,11 +118,14 @@ def iterative_fit(self, X, y, n_iter=2, refit=False, sample_weight=None): sample_weight=sample_weight, classes=None, coef_init=None, - intercept_init=None + intercept_init=None, ) self.n_iter_ += self.estimator.n_iter_ - if self.estimator.max_iter >= self.max_iter or self.estimator.max_iter > self.n_iter_: + if ( + self.estimator.max_iter >= self.max_iter + or self.estimator.max_iter > self.n_iter_ + ): self.fully_fit_ = True return self @@ -115,7 +133,7 @@ def iterative_fit(self, X, y, n_iter=2, refit=False, sample_weight=None): def configuration_fully_fitted(self): if self.estimator is None: return False - elif not hasattr(self, 'fully_fit_'): + elif not hasattr(self, "fully_fit_"): return False else: return self.fully_fit_ @@ -137,16 +155,18 @@ def predict_proba(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'SGD Classifier', - 'name': 'Stochastic Gradient Descent Classifier', - 'handles_regression': False, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': False, - 'handles_multioutput': False, - 'is_deterministic': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (PREDICTIONS,)} + return { + "shortname": "SGD Classifier", + "name": "Stochastic Gradient Descent Classifier", + "handles_regression": False, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": False, + "handles_multioutput": False, + "is_deterministic": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (PREDICTIONS,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): @@ -156,44 +176,63 @@ def get_hyperparameter_search_space(dataset_properties=None): "loss", ["hinge", "log", "modified_huber", "squared_hinge", "perceptron"], default_value="log", - ) + ) penalty = CategoricalHyperparameter( - "penalty", ["l1", "l2", "elasticnet"], default_value="l2") + "penalty", ["l1", "l2", "elasticnet"], default_value="l2" + ) alpha = UniformFloatHyperparameter( - "alpha", 1e-7, 1e-1, log=True, default_value=0.0001) + "alpha", 1e-7, 1e-1, log=True, default_value=0.0001 + ) l1_ratio = UniformFloatHyperparameter( - "l1_ratio", 1e-9, 1, log=True, default_value=0.15) + "l1_ratio", 1e-9, 1, log=True, default_value=0.15 + ) fit_intercept = UnParametrizedHyperparameter("fit_intercept", "True") - tol = UniformFloatHyperparameter("tol", 1e-5, 1e-1, log=True, - default_value=1e-4) + tol = UniformFloatHyperparameter( + "tol", 1e-5, 
1e-1, log=True, default_value=1e-4 + ) epsilon = UniformFloatHyperparameter( - "epsilon", 1e-5, 1e-1, default_value=1e-4, log=True) + "epsilon", 1e-5, 1e-1, default_value=1e-4, log=True + ) learning_rate = CategoricalHyperparameter( - "learning_rate", ["optimal", "invscaling", "constant"], - default_value="invscaling") + "learning_rate", + ["optimal", "invscaling", "constant"], + default_value="invscaling", + ) eta0 = UniformFloatHyperparameter( - "eta0", 1e-7, 1e-1, default_value=0.01, log=True) - power_t = UniformFloatHyperparameter("power_t", 1e-5, 1, - default_value=0.5) + "eta0", 1e-7, 1e-1, default_value=0.01, log=True + ) + power_t = UniformFloatHyperparameter("power_t", 1e-5, 1, default_value=0.5) average = CategoricalHyperparameter( - "average", ["False", "True"], default_value="False") - cs.add_hyperparameters([loss, penalty, alpha, l1_ratio, fit_intercept, - tol, epsilon, learning_rate, eta0, power_t, - average]) + "average", ["False", "True"], default_value="False" + ) + cs.add_hyperparameters( + [ + loss, + penalty, + alpha, + l1_ratio, + fit_intercept, + tol, + epsilon, + learning_rate, + eta0, + power_t, + average, + ] + ) # TODO add passive/aggressive here, although not properly documented? elasticnet = EqualsCondition(l1_ratio, penalty, "elasticnet") epsilon_condition = EqualsCondition(epsilon, loss, "modified_huber") - power_t_condition = EqualsCondition(power_t, learning_rate, - "invscaling") + power_t_condition = EqualsCondition(power_t, learning_rate, "invscaling") # eta0 is only relevant if learning_rate!='optimal' according to code # https://github.com/scikit-learn/scikit-learn/blob/0.19.X/sklearn/ # linear_model/sgd_fast.pyx#L603 - eta0_in_inv_con = InCondition(eta0, learning_rate, ["invscaling", - "constant"]) - cs.add_conditions([elasticnet, epsilon_condition, power_t_condition, - eta0_in_inv_con]) + eta0_in_inv_con = InCondition(eta0, learning_rate, ["invscaling", "constant"]) + cs.add_conditions( + [elasticnet, epsilon_condition, power_t_condition, eta0_in_inv_con] + ) return cs diff --git a/autosklearn/pipeline/components/data_preprocessing/__init__.py b/autosklearn/pipeline/components/data_preprocessing/__init__.py index 3ba5981965..5693efd441 100644 --- a/autosklearn/pipeline/components/data_preprocessing/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/__init__.py @@ -1,18 +1,24 @@ +from typing import Dict, Optional, Type + import os from collections import OrderedDict -from typing import Dict, Optional, Type from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import CategoricalHyperparameter + from autosklearn.pipeline.base import PIPELINE_DATA_DTYPE -from ..base import find_components, \ - ThirdPartyComponents, AutoSklearnChoice, AutoSklearnPreprocessingAlgorithm +from ..base import ( + AutoSklearnChoice, + AutoSklearnPreprocessingAlgorithm, + ThirdPartyComponents, + find_components, +) classifier_directory = os.path.split(__file__)[0] -_preprocessors = find_components(__package__, - classifier_directory, - AutoSklearnPreprocessingAlgorithm) +_preprocessors = find_components( + __package__, classifier_directory, AutoSklearnPreprocessingAlgorithm +) _addons = ThirdPartyComponents(AutoSklearnPreprocessingAlgorithm) @@ -21,7 +27,6 @@ def add_preprocessor(preprocessor: Type[AutoSklearnPreprocessingAlgorithm]) -> N class DataPreprocessorChoice(AutoSklearnChoice): - @classmethod def get_components(cls) -> OrderedDict: components: OrderedDict = OrderedDict() @@ -29,23 +34,28 @@ def 
get_components(cls) -> OrderedDict: components.update(_addons.components) return components - def get_available_components(self, dataset_properties: Optional[Dict] = None, - include: Optional[Dict] = None, - exclude: Optional[Dict] = None) -> OrderedDict: + def get_available_components( + self, + dataset_properties: Optional[Dict] = None, + include: Optional[Dict] = None, + exclude: Optional[Dict] = None, + ) -> OrderedDict: if dataset_properties is None: dataset_properties = {} if include is not None and exclude is not None: raise ValueError( - "The argument include and exclude cannot be used together.") + "The argument include and exclude cannot be used together." + ) available_comp = self.get_components() if include is not None: for incl in include: if incl not in available_comp: - raise ValueError("Trying to include unknown component: " - "%s" % incl) + raise ValueError( + "Trying to include unknown component: " "%s" % incl + ) # TODO check for task type classification and/or regression! @@ -59,38 +69,47 @@ def get_available_components(self, dataset_properties: Optional[Dict] = None, entry = available_comp[name] # Exclude itself to avoid infinite loop - if entry == DataPreprocessorChoice or hasattr(entry, 'get_components'): + if entry == DataPreprocessorChoice or hasattr(entry, "get_components"): continue - target_type = dataset_properties['target_type'] - if target_type == 'classification': - if entry.get_properties()['handles_classification'] is False: + target_type = dataset_properties["target_type"] + if target_type == "classification": + if entry.get_properties()["handles_classification"] is False: continue - if dataset_properties.get('multiclass') is True and \ - entry.get_properties()['handles_multiclass'] is False: + if ( + dataset_properties.get("multiclass") is True + and entry.get_properties()["handles_multiclass"] is False + ): continue - if dataset_properties.get('multilabel') is True and \ - entry.get_properties()['handles_multilabel'] is False: + if ( + dataset_properties.get("multilabel") is True + and entry.get_properties()["handles_multilabel"] is False + ): continue - elif target_type == 'regression': - if entry.get_properties()['handles_regression'] is False: + elif target_type == "regression": + if entry.get_properties()["handles_regression"] is False: continue - if dataset_properties.get('multioutput') is True and \ - entry.get_properties()['handles_multioutput'] is False: + if ( + dataset_properties.get("multioutput") is True + and entry.get_properties()["handles_multioutput"] is False + ): continue else: - raise ValueError('Unknown target type %s' % target_type) + raise ValueError("Unknown target type %s" % target_type) components_dict[name] = entry return components_dict - def get_hyperparameter_search_space(self, dataset_properties: Optional[Dict] = None, - default: str = None, - include: Optional[Dict] = None, - exclude: Optional[Dict] = None) -> ConfigurationSpace: + def get_hyperparameter_search_space( + self, + dataset_properties: Optional[Dict] = None, + default: str = None, + include: Optional[Dict] = None, + exclude: Optional[Dict] = None, + ) -> ConfigurationSpace: cs = ConfigurationSpace() if dataset_properties is None: @@ -98,12 +117,11 @@ def get_hyperparameter_search_space(self, dataset_properties: Optional[Dict] = N # Compile a list of legal preprocessors for this problem available_preprocessors = self.get_available_components( - dataset_properties=dataset_properties, - include=include, exclude=exclude) + dataset_properties=dataset_properties, 
include=include, exclude=exclude + ) if len(available_preprocessors) == 0: - raise ValueError( - "No preprocessors found, please add NoPreprocessing") + raise ValueError("No preprocessors found, please add NoPreprocessing") if default is None: defaults = ["feature_type"] @@ -112,43 +130,48 @@ def get_hyperparameter_search_space(self, dataset_properties: Optional[Dict] = N default = default_ break - preprocessor = CategoricalHyperparameter('__choice__', - list(available_preprocessors.keys()), - default_value=default) + preprocessor = CategoricalHyperparameter( + "__choice__", list(available_preprocessors.keys()), default_value=default + ) cs.add_hyperparameter(preprocessor) for name in available_preprocessors: preprocessor_configuration_space = available_preprocessors[name]( - dataset_properties=dataset_properties). \ - get_hyperparameter_search_space(dataset_properties) - parent_hyperparameter = {'parent': preprocessor, 'value': name} - cs.add_configuration_space(name, preprocessor_configuration_space, - parent_hyperparameter=parent_hyperparameter) + dataset_properties=dataset_properties + ).get_hyperparameter_search_space(dataset_properties) + parent_hyperparameter = {"parent": preprocessor, "value": name} + cs.add_configuration_space( + name, + preprocessor_configuration_space, + parent_hyperparameter=parent_hyperparameter, + ) return cs def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE: return self.choice.transform(X) - def set_hyperparameters(self, configuration: ConfigurationSpace, - init_params: Optional[Dict] = None) -> 'DataPreprocessorChoice': + def set_hyperparameters( + self, configuration: ConfigurationSpace, init_params: Optional[Dict] = None + ) -> "DataPreprocessorChoice": config = {} params = configuration.get_dictionary() - choice = params['__choice__'] - del params['__choice__'] + choice = params["__choice__"] + del params["__choice__"] for param, value in params.items(): - param = param.replace(choice, '').split(':', 1)[1] + param = param.replace(choice, "").split(":", 1)[1] config[param] = value new_params = {} feat_type = None if init_params is not None: for param, value in init_params.items(): - param = param.replace(choice, '').split(':', 1)[-1] + param = param.replace(choice, "").split(":", 1)[-1] if "feat_type" in param: feat_type = value else: new_params[param] = value - self.choice = self.get_components()[choice](config=config, init_params=new_params, - feat_type=feat_type) + self.choice = self.get_components()[choice]( + config=config, init_params=new_params, feat_type=feat_type + ) return self diff --git a/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py b/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py index 7850a1665b..721fe63fc5 100644 --- a/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py +++ b/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py @@ -1,38 +1,47 @@ -from typing import Any, List, Dict, Optional, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple, Union import numpy as np - from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import CategoricalHyperparameter - from sklearn.base import BaseEstimator from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE -from autosklearn.pipeline.components.base import \ - AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA, SIGNED_DATA, INPUT +from 
autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.constants import ( + DENSE, + INPUT, + SIGNED_DATA, + SPARSE, + UNSIGNED_DATA, +) class Balancing(AutoSklearnPreprocessingAlgorithm): def __init__( self, - strategy: str = 'none', + strategy: str = "none", random_state: Optional[Union[int, np.random.RandomState]] = None, ) -> None: self.strategy = strategy self.random_state = random_state - def fit(self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None) -> 'Balancing': + def fit( + self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None + ) -> "Balancing": self.fitted_ = True return self def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE: return X - def get_weights(self, Y: PIPELINE_DATA_DTYPE, - classifier: BaseEstimator, preprocessor: BaseEstimator, - init_params: Optional[Dict[str, Any]], fit_params: Optional[Dict[str, Any]], - ) -> Tuple[Optional[Dict[str, Any]], Optional[Dict[str, Any]]]: + def get_weights( + self, + Y: PIPELINE_DATA_DTYPE, + classifier: BaseEstimator, + preprocessor: BaseEstimator, + init_params: Optional[Dict[str, Any]], + fit_params: Optional[Dict[str, Any]], + ) -> Tuple[Optional[Dict[str, Any]], Optional[Dict[str, Any]]]: if init_params is None: init_params = {} @@ -45,12 +54,18 @@ def get_weights(self, Y: PIPELINE_DATA_DTYPE, # https://github.com/scikit-learn/scikit-learn/blob/0.15.X/sklearn/ensemble/weight_boosting.py#L121 # Have RF and ET in here because they emit a warning if class_weights # are used together with warmstarts - clf_ = ['adaboost', 'random_forest', 'extra_trees', 'sgd', 'passive_aggressive', - 'gradient_boosting'] + clf_ = [ + "adaboost", + "random_forest", + "extra_trees", + "sgd", + "passive_aggressive", + "gradient_boosting", + ] pre_: List[str] = [] if classifier in clf_ or preprocessor in pre_: if len(Y.shape) > 1: - offsets = [2 ** i for i in range(Y.shape[1])] + offsets = [2**i for i in range(Y.shape[1])] Y_ = np.sum(Y * offsets, axis=1) else: Y_ = Y @@ -68,65 +83,68 @@ def get_weights(self, Y: PIPELINE_DATA_DTYPE, sample_weights[mask] *= cw[i] if classifier in clf_: - fit_params['classifier:sample_weight'] = sample_weights + fit_params["classifier:sample_weight"] = sample_weights if preprocessor in pre_: - fit_params['feature_preprocessor:sample_weight'] = sample_weights + fit_params["feature_preprocessor:sample_weight"] = sample_weights # Classifiers which can adjust sample weights themselves via the # argument `class_weight` - clf_ = ['decision_tree', 'liblinear_svc', - 'libsvm_svc'] - pre_ = ['liblinear_svc_preprocessor', - 'extra_trees_preproc_for_classification'] + clf_ = ["decision_tree", "liblinear_svc", "libsvm_svc"] + pre_ = ["liblinear_svc_preprocessor", "extra_trees_preproc_for_classification"] if classifier in clf_: - init_params['classifier:class_weight'] = 'balanced' + init_params["classifier:class_weight"] = "balanced" if preprocessor in pre_: - init_params['feature_preprocessor:class_weight'] = 'balanced' + init_params["feature_preprocessor:class_weight"] = "balanced" - clf_ = ['ridge'] + clf_ = ["ridge"] if classifier in clf_: class_weights = {} unique, counts = np.unique(Y, return_counts=True) - cw = 1. 
/ counts + cw = 1.0 / counts cw = cw / np.mean(cw) for i, ue in enumerate(unique): class_weights[ue] = cw[i] if classifier in clf_: - init_params['classifier:class_weight'] = class_weights + init_params["classifier:class_weight"] = class_weights return init_params, fit_params @staticmethod - def get_properties(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: - return {'shortname': 'Balancing', - 'name': 'Balancing Imbalanced Class Distributions', - 'handles_missing_values': True, - 'handles_nominal_values': True, - 'handles_numerical_features': True, - 'prefers_data_scaled': False, - 'prefers_data_normalized': False, - 'handles_regression': False, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': False, - 'is_deterministic': True, - 'handles_sparse': True, - 'handles_dense': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA, SIGNED_DATA), - 'output': (INPUT,), - 'preferred_dtype': None} + def get_properties( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: + return { + "shortname": "Balancing", + "name": "Balancing Imbalanced Class Distributions", + "handles_missing_values": True, + "handles_nominal_values": True, + "handles_numerical_features": True, + "prefers_data_scaled": False, + "prefers_data_normalized": False, + "handles_regression": False, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": False, + "is_deterministic": True, + "handles_sparse": True, + "handles_dense": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA, SIGNED_DATA), + "output": (INPUT,), + "preferred_dtype": None, + } @staticmethod - def get_hyperparameter_search_space(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> ConfigurationSpace: + def get_hyperparameter_search_space( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> ConfigurationSpace: # TODO add replace by zero! 
strategy = CategoricalHyperparameter( - "strategy", ["none", "weighting"], default_value="none") + "strategy", ["none", "weighting"], default_value="none" + ) cs = ConfigurationSpace() cs.add_hyperparameter(strategy) return cs diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py index c4d34ab306..5d1647b24a 100644 --- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py @@ -1,33 +1,34 @@ -from collections import OrderedDict -import os - from typing import Any, Dict, Optional +import os +from collections import OrderedDict + from ConfigSpace import Configuration from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import CategoricalHyperparameter - from sklearn.base import BaseEstimator -from ...base import AutoSklearnPreprocessingAlgorithm, find_components, \ - ThirdPartyComponents, AutoSklearnChoice, _addons - from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE +from ...base import ( + AutoSklearnChoice, + AutoSklearnPreprocessingAlgorithm, + ThirdPartyComponents, + _addons, + find_components, +) + ohe_directory = os.path.split(__file__)[0] -_ohes = find_components(__package__, - ohe_directory, - AutoSklearnPreprocessingAlgorithm) +_ohes = find_components(__package__, ohe_directory, AutoSklearnPreprocessingAlgorithm) additional_components = ThirdPartyComponents(AutoSklearnPreprocessingAlgorithm) -_addons['data_preprocessing.categorical_encoding'] = additional_components +_addons["data_preprocessing.categorical_encoding"] = additional_components -def add_ohe(ohe: 'OHEChoice') -> None: +def add_ohe(ohe: "OHEChoice") -> None: additional_components.add_component(ohe) class OHEChoice(AutoSklearnChoice): - @classmethod def get_components(cls: BaseEstimator) -> Dict[str, BaseEstimator]: components: Dict[str, BaseEstimator] = OrderedDict() @@ -49,48 +50,52 @@ def get_hyperparameter_search_space( # Compile a list of legal preprocessors for this problem available_preprocessors = self.get_available_components( - dataset_properties=dataset_properties, - include=include, exclude=exclude) + dataset_properties=dataset_properties, include=include, exclude=exclude + ) if len(available_preprocessors) == 0: raise ValueError( "No ohe hot encoders found, please add any one hot encoder " - "component.") + "component." + ) if default is None: - defaults = ['one_hot_encoding', 'no_encoding'] + defaults = ["one_hot_encoding", "no_encoding"] for default_ in defaults: if default_ in available_preprocessors: default = default_ break - preprocessor = CategoricalHyperparameter('__choice__', - list( - available_preprocessors.keys()), - default_value=default) + preprocessor = CategoricalHyperparameter( + "__choice__", list(available_preprocessors.keys()), default_value=default + ) cs.add_hyperparameter(preprocessor) for name in available_preprocessors: - preprocessor_configuration_space = available_preprocessors[name]. 
\ - get_hyperparameter_search_space(dataset_properties) - parent_hyperparameter = {'parent': preprocessor, 'value': name} - cs.add_configuration_space(name, preprocessor_configuration_space, - parent_hyperparameter=parent_hyperparameter) + preprocessor_configuration_space = available_preprocessors[ + name + ].get_hyperparameter_search_space(dataset_properties) + parent_hyperparameter = {"parent": preprocessor, "value": name} + cs.add_configuration_space( + name, + preprocessor_configuration_space, + parent_hyperparameter=parent_hyperparameter, + ) self.configuration_space = cs self.dataset_properties = dataset_properties return cs - def set_hyperparameters(self, configuration: Configuration, - init_params: Optional[Dict[str, Any]] = None - ) -> 'OHEChoice': + def set_hyperparameters( + self, configuration: Configuration, init_params: Optional[Dict[str, Any]] = None + ) -> "OHEChoice": new_params = {} params = configuration.get_dictionary() - choice = params['__choice__'] - del params['__choice__'] + choice = params["__choice__"] + del params["__choice__"] for param, value in params.items(): - param = param.replace(choice, '').replace(':', '') + param = param.replace(choice, "").replace(":", "") new_params[param] = value if init_params is not None: @@ -100,10 +105,10 @@ def set_hyperparameters(self, configuration: Configuration, # in order to not pass it to the no encoding if choice not in param: continue - param = param.replace(choice, '').replace(':', '') + param = param.replace(choice, "").replace(":", "") new_params[param] = value - new_params['random_state'] = self.random_state + new_params["random_state"] = self.random_state self.new_params = new_params self.choice = self.get_components()[choice](**new_params) diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py index 3ebb411457..43d578219f 100644 --- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py +++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/encoding.py @@ -1,30 +1,29 @@ from typing import Dict, Optional, Tuple, Union import numpy as np - -from ConfigSpace.configuration_space import ConfigurationSpace - import scipy.sparse - +from ConfigSpace.configuration_space import ConfigurationSpace from sklearn.preprocessing import OrdinalEncoder from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA, INPUT +from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA class OrdinalEncoding(AutoSklearnPreprocessingAlgorithm): def __init__( - self, - random_state: Optional[Union[int, np.random.RandomState]] = None + self, random_state: Optional[Union[int, np.random.RandomState]] = None ) -> None: self.random_state = random_state - def fit(self, X: PIPELINE_DATA_DTYPE, - y: Optional[PIPELINE_DATA_DTYPE] = None) -> 'OrdinalEncoding': + def fit( + self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None + ) -> "OrdinalEncoding": if not scipy.sparse.issparse(X): self.preprocessor = OrdinalEncoder( - categories='auto', handle_unknown='use_encoded_value', unknown_value=-1, + categories="auto", + handle_unknown="use_encoded_value", + unknown_value=-1, ) self.preprocessor.fit(X, y) return self @@ -50,20 +49,23 @@ def transform(self, X: 
PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE: return self.preprocessor.transform(X) + 1 @staticmethod - def get_properties(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: - return {'shortname': 'OrdinalEncoder', - 'name': 'Ordinal Encoder', - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': True, - # TODO find out of this is right! - 'handles_sparse': True, - 'handles_dense': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (INPUT,), } + def get_properties( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: + return { + "shortname": "OrdinalEncoder", + "name": "Ordinal Encoder", + "handles_regression": True, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": True, + # TODO find out if this is right! + "handles_sparse": True, + "handles_dense": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (INPUT,), + } @staticmethod def get_hyperparameter_search_space( diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/no_encoding.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/no_encoding.py index ab196396ed..028a4fb9c1 100644 --- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/no_encoding.py +++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/no_encoding.py @@ -1,24 +1,23 @@ from typing import Dict, Optional, Tuple, Union -import numpy as np +import numpy as np from ConfigSpace.configuration_space import ConfigurationSpace from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE -from autosklearn.pipeline.components.base import \ - AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA, INPUT +from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA class NoEncoding(AutoSklearnPreprocessingAlgorithm): def __init__( - self, - random_state: Optional[Union[int, np.random.RandomState]] = None + self, random_state: Optional[Union[int, np.random.RandomState]] = None ) -> None: pass - def fit(self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None - ) -> 'NoEncoding': - self.preprocessor = 'passthrough' + def fit( + self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None + ) -> "NoEncoding": + self.preprocessor = "passthrough" + self.fitted_ = True return self @@ -26,22 +25,26 @@ def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE: return X @staticmethod - def get_properties(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: - return {'shortname': 'no encoding', - 'name': 'No categorical variable encoding', - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': True, - 'handles_sparse': True, - 'handles_dense': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (INPUT,)} + def get_properties( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: + return { + "shortname": "no encoding", + "name": "No categorical 
variable encoding", + "handles_regression": True, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": True, + "handles_sparse": True, + "handles_dense": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (INPUT,), + } @staticmethod - def get_hyperparameter_search_space(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> ConfigurationSpace: + def get_hyperparameter_search_space( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> ConfigurationSpace: cs = ConfigurationSpace() return cs diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/one_hot_encoding.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/one_hot_encoding.py index 8973d1979f..9b9ee87c81 100644 --- a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/one_hot_encoding.py +++ b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/one_hot_encoding.py @@ -1,33 +1,31 @@ from typing import Dict, Optional, Tuple, Union -from ConfigSpace.configuration_space import ConfigurationSpace - +import numpy as np import scipy.sparse - +from ConfigSpace.configuration_space import ConfigurationSpace from sklearn.preprocessing import OneHotEncoder as DenseOneHotEncoder -import numpy as np - from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE -from autosklearn.pipeline.implementations.SparseOneHotEncoder import SparseOneHotEncoder from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA, INPUT +from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA +from autosklearn.pipeline.implementations.SparseOneHotEncoder import SparseOneHotEncoder class OneHotEncoder(AutoSklearnPreprocessingAlgorithm): def __init__( - self, - random_state: Optional[Union[int, np.random.RandomState]] = None + self, random_state: Optional[Union[int, np.random.RandomState]] = None ) -> None: self.random_state = random_state - def fit(self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None - ) -> 'OneHotEncoder': + def fit( + self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None + ) -> "OneHotEncoder": if scipy.sparse.issparse(X): self.preprocessor = SparseOneHotEncoder() else: self.preprocessor = DenseOneHotEncoder( - sparse=False, categories='auto', handle_unknown='ignore') + sparse=False, categories="auto", handle_unknown="ignore" + ) self.preprocessor.fit(X, y) return self @@ -37,22 +35,26 @@ def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE: return self.preprocessor.transform(X) @staticmethod - def get_properties(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: - return {'shortname': '1Hot', - 'name': 'One Hot Encoder', - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': True, - # TODO find out of this is right! 
- 'handles_sparse': True, - 'handles_dense': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (INPUT,), } + def get_properties( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: + return { + "shortname": "1Hot", + "name": "One Hot Encoder", + "handles_regression": True, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": True, + # TODO find out if this is right! + "handles_sparse": True, + "handles_dense": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (INPUT,), + } @staticmethod - def get_hyperparameter_search_space(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> ConfigurationSpace: + def get_hyperparameter_search_space( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> ConfigurationSpace: return ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py b/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py index 318a84ee2d..f2dc2bf304 100644 --- a/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py +++ b/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py @@ -1,32 +1,32 @@ from typing import Dict, Optional, Tuple, Union -from ConfigSpace.configuration_space import ConfigurationSpace - import numpy as np +from ConfigSpace.configuration_space import ConfigurationSpace -from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE import autosklearn.pipeline.implementations.CategoryShift +from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA, INPUT +from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA class CategoryShift(AutoSklearnPreprocessingAlgorithm): - """ Add 3 to every category. + """Add 3 to every category. Down in the pipeline, category 2 will be attribute to missing values, category 1 will be assigned to low occurence categories, and category 0 is not used, so to provide compatibility with sparse matrices. 
""" def __init__( - self, - random_state: Optional[Union[int, np.random.RandomState]] = None + self, random_state: Optional[Union[int, np.random.RandomState]] = None ) -> None: self.random_state = random_state - def fit(self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None - ) -> 'CategoryShift': - self.preprocessor = autosklearn.pipeline.implementations.CategoryShift\ - .CategoryShift() + def fit( + self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None + ) -> "CategoryShift": + self.preprocessor = ( + autosklearn.pipeline.implementations.CategoryShift.CategoryShift() + ) self.preprocessor.fit(X, y) return self @@ -36,29 +36,33 @@ def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE: return self.preprocessor.transform(X) @staticmethod - def get_properties(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: - return {'shortname': 'CategShift', - 'name': 'Category Shift', - 'handles_missing_values': True, - 'handles_nominal_values': True, - 'handles_numerical_features': True, - 'prefers_data_scaled': False, - 'prefers_data_normalized': False, - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': True, - 'is_deterministic': True, - # TODO find out of this is right! - 'handles_sparse': True, - 'handles_dense': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (INPUT,), - 'preferred_dtype': None} + def get_properties( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: + return { + "shortname": "CategShift", + "name": "Category Shift", + "handles_missing_values": True, + "handles_nominal_values": True, + "handles_numerical_features": True, + "prefers_data_scaled": False, + "prefers_data_normalized": False, + "handles_regression": True, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": True, + "is_deterministic": True, + # TODO find out of this is right! 
+ "handles_sparse": True, + "handles_dense": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (INPUT,), + "preferred_dtype": None, + } @staticmethod - def get_hyperparameter_search_space(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> ConfigurationSpace: + def get_hyperparameter_search_space( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> ConfigurationSpace: return ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_reduction/truncated_svd.py b/autosklearn/pipeline/components/data_preprocessing/feature_reduction/truncated_svd.py index 3bc4e7c002..c104a18fd7 100644 --- a/autosklearn/pipeline/components/data_preprocessing/feature_reduction/truncated_svd.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_reduction/truncated_svd.py @@ -1,15 +1,13 @@ from typing import Dict, Optional, Tuple, Union -from ConfigSpace.configuration_space import ConfigurationSpace import ConfigSpace.hyperparameters as CSH - import numpy as np +from ConfigSpace.configuration_space import ConfigurationSpace +from sklearn.decomposition import TruncatedSVD from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA, INPUT - -from sklearn.decomposition import TruncatedSVD +from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA class FeatureReduction(AutoSklearnPreprocessingAlgorithm): @@ -20,22 +18,27 @@ class FeatureReduction(AutoSklearnPreprocessingAlgorithm): def __init__( self, n_components: Optional[int] = None, - random_state: Optional[Union[int, np.random.RandomState]] = None + random_state: Optional[Union[int, np.random.RandomState]] = None, ) -> None: self.n_components = n_components self.random_state = random_state - def fit(self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None - ) -> 'FeatureReduction': + def fit( + self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None + ) -> "FeatureReduction": if X.shape[1] > self.n_components: - self.preprocessor = TruncatedSVD(n_components=self.n_components, - random_state=self.random_state) + self.preprocessor = TruncatedSVD( + n_components=self.n_components, random_state=self.random_state + ) elif X.shape[1] <= self.n_components and X.shape[1] != 1: - self.preprocessor = TruncatedSVD(n_components=X.shape[1] - 1, - random_state=self.random_state) + self.preprocessor = TruncatedSVD( + n_components=X.shape[1] - 1, random_state=self.random_state + ) else: - raise ValueError("The text embedding consists only of a single dimension.\n" - "Are you sure that your text data is necessary?") + raise ValueError( + "The text embedding consists only of a single dimension.\n" + "Are you sure that your text data is necessary?" 
+ ) self.preprocessor.fit(X) return self @@ -45,32 +48,38 @@ def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE: return self.preprocessor.transform(X) @staticmethod - def get_properties(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: - return {'shortname': 'TextFeatureReduction', - 'name': 'TextFeatureReduction', - 'handles_missing_values': True, - 'handles_nominal_values': True, - 'handles_numerical_features': True, - 'prefers_data_scaled': False, - 'prefers_data_normalized': False, - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': True, - 'is_deterministic': True, - 'handles_sparse': True, - 'handles_dense': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (INPUT,), - 'preferred_dtype': None} + def get_properties( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: + return { + "shortname": "TextFeatureReduction", + "name": "TextFeatureReduction", + "handles_missing_values": True, + "handles_nominal_values": True, + "handles_numerical_features": True, + "prefers_data_scaled": False, + "prefers_data_normalized": False, + "handles_regression": True, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": True, + "is_deterministic": True, + "handles_sparse": True, + "handles_dense": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (INPUT,), + "preferred_dtype": None, + } @staticmethod - def get_hyperparameter_search_space(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> ConfigurationSpace: + def get_hyperparameter_search_space( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> ConfigurationSpace: cs = ConfigurationSpace() cs.add_hyperparameter( - CSH.UniformIntegerHyperparameter("n_components", lower=1, upper=10000, - default_value=100, log=True)) + CSH.UniformIntegerHyperparameter( + "n_components", lower=1, upper=10000, default_value=100, log=True + ) + ) return cs diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type.py b/autosklearn/pipeline/components/data_preprocessing/feature_type.py index cfd31e8c3a..5c37e4cb98 100644 --- a/autosklearn/pipeline/components/data_preprocessing/feature_type.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type.py @@ -1,38 +1,40 @@ -from typing import Any, List, Dict, Optional, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple, Union +import numpy as np import sklearn.compose -from scipy import sparse - from ConfigSpace import Configuration from ConfigSpace.configuration_space import ConfigurationSpace - -import numpy as np - +from scipy import sparse from sklearn.base import BaseEstimator +from autosklearn.data.validation import SUPPORTED_FEAT_TYPES, SUPPORTED_TARGET_TYPES from autosklearn.pipeline.base import ( - BasePipeline, - DATASET_PROPERTIES_TYPE, - PIPELINE_DATA_DTYPE, - ) -from autosklearn.pipeline.components.data_preprocessing.feature_type_categorical \ - import CategoricalPreprocessingPipeline -from autosklearn.pipeline.components.data_preprocessing.feature_type_numerical \ - import NumericalPreprocessingPipeline -from autosklearn.pipeline.components.data_preprocessing.feature_type_text \ - import TextPreprocessingPipeline -from autosklearn.pipeline.components.base import AutoSklearnComponent, 
AutoSklearnChoice, \ - AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA, INPUT -from autosklearn.data.validation import ( - SUPPORTED_FEAT_TYPES, - SUPPORTED_TARGET_TYPES, + DATASET_PROPERTIES_TYPE, + PIPELINE_DATA_DTYPE, + BasePipeline, +) +from autosklearn.pipeline.components.base import ( + AutoSklearnChoice, + AutoSklearnComponent, + AutoSklearnPreprocessingAlgorithm, +) +from autosklearn.pipeline.components.data_preprocessing.feature_type_categorical import ( # noqa : E501 + CategoricalPreprocessingPipeline, ) +from autosklearn.pipeline.components.data_preprocessing.feature_type_numerical import ( + NumericalPreprocessingPipeline, +) +from autosklearn.pipeline.components.data_preprocessing.feature_type_text import ( + TextPreprocessingPipeline, +) +from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA class FeatTypeSplit(AutoSklearnPreprocessingAlgorithm): - """ This component is used to apply distinct transformations to categorical, - numerical and text features of a dataset. It is built on top of sklearn's ColumnTransformer. + """ + This component is used to apply distinct transformations to categorical, + numerical and text features of a dataset. It is built on top of sklearn's + ColumnTransformer. """ def __init__( @@ -70,9 +72,14 @@ def __init__( # TODO: Extract the child configuration space from the FeatTypeSplit to the # pipeline if needed self.categ_ppl = CategoricalPreprocessingPipeline( - config=None, steps=pipeline, dataset_properties=dataset_properties, - include=include, exclude=exclude, random_state=random_state, - init_params=init_params) + config=None, + steps=pipeline, + dataset_properties=dataset_properties, + include=include, + exclude=exclude, + random_state=random_state, + init_params=init_params, + ) # The pipeline that will be applied to the numerical features (i.e. columns) # of the dataset # Configuration of the data-preprocessor is different from the configuration of @@ -81,9 +88,14 @@ def __init__( # TODO: Extract the child configuration space from the FeatTypeSplit to the # pipeline if needed self.numer_ppl = NumericalPreprocessingPipeline( - config=None, steps=pipeline, dataset_properties=dataset_properties, - include=include, exclude=exclude, random_state=random_state, - init_params=init_params) + config=None, + steps=pipeline, + dataset_properties=dataset_properties, + include=include, + exclude=exclude, + random_state=random_state, + init_params=init_params, + ) # The pipeline that will be applied to the text features (i.e. 
columns) # of the dataset @@ -93,9 +105,14 @@ def __init__( # TODO: Extract the child configuration space from the FeatTypeSplit to the # pipeline if needed self.txt_ppl = TextPreprocessingPipeline( - config=None, steps=pipeline, dataset_properties=dataset_properties, - include=include, exclude=exclude, random_state=random_state, - init_params=init_params) + config=None, + steps=pipeline, + dataset_properties=dataset_properties, + include=include, + exclude=exclude, + random_state=random_state, + init_params=init_params, + ) self._transformers: List[Tuple[str, AutoSklearnComponent]] = [ ("categorical_transformer", self.categ_ppl), @@ -106,8 +123,9 @@ def __init__( self.set_hyperparameters(self.config, init_params=init_params) self.column_transformer = column_transformer - def fit(self, X: SUPPORTED_FEAT_TYPES, y: Optional[SUPPORTED_TARGET_TYPES] = None - ) -> 'FeatTypeSplit': + def fit( + self, X: SUPPORTED_FEAT_TYPES, y: Optional[SUPPORTED_TARGET_TYPES] = None + ) -> "FeatTypeSplit": n_feats = X.shape[1] categorical_features = [] @@ -116,78 +134,99 @@ def fit(self, X: SUPPORTED_FEAT_TYPES, y: Optional[SUPPORTED_TARGET_TYPES] = Non if self.feat_type is not None: # Make sure that we are not missing any column! expected = set(self.feat_type.keys()) - if hasattr(X, 'columns'): + if hasattr(X, "columns"): columns = set(X.columns) else: columns = set(range(n_feats)) if expected != columns: - raise ValueError(f"Train data has columns={expected} yet the" - f" feat_types are feat={columns}") - categorical_features = [key for key, value in self.feat_type.items() - if value.lower() == 'categorical'] - numerical_features = [key for key, value in self.feat_type.items() - if value.lower() == 'numerical'] - text_features = [key for key, value in self.feat_type.items() - if value.lower() == "string"] + raise ValueError( + f"Train data has columns={expected} yet the" + f" feat_types are feat={columns}" + ) + categorical_features = [ + key + for key, value in self.feat_type.items() + if value.lower() == "categorical" + ] + numerical_features = [ + key + for key, value in self.feat_type.items() + if value.lower() == "numerical" + ] + text_features = [ + key + for key, value in self.feat_type.items() + if value.lower() == "string" + ] sklearn_transf_spec = [ (name, transformer, feature_columns) - for name, transformer, feature_columns - in [ + for name, transformer, feature_columns in [ ("text_transformer", self.txt_ppl, text_features), ("categorical_transformer", self.categ_ppl, categorical_features), - ("numerical_transformer", self.numer_ppl, numerical_features) + ("numerical_transformer", self.numer_ppl, numerical_features), ] if len(feature_columns) > 0 ] else: # self.feature_type == None assumes numerical case - sklearn_transf_spec = [("numerical_transformer", self.numer_ppl, [True]*n_feats)] + sklearn_transf_spec = [ + ("numerical_transformer", self.numer_ppl, [True] * n_feats) + ] # And one last check in case feat type is None # And to make sure the final specification has all the columns # considered in the column transformer - total_columns = sum([len(features) for name, ppl, features in sklearn_transf_spec]) + total_columns = sum( + [len(features) for name, ppl, features in sklearn_transf_spec] + ) if total_columns != n_feats: - raise ValueError("Missing columns in the specification of the data validator" - f" for train data={np.shape(X)} and spec={sklearn_transf_spec}") + raise ValueError( + "Missing columns in the specification of the data validator" + f" for train data={np.shape(X)} and 
spec={sklearn_transf_spec}" + ) self.sparse_ = sparse.issparse(X) or self.force_sparse_output self.column_transformer = sklearn.compose.ColumnTransformer( transformers=sklearn_transf_spec, sparse_threshold=float(self.sparse_), - ) + ) self.column_transformer.fit(X, y) return self def transform(self, X: SUPPORTED_FEAT_TYPES) -> PIPELINE_DATA_DTYPE: if self.column_transformer is None: - raise ValueError("Cannot call transform on a Datapreprocessor that has not" - "yet been fit. Please check the log files for errors " - "while trying to fit the model." - ) + raise ValueError( + "Cannot call transform on a Datapreprocessor that has not" + "yet been fit. Please check the log files for errors " + "while trying to fit the model." + ) return self.column_transformer.transform(X) @staticmethod - def get_properties(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: - return {'shortname': 'FeatTypeSplit', - 'name': 'Feature Type Splitter', - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': True, - # TODO find out of this is right! - 'handles_sparse': True, - 'handles_dense': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (INPUT,), } - - def set_hyperparameters(self, configuration: Configuration, - init_params: Optional[Dict[str, Any]] = None) -> 'FeatTypeSplit': - if init_params is not None and 'feat_type' in init_params.keys(): - self.feat_type = init_params['feat_type'] + def get_properties( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: + return { + "shortname": "FeatTypeSplit", + "name": "Feature Type Splitter", + "handles_regression": True, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": True, + # TODO find out of this is right! 
+ "handles_sparse": True, + "handles_dense": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (INPUT,), + } + + def set_hyperparameters( + self, configuration: Configuration, init_params: Optional[Dict[str, Any]] = None + ) -> "FeatTypeSplit": + if init_params is not None and "feat_type" in init_params.keys(): + self.feat_type = init_params["feat_type"] self.config = configuration @@ -197,29 +236,32 @@ def set_hyperparameters(self, configuration: Configuration, ) sub_config_dict = {} for param in configuration: - if param.startswith('%s:' % transf_name): + if param.startswith("%s:" % transf_name): value = configuration[param] - new_name = param.replace('%s:' % transf_name, '', 1) + new_name = param.replace("%s:" % transf_name, "", 1) sub_config_dict[new_name] = value - sub_configuration = Configuration(sub_configuration_space, - values=sub_config_dict) + sub_configuration = Configuration( + sub_configuration_space, values=sub_config_dict + ) sub_init_params_dict: Optional[Dict[str, Any]] = None if init_params is not None: sub_init_params_dict = {} for param in init_params: - if param.startswith('%s:' % transf_name): + if param.startswith("%s:" % transf_name): value = init_params[param] - new_name = param.replace('%s:' % transf_name, '', 1) + new_name = param.replace("%s:" % transf_name, "", 1) sub_init_params_dict[new_name] = value - if isinstance(transf_op, ( - AutoSklearnChoice, AutoSklearnComponent, BasePipeline)): + if isinstance( + transf_op, (AutoSklearnChoice, AutoSklearnComponent, BasePipeline) + ): transf_op.set_hyperparameters( - configuration=sub_configuration, init_params=sub_init_params_dict) + configuration=sub_configuration, init_params=sub_init_params_dict + ) else: - raise NotImplementedError('Not supported yet!') + raise NotImplementedError("Not supported yet!") return self @@ -230,7 +272,8 @@ def get_hyperparameter_search_space( self.dataset_properties = dataset_properties cs = ConfigurationSpace() cs = FeatTypeSplit._get_hyperparameter_search_space_recursevely( - dataset_properties, cs, self._transformers) + dataset_properties, cs, self._transformers + ) return cs @staticmethod @@ -243,8 +286,10 @@ def _get_hyperparameter_search_space_recursevely( if hasattr(st_operation, "get_hyperparameter_search_space"): cs.add_configuration_space( st_name, - st_operation.get_hyperparameter_search_space(dataset_properties)) + st_operation.get_hyperparameter_search_space(dataset_properties), + ) else: return FeatTypeSplit._get_hyperparameter_search_space_recursevely( - dataset_properties, cs, st_operation) + dataset_properties, cs, st_operation + ) return cs diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py b/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py index f8430aa978..0a46887799 100644 --- a/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py @@ -1,27 +1,26 @@ -from typing import Any, List, Dict, Optional, Tuple, Union - -from ConfigSpace.configuration_space import Configuration, ConfigurationSpace +from typing import Any, Dict, List, Optional, Tuple, Union import numpy as np - +from ConfigSpace.configuration_space import Configuration, ConfigurationSpace from sklearn.base import BaseEstimator -from autosklearn.pipeline.components.data_preprocessing.category_shift.\ - category_shift import CategoryShift -from autosklearn.pipeline.components.data_preprocessing.imputation.\ - 
categorical_imputation import CategoricalImputation -from autosklearn.pipeline.components.data_preprocessing.minority_coalescense \ - import CoalescenseChoice -from autosklearn.pipeline.components.data_preprocessing.categorical_encoding \ - import OHEChoice -from autosklearn.pipeline.components.data_preprocessing.categorical_encoding.encoding import ( - OrdinalEncoding +from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, BasePipeline +from autosklearn.pipeline.components.data_preprocessing.categorical_encoding import ( # noqa: E501 + OHEChoice, +) +from autosklearn.pipeline.components.data_preprocessing.categorical_encoding.encoding import ( # noqa: E501 + OrdinalEncoding, +) +from autosklearn.pipeline.components.data_preprocessing.category_shift.category_shift import ( # noqa: E501 + CategoryShift, ) -from autosklearn.pipeline.base import ( - BasePipeline, - DATASET_PROPERTIES_TYPE, +from autosklearn.pipeline.components.data_preprocessing.imputation.categorical_imputation import ( # noqa: E501 + CategoricalImputation, ) -from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA, INPUT +from autosklearn.pipeline.components.data_preprocessing.minority_coalescense import ( + CoalescenseChoice, +) +from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA class CategoricalPreprocessingPipeline(BasePipeline): @@ -53,35 +52,43 @@ def __init__( include: Optional[Dict[str, str]] = None, exclude: Optional[Dict[str, str]] = None, random_state: Optional[Union[int, np.random.RandomState]] = None, - init_params: Optional[Dict[str, Any]] = None + init_params: Optional[Dict[str, Any]] = None, ) -> None: self._output_dtype = np.int32 super().__init__( - config, steps, dataset_properties, include, exclude, - random_state, init_params + config, + steps, + dataset_properties, + include, + exclude, + random_state, + init_params, ) @staticmethod - def get_properties(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: - return {'shortname': 'cat_datapreproc', - 'name': 'categorical data preprocessing', - 'handles_missing_values': True, - 'handles_nominal_values': True, - 'handles_numerical_features': True, - 'prefers_data_scaled': False, - 'prefers_data_normalized': False, - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'is_deterministic': True, - # TODO find out if this is right! - 'handles_sparse': True, - 'handles_dense': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (INPUT,), - 'preferred_dtype': None} + def get_properties( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: + return { + "shortname": "cat_datapreproc", + "name": "categorical data preprocessing", + "handles_missing_values": True, + "handles_nominal_values": True, + "handles_numerical_features": True, + "prefers_data_scaled": False, + "prefers_data_normalized": False, + "handles_regression": True, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "is_deterministic": True, + # TODO find out if this is right! 
+ "handles_sparse": True, + "handles_dense": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (INPUT,), + "preferred_dtype": None, + } def _get_hyperparameter_search_space( self, @@ -102,27 +109,34 @@ def _get_hyperparameter_search_space( dataset_properties = dict() cs = self._get_base_search_space( - cs=cs, dataset_properties=dataset_properties, - exclude=exclude, include=include, pipeline=self.steps) + cs=cs, + dataset_properties=dataset_properties, + exclude=exclude, + include=include, + pipeline=self.steps, + ) return cs - def _get_pipeline_steps(self, - dataset_properties: Optional[Dict[str, str]] = None, - ) -> List[Tuple[str, BaseEstimator]]: + def _get_pipeline_steps( + self, + dataset_properties: Optional[Dict[str, str]] = None, + ) -> List[Tuple[str, BaseEstimator]]: steps = [] default_dataset_properties = {} if dataset_properties is not None and isinstance(dataset_properties, dict): default_dataset_properties.update(dataset_properties) - steps.extend([ - ("imputation", CategoricalImputation()), - ("encoding", OrdinalEncoding()), - ("category_shift", CategoryShift()), - ("category_coalescence", CoalescenseChoice(default_dataset_properties)), - ("categorical_encoding", OHEChoice(default_dataset_properties)), - ]) + steps.extend( + [ + ("imputation", CategoricalImputation()), + ("encoding", OrdinalEncoding()), + ("category_shift", CategoryShift()), + ("category_coalescence", CoalescenseChoice(default_dataset_properties)), + ("categorical_encoding", OHEChoice(default_dataset_properties)), + ] + ) return steps diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type_numerical.py b/autosklearn/pipeline/components/data_preprocessing/feature_type_numerical.py index 5ef47e2699..18008378ab 100644 --- a/autosklearn/pipeline/components/data_preprocessing/feature_type_numerical.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type_numerical.py @@ -1,23 +1,20 @@ -from typing import Any, List, Dict, Optional, Tuple, Union - -from ConfigSpace.configuration_space import Configuration, ConfigurationSpace +from typing import Any, Dict, List, Optional, Tuple, Union import numpy as np - +from ConfigSpace.configuration_space import Configuration, ConfigurationSpace from sklearn.base import BaseEstimator -from autosklearn.pipeline.components.data_preprocessing import rescaling as \ - rescaling_components -from autosklearn.pipeline.components.data_preprocessing.imputation.numerical_imputation \ - import NumericalImputation -from autosklearn.pipeline.components.data_preprocessing.variance_threshold\ - .variance_threshold import VarianceThreshold - -from autosklearn.pipeline.base import ( - BasePipeline, - DATASET_PROPERTIES_TYPE, +from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, BasePipeline +from autosklearn.pipeline.components.data_preprocessing import ( + rescaling as rescaling_components, +) +from autosklearn.pipeline.components.data_preprocessing.imputation.numerical_imputation import ( # noqa: E501 + NumericalImputation, ) -from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA, INPUT +from autosklearn.pipeline.components.data_preprocessing.variance_threshold.variance_threshold import ( # noqa: E501 + VarianceThreshold, +) +from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA class NumericalPreprocessingPipeline(BasePipeline): @@ -48,35 +45,43 @@ def __init__( include: Optional[Dict[str, str]] = None, exclude: Optional[Dict[str, str]] = None, random_state: Optional[Union[int, 
np.random.RandomState]] = None, - init_params: Optional[Dict[str, Any]] = None + init_params: Optional[Dict[str, Any]] = None, ) -> None: self._output_dtype = np.int32 super().__init__( - config, steps, dataset_properties, include, exclude, - random_state, init_params + config, + steps, + dataset_properties, + include, + exclude, + random_state, + init_params, ) @staticmethod - def get_properties(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: - return {'shortname': 'num_datapreproc', - 'name': 'numeric data preprocessing', - 'handles_missing_values': True, - 'handles_nominal_values': True, - 'handles_numerical_features': True, - 'prefers_data_scaled': False, - 'prefers_data_normalized': False, - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'is_deterministic': True, - # TODO find out if this is right! - 'handles_sparse': True, - 'handles_dense': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (INPUT,), - 'preferred_dtype': None} + def get_properties( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: + return { + "shortname": "num_datapreproc", + "name": "numeric data preprocessing", + "handles_missing_values": True, + "handles_nominal_values": True, + "handles_numerical_features": True, + "prefers_data_scaled": False, + "prefers_data_normalized": False, + "handles_regression": True, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "is_deterministic": True, + # TODO find out if this is right! + "handles_sparse": True, + "handles_dense": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (INPUT,), + "preferred_dtype": None, + } def _get_hyperparameter_search_space( self, @@ -100,25 +105,35 @@ def _get_hyperparameter_search_space( dataset_properties = dict() cs = self._get_base_search_space( - cs=cs, dataset_properties=dataset_properties, - exclude=exclude, include=include, pipeline=self.steps) + cs=cs, + dataset_properties=dataset_properties, + exclude=exclude, + include=include, + pipeline=self.steps, + ) return cs - def _get_pipeline_steps(self, - dataset_properties: Optional[Dict[str, str]] = None, - ) -> List[Tuple[str, BaseEstimator]]: + def _get_pipeline_steps( + self, + dataset_properties: Optional[Dict[str, str]] = None, + ) -> List[Tuple[str, BaseEstimator]]: steps = [] default_dataset_properties = {} if dataset_properties is not None and isinstance(dataset_properties, dict): default_dataset_properties.update(dataset_properties) - steps.extend([ - ("imputation", NumericalImputation()), - ("variance_threshold", VarianceThreshold()), - ("rescaling", rescaling_components.RescalingChoice(default_dataset_properties)), - ]) + steps.extend( + [ + ("imputation", NumericalImputation()), + ("variance_threshold", VarianceThreshold()), + ( + "rescaling", + rescaling_components.RescalingChoice(default_dataset_properties), + ), + ] + ) return steps diff --git a/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py b/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py index a21980f000..6030460ee1 100644 --- a/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py +++ b/autosklearn/pipeline/components/data_preprocessing/feature_type_text.py @@ -1,20 +1,17 @@ -from typing import Any, List, Dict, Optional, Tuple, Union - -from ConfigSpace.configuration_space 
import Configuration, ConfigurationSpace +from typing import Any, Dict, List, Optional, Tuple, Union import numpy as np - +from ConfigSpace.configuration_space import Configuration, ConfigurationSpace from sklearn.base import BaseEstimator -from autosklearn.pipeline.components.data_preprocessing.text_encoding \ - import BagOfWordChoice -from autosklearn.pipeline.components.data_preprocessing.feature_reduction.truncated_svd import \ - FeatureReduction -from autosklearn.pipeline.base import ( - BasePipeline, - DATASET_PROPERTIES_TYPE, +from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, BasePipeline +from autosklearn.pipeline.components.data_preprocessing.feature_reduction.truncated_svd import ( # noqa: 501 + FeatureReduction, +) +from autosklearn.pipeline.components.data_preprocessing.text_encoding import ( + BagOfWordChoice, ) -from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA, INPUT +from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA class TextPreprocessingPipeline(BasePipeline): @@ -43,34 +40,42 @@ def __init__( include: Optional[Dict[str, str]] = None, exclude: Optional[Dict[str, str]] = None, random_state: Optional[Union[int, np.random.RandomState]] = None, - init_params: Optional[Dict[str, Any]] = None + init_params: Optional[Dict[str, Any]] = None, ) -> None: self._output_dtype = np.int32 super().__init__( - config, steps, dataset_properties, include, exclude, - random_state, init_params + config, + steps, + dataset_properties, + include, + exclude, + random_state, + init_params, ) @staticmethod - def get_properties(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: - return {'shortname': 'txt_datapreproc', - 'name': 'text data preprocessing', - 'handles_missing_values': True, - 'handles_nominal_values': False, - 'handles_numerical_features': False, - 'prefers_data_scaled': False, - 'prefers_data_normalized': False, - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'is_deterministic': True, - 'handles_sparse': True, - 'handles_dense': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (INPUT,), - 'preferred_dtype': None} + def get_properties( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: + return { + "shortname": "txt_datapreproc", + "name": "text data preprocessing", + "handles_missing_values": True, + "handles_nominal_values": False, + "handles_numerical_features": False, + "prefers_data_scaled": False, + "prefers_data_normalized": False, + "handles_regression": True, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "is_deterministic": True, + "handles_sparse": True, + "handles_dense": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (INPUT,), + "preferred_dtype": None, + } def _get_hyperparameter_search_space( self, @@ -94,25 +99,36 @@ def _get_hyperparameter_search_space( dataset_properties = dict() cs = self._get_base_search_space( - cs=cs, dataset_properties=dataset_properties, - exclude=exclude, include=include, pipeline=self.steps) + cs=cs, + dataset_properties=dataset_properties, + exclude=exclude, + include=include, + pipeline=self.steps, + ) return cs - def _get_pipeline_steps(self, - dataset_properties: Optional[Dict[str, str]] = None, - ) -> List[Tuple[str, BaseEstimator]]: + def _get_pipeline_steps( + self, + 
dataset_properties: Optional[Dict[str, str]] = None, + ) -> List[Tuple[str, BaseEstimator]]: steps = [] default_dataset_properties = {} if dataset_properties is not None and isinstance(dataset_properties, dict): default_dataset_properties.update(dataset_properties) - steps.extend([ - ("text_encoding", BagOfWordChoice(default_dataset_properties, - random_state=self.random_state)), - ("feature_reduction", FeatureReduction(random_state=self.random_state)) - ]) + steps.extend( + [ + ( + "text_encoding", + BagOfWordChoice( + default_dataset_properties, random_state=self.random_state + ), + ), + ("feature_reduction", FeatureReduction(random_state=self.random_state)), + ] + ) return steps def _get_estimator_hyperparameter_name(self) -> str: diff --git a/autosklearn/pipeline/components/data_preprocessing/imputation/categorical_imputation.py b/autosklearn/pipeline/components/data_preprocessing/imputation/categorical_imputation.py index 519155ea20..00b627daed 100644 --- a/autosklearn/pipeline/components/data_preprocessing/imputation/categorical_imputation.py +++ b/autosklearn/pipeline/components/data_preprocessing/imputation/categorical_imputation.py @@ -1,13 +1,12 @@ from typing import Dict, Optional, Tuple, Union -from ConfigSpace.configuration_space import ConfigurationSpace - import numpy as np +from ConfigSpace.configuration_space import ConfigurationSpace from scipy.sparse import spmatrix from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA, INPUT +from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA class CategoricalImputation(AutoSklearnPreprocessingAlgorithm): @@ -20,16 +19,16 @@ class CategoricalImputation(AutoSklearnPreprocessingAlgorithm): """ def __init__( - self, - random_state: Optional[Union[int, np.random.RandomState]] = None + self, random_state: Optional[Union[int, np.random.RandomState]] = None ) -> None: self.random_state = random_state - def fit(self, X: PIPELINE_DATA_DTYPE, - y: Optional[PIPELINE_DATA_DTYPE] = None) -> 'CategoricalImputation': + def fit( + self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None + ) -> "CategoricalImputation": import sklearn.impute - if hasattr(X, 'columns'): + if hasattr(X, "columns"): kind = X[X.columns[-1]].dtype.kind else: # Series, sparse and numpy have dtype @@ -53,7 +52,7 @@ def fit(self, X: PIPELINE_DATA_DTYPE, fill_value = min(np.unique(X)) - 1 self.preprocessor = sklearn.impute.SimpleImputer( - strategy='constant', copy=False, fill_value=fill_value + strategy="constant", copy=False, fill_value=fill_value ) self.preprocessor.fit(X) return self @@ -65,29 +64,33 @@ def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE: return X @staticmethod - def get_properties(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: - return {'shortname': 'CategoricalImputation', - 'name': 'Categorical Imputation', - 'handles_missing_values': True, - 'handles_nominal_values': True, - 'handles_numerical_features': True, - 'prefers_data_scaled': False, - 'prefers_data_normalized': False, - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': True, - 'is_deterministic': True, - # TODO find out of this is right! 
- 'handles_sparse': True, - 'handles_dense': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (INPUT,), - 'preferred_dtype': None} + def get_properties( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: + return { + "shortname": "CategoricalImputation", + "name": "Categorical Imputation", + "handles_missing_values": True, + "handles_nominal_values": True, + "handles_numerical_features": True, + "prefers_data_scaled": False, + "prefers_data_normalized": False, + "handles_regression": True, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": True, + "is_deterministic": True, + # TODO find out of this is right! + "handles_sparse": True, + "handles_dense": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (INPUT,), + "preferred_dtype": None, + } @staticmethod - def get_hyperparameter_search_space(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> ConfigurationSpace: + def get_hyperparameter_search_space( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> ConfigurationSpace: return ConfigurationSpace() diff --git a/autosklearn/pipeline/components/data_preprocessing/imputation/numerical_imputation.py b/autosklearn/pipeline/components/data_preprocessing/imputation/numerical_imputation.py index e730718032..d7d6a645ab 100644 --- a/autosklearn/pipeline/components/data_preprocessing/imputation/numerical_imputation.py +++ b/autosklearn/pipeline/components/data_preprocessing/imputation/numerical_imputation.py @@ -1,31 +1,31 @@ from typing import Dict, Optional, Tuple, Union +import numpy as np from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import CategoricalHyperparameter -import numpy as np - from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA, INPUT +from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA class NumericalImputation(AutoSklearnPreprocessingAlgorithm): - def __init__( self, - strategy: str = 'mean', - random_state: Optional[Union[int, np.random.RandomState]] = None + strategy: str = "mean", + random_state: Optional[Union[int, np.random.RandomState]] = None, ) -> None: self.strategy = strategy self.random_state = random_state - def fit(self, X: PIPELINE_DATA_DTYPE, - y: Optional[PIPELINE_DATA_DTYPE] = None) -> 'NumericalImputation': + def fit( + self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None + ) -> "NumericalImputation": import sklearn.impute self.preprocessor = sklearn.impute.SimpleImputer( - strategy=self.strategy, copy=False) + strategy=self.strategy, copy=False + ) self.preprocessor.fit(X) return self @@ -35,34 +35,39 @@ def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE: return self.preprocessor.transform(X) @staticmethod - def get_properties(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: - return {'shortname': 'NumericalImputation', - 'name': 'Numerical Imputation', - 'handles_missing_values': True, - 'handles_nominal_values': True, - 'handles_numerical_features': True, - 'prefers_data_scaled': False, - 'prefers_data_normalized': False, - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': 
True, - 'handles_multilabel': True, - 'handles_multioutput': True, - 'is_deterministic': True, - # TODO find out if this is right! - 'handles_sparse': True, - 'handles_dense': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (INPUT,), - 'preferred_dtype': None} + def get_properties( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: + return { + "shortname": "NumericalImputation", + "name": "Numerical Imputation", + "handles_missing_values": True, + "handles_nominal_values": True, + "handles_numerical_features": True, + "prefers_data_scaled": False, + "prefers_data_normalized": False, + "handles_regression": True, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": True, + "is_deterministic": True, + # TODO find out if this is right! + "handles_sparse": True, + "handles_dense": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (INPUT,), + "preferred_dtype": None, + } @staticmethod - def get_hyperparameter_search_space(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> ConfigurationSpace: + def get_hyperparameter_search_space( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> ConfigurationSpace: # TODO add replace by zero! strategy = CategoricalHyperparameter( - "strategy", ["mean", "median", "most_frequent"], default_value="mean") + "strategy", ["mean", "median", "most_frequent"], default_value="mean" + ) cs = ConfigurationSpace() cs.add_hyperparameter(strategy) return cs diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py index 0db0955cb5..fbf999761c 100644 --- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py @@ -1,24 +1,27 @@ -from collections import OrderedDict -import os - from typing import Any, Dict, Optional +import os +from collections import OrderedDict + from ConfigSpace import Configuration from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import CategoricalHyperparameter - -from ...base import AutoSklearnPreprocessingAlgorithm, find_components, \ - ThirdPartyComponents, AutoSklearnChoice, _addons - from sklearn.base import BaseEstimator from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE +from ...base import ( + AutoSklearnChoice, + AutoSklearnPreprocessingAlgorithm, + ThirdPartyComponents, + _addons, + find_components, +) + mc_directory = os.path.split(__file__)[0] -_mcs = find_components( - __package__, mc_directory, AutoSklearnPreprocessingAlgorithm) +_mcs = find_components(__package__, mc_directory, AutoSklearnPreprocessingAlgorithm) additional_components = ThirdPartyComponents(AutoSklearnPreprocessingAlgorithm) -_addons['data_preprocessing.minority_coalescense'] = additional_components +_addons["data_preprocessing.minority_coalescense"] = additional_components def add_mc(mc: BaseEstimator) -> None: @@ -26,7 +29,6 @@ def add_mc(mc: BaseEstimator) -> None: class CoalescenseChoice(AutoSklearnChoice): - @classmethod def get_components(cls: BaseEstimator) -> Dict[str, BaseEstimator]: components: Dict[str, BaseEstimator] = OrderedDict() @@ -48,46 +50,52 @@ def get_hyperparameter_search_space( # Compile a list of legal preprocessors for this problem 
available_preprocessors = self.get_available_components( - dataset_properties=dataset_properties, - include=include, exclude=exclude) + dataset_properties=dataset_properties, include=include, exclude=exclude + ) if len(available_preprocessors) == 0: raise ValueError( "No minority coalescers found, please add any one minority coalescer" - "component.") + "component." + ) if default is None: - defaults = ['minority_coalescer', 'no_coalescense'] + defaults = ["minority_coalescer", "no_coalescense"] for default_ in defaults: if default_ in available_preprocessors: default = default_ break preprocessor = CategoricalHyperparameter( - '__choice__', list(available_preprocessors.keys()), default_value=default) + "__choice__", list(available_preprocessors.keys()), default_value=default + ) cs.add_hyperparameter(preprocessor) for name in available_preprocessors: - preprocessor_configuration_space = available_preprocessors[name]. \ - get_hyperparameter_search_space(dataset_properties) - parent_hyperparameter = {'parent': preprocessor, 'value': name} - cs.add_configuration_space(name, preprocessor_configuration_space, - parent_hyperparameter=parent_hyperparameter) + preprocessor_configuration_space = available_preprocessors[ + name + ].get_hyperparameter_search_space(dataset_properties) + parent_hyperparameter = {"parent": preprocessor, "value": name} + cs.add_configuration_space( + name, + preprocessor_configuration_space, + parent_hyperparameter=parent_hyperparameter, + ) self.configuration_space = cs self.dataset_properties = dataset_properties return cs - def set_hyperparameters(self, configuration: Configuration, - init_params: Optional[Dict[str, Any]] = None - ) -> 'CoalescenseChoice': + def set_hyperparameters( + self, configuration: Configuration, init_params: Optional[Dict[str, Any]] = None + ) -> "CoalescenseChoice": new_params = {} params = configuration.get_dictionary() - choice = params['__choice__'] - del params['__choice__'] + choice = params["__choice__"] + del params["__choice__"] for param, value in params.items(): - param = param.replace(choice, '').replace(':', '') + param = param.replace(choice, "").replace(":", "") new_params[param] = value if init_params is not None: @@ -97,10 +105,10 @@ def set_hyperparameters(self, configuration: Configuration, # in order to not pass it to the no encoding if choice not in param: continue - param = param.replace(choice, '').replace(':', '') + param = param.replace(choice, "").replace(":", "") new_params[param] = value - new_params['random_state'] = self.random_state + new_params["random_state"] = self.random_state self.new_params = new_params self.choice = self.get_components()[choice](**new_params) diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py index 5b3b66caa7..278cf0bfb9 100644 --- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py +++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/minority_coalescer.py @@ -1,34 +1,35 @@ from typing import Dict, Optional, Tuple, Union - +import numpy as np from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import UniformFloatHyperparameter -import numpy as np - import autosklearn.pipeline.implementations.MinorityCoalescer from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE from 
autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA, INPUT +from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA class MinorityCoalescer(AutoSklearnPreprocessingAlgorithm): - """ Group together categories which occurence is less than a specified minimum fraction. - """ + """Group categories whose occurence is less than a specified minimum fraction.""" def __init__( self, minimum_fraction: float = 0.01, - random_state: Optional[Union[int, np.random.RandomState]] = None + random_state: Optional[Union[int, np.random.RandomState]] = None, ) -> None: self.minimum_fraction = minimum_fraction - def fit(self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None - ) -> 'MinorityCoalescer': + def fit( + self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None + ) -> "MinorityCoalescer": self.minimum_fraction = float(self.minimum_fraction) - self.preprocessor = autosklearn.pipeline.implementations.MinorityCoalescer\ - .MinorityCoalescer(minimum_fraction=self.minimum_fraction) + self.preprocessor = ( + autosklearn.pipeline.implementations.MinorityCoalescer.MinorityCoalescer( + minimum_fraction=self.minimum_fraction + ) + ) self.preprocessor.fit(X, y) return self @@ -38,26 +39,31 @@ def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE: return self.preprocessor.transform(X) @staticmethod - def get_properties(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: - return {'shortname': 'coalescer', - 'name': 'Categorical minority coalescer', - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': True, - # TODO find out of this is right! - 'handles_sparse': True, - 'handles_dense': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (INPUT,), } + def get_properties( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: + return { + "shortname": "coalescer", + "name": "Categorical minority coalescer", + "handles_regression": True, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": True, + # TODO find out of this is right! 
+ "handles_sparse": True, + "handles_dense": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (INPUT,), + } @staticmethod - def get_hyperparameter_search_space(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> ConfigurationSpace: + def get_hyperparameter_search_space( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> ConfigurationSpace: cs = ConfigurationSpace() minimum_fraction = UniformFloatHyperparameter( - "minimum_fraction", lower=.0001, upper=0.5, default_value=0.01, log=True) + "minimum_fraction", lower=0.0001, upper=0.5, default_value=0.01, log=True + ) cs.add_hyperparameter(minimum_fraction) return cs diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py index d252821ccc..d05c146d98 100644 --- a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py +++ b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/no_coalescense.py @@ -1,47 +1,49 @@ from typing import Dict, Optional, Tuple, Union - +import numpy as np from ConfigSpace.configuration_space import ConfigurationSpace -import numpy as np from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE -from autosklearn.pipeline.components.base import \ - AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA, INPUT +from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA class NoCoalescence(AutoSklearnPreprocessingAlgorithm): def __init__( - self, - random_state: Optional[Union[int, np.random.RandomState]] = None + self, random_state: Optional[Union[int, np.random.RandomState]] = None ) -> None: pass - def fit(self, X: np.array, y: Optional[PIPELINE_DATA_DTYPE] = None - ) -> PIPELINE_DATA_DTYPE: - self.preprocessor = 'passthrough' + def fit( + self, X: np.array, y: Optional[PIPELINE_DATA_DTYPE] = None + ) -> PIPELINE_DATA_DTYPE: + self.preprocessor = "passthrough" return self def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE: return X @staticmethod - def get_properties(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: - return {'shortname': 'no coalescence', - 'name': 'No categorical variable coalescence', - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': True, - 'handles_sparse': True, - 'handles_dense': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (INPUT,)} + def get_properties( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: + return { + "shortname": "no coalescence", + "name": "No categorical variable coalescence", + "handles_regression": True, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": True, + "handles_sparse": True, + "handles_dense": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (INPUT,), + } @staticmethod - def get_hyperparameter_search_space(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> ConfigurationSpace: + def get_hyperparameter_search_space( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> 
ConfigurationSpace: cs = ConfigurationSpace() return cs diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py index b37ad3ce24..2a9fbdb842 100644 --- a/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py @@ -1,26 +1,31 @@ -from collections import OrderedDict -import os - from typing import Dict, Optional +import os +from collections import OrderedDict + from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import CategoricalHyperparameter - from sklearn.base import BaseEstimator -from ...base import AutoSklearnPreprocessingAlgorithm, find_components, \ - ThirdPartyComponents, AutoSklearnChoice, _addons from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE -from autosklearn.pipeline.components.data_preprocessing.rescaling.abstract_rescaling import ( - Rescaling +from autosklearn.pipeline.components.data_preprocessing.rescaling.abstract_rescaling import ( # noqa: E501 + Rescaling, +) + +from ...base import ( + AutoSklearnChoice, + AutoSklearnPreprocessingAlgorithm, + ThirdPartyComponents, + _addons, + find_components, ) rescaling_directory = os.path.split(__file__)[0] -_rescalers = find_components(__package__, - rescaling_directory, - AutoSklearnPreprocessingAlgorithm) +_rescalers = find_components( + __package__, rescaling_directory, AutoSklearnPreprocessingAlgorithm +) additional_components = ThirdPartyComponents(AutoSklearnPreprocessingAlgorithm) -_addons['data_preprocessing.rescaling'] = additional_components +_addons["data_preprocessing.rescaling"] = additional_components def add_rescaler(rescaler: Rescaling) -> None: @@ -28,7 +33,6 @@ def add_rescaler(rescaler: Rescaling) -> None: class RescalingChoice(AutoSklearnChoice): - @classmethod def get_components(cls: BaseEstimator) -> Dict[str, BaseEstimator]: components: Dict[str, BaseEstimator] = OrderedDict() @@ -50,31 +54,33 @@ def get_hyperparameter_search_space( # Compile a list of legal preprocessors for this problem available_preprocessors = self.get_available_components( - dataset_properties=dataset_properties, - include=include, exclude=exclude) + dataset_properties=dataset_properties, include=include, exclude=exclude + ) if len(available_preprocessors) == 0: - raise ValueError( - "No rescalers found, please add any rescaling component.") + raise ValueError("No rescalers found, please add any rescaling component.") if default is None: - defaults = ['standardize', 'none', 'minmax', 'normalize'] + defaults = ["standardize", "none", "minmax", "normalize"] for default_ in defaults: if default_ in available_preprocessors: default = default_ break - preprocessor = CategoricalHyperparameter('__choice__', - list( - available_preprocessors.keys()), - default_value=default) + preprocessor = CategoricalHyperparameter( + "__choice__", list(available_preprocessors.keys()), default_value=default + ) cs.add_hyperparameter(preprocessor) for name in available_preprocessors: - preprocessor_configuration_space = available_preprocessors[name]. 
\ - get_hyperparameter_search_space(dataset_properties) - parent_hyperparameter = {'parent': preprocessor, 'value': name} - cs.add_configuration_space(name, preprocessor_configuration_space, - parent_hyperparameter=parent_hyperparameter) + preprocessor_configuration_space = available_preprocessors[ + name + ].get_hyperparameter_search_space(dataset_properties) + parent_hyperparameter = {"parent": preprocessor, "value": name} + cs.add_configuration_space( + name, + preprocessor_configuration_space, + parent_hyperparameter=parent_hyperparameter, + ) self.configuration_space = cs self.dataset_properties = dataset_properties diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py index dc9c9c60ac..05e1a4e898 100644 --- a/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py +++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/abstract_rescaling.py @@ -1,9 +1,7 @@ from typing import Optional, Union -from ConfigSpace.configuration_space import ConfigurationSpace - import numpy as np - +from ConfigSpace.configuration_space import ConfigurationSpace from sklearn.base import BaseEstimator from sklearn.exceptions import NotFittedError @@ -14,16 +12,13 @@ class Rescaling(object): # Rescaling does not support fit_transform (as of 0.19.1)! def __init__( - self, - random_state: Optional[Union[int, np.random.RandomState]] = None + self, random_state: Optional[Union[int, np.random.RandomState]] = None ) -> None: self.preprocessor: Optional[BaseEstimator] = None def fit( - self, - X: PIPELINE_DATA_DTYPE, - y: Optional[PIPELINE_DATA_DTYPE] = None - ) -> 'AutoSklearnPreprocessingAlgorithm': + self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None + ) -> "AutoSklearnPreprocessingAlgorithm": if self.preprocessor is None: raise NotFittedError() @@ -42,7 +37,8 @@ def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE: return transformed_X @staticmethod - def get_hyperparameter_search_space(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> ConfigurationSpace: + def get_hyperparameter_search_space( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> ConfigurationSpace: cs = ConfigurationSpace() return cs diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/minmax.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/minmax.py index 67650376e1..3663a23d35 100644 --- a/autosklearn/pipeline/components/data_preprocessing/rescaling/minmax.py +++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/minmax.py @@ -3,36 +3,43 @@ import numpy as np from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE -from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA, SIGNED_DATA, INPUT -from autosklearn.pipeline.components.data_preprocessing.rescaling.abstract_rescaling \ - import Rescaling from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.components.data_preprocessing.rescaling.abstract_rescaling import ( # noqa: E501 + Rescaling, +) +from autosklearn.pipeline.constants import DENSE, INPUT, SIGNED_DATA, UNSIGNED_DATA class MinMaxScalerComponent(Rescaling, AutoSklearnPreprocessingAlgorithm): - def __init__(self, random_state: Optional[Union[int, np.random.RandomState]] = None): + def __init__( + self, random_state: Optional[Union[int, np.random.RandomState]] = None + ): from 
sklearn.preprocessing import MinMaxScaler + self.preprocessor = MinMaxScaler(copy=False) @staticmethod - def get_properties(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: - return {'shortname': 'MinMaxScaler', - 'name': 'MinMaxScaler', - 'handles_missing_values': False, - 'handles_nominal_values': False, - 'handles_numerical_features': True, - 'prefers_data_scaled': False, - 'prefers_data_normalized': False, - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': True, - 'is_deterministic': True, - # TODO find out if this is right! - 'handles_sparse': False, - 'handles_dense': True, - 'input': (DENSE, UNSIGNED_DATA), - 'output': (INPUT, SIGNED_DATA), - 'preferred_dtype': None} + def get_properties( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: + return { + "shortname": "MinMaxScaler", + "name": "MinMaxScaler", + "handles_missing_values": False, + "handles_nominal_values": False, + "handles_numerical_features": True, + "prefers_data_scaled": False, + "prefers_data_normalized": False, + "handles_regression": True, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": True, + "is_deterministic": True, + # TODO find out if this is right! + "handles_sparse": False, + "handles_dense": True, + "input": (DENSE, UNSIGNED_DATA), + "output": (INPUT, SIGNED_DATA), + "preferred_dtype": None, + } diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/none.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/none.py index 83377e2544..ee94213f57 100644 --- a/autosklearn/pipeline/components/data_preprocessing/rescaling/none.py +++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/none.py @@ -1,41 +1,45 @@ from typing import Dict, Optional, Tuple, Union from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE -from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA, INPUT, SPARSE -from autosklearn.pipeline.components.data_preprocessing.rescaling.abstract_rescaling \ - import Rescaling from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.components.data_preprocessing.rescaling.abstract_rescaling import ( # noqa: E501 + Rescaling, +) +from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA class NoRescalingComponent(Rescaling, AutoSklearnPreprocessingAlgorithm): - - def fit(self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None - ) -> 'AutoSklearnPreprocessingAlgorithm': - self.preprocessor = 'passthrough' + def fit( + self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None + ) -> "AutoSklearnPreprocessingAlgorithm": + self.preprocessor = "passthrough" return self def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE: return X @staticmethod - def get_properties(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: - return {'shortname': 'NoRescaling', - 'name': 'NoRescaling', - 'handles_missing_values': False, - 'handles_nominal_values': False, - 'handles_numerical_features': True, - 'prefers_data_scaled': False, - 'prefers_data_normalized': False, - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 
'handles_multilabel': True, - 'handles_multioutput': True, - 'is_deterministic': True, - # TODO find out if this is right! - 'handles_sparse': True, - 'handles_dense': True, - 'input': (SPARSE, DENSE, UNSIGNED_DATA), - 'output': (INPUT,), - 'preferred_dtype': None} + def get_properties( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: + return { + "shortname": "NoRescaling", + "name": "NoRescaling", + "handles_missing_values": False, + "handles_nominal_values": False, + "handles_numerical_features": True, + "prefers_data_scaled": False, + "prefers_data_normalized": False, + "handles_regression": True, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": True, + "is_deterministic": True, + # TODO find out if this is right! + "handles_sparse": True, + "handles_dense": True, + "input": (SPARSE, DENSE, UNSIGNED_DATA), + "output": (INPUT,), + "preferred_dtype": None, + } diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/normalize.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/normalize.py index 036c75a4ef..00395833e9 100644 --- a/autosklearn/pipeline/components/data_preprocessing/rescaling/normalize.py +++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/normalize.py @@ -3,41 +3,45 @@ import numpy as np from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE -from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA, INPUT, SPARSE -from autosklearn.pipeline.components.data_preprocessing.rescaling.abstract_rescaling \ - import Rescaling from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.components.data_preprocessing.rescaling.abstract_rescaling import ( # noqa: E501 + Rescaling, +) +from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA class NormalizerComponent(Rescaling, AutoSklearnPreprocessingAlgorithm): def __init__( - self, - random_state: Optional[Union[int, np.random.RandomState]] = None + self, random_state: Optional[Union[int, np.random.RandomState]] = None ) -> None: # Use custom implementation because sklearn implementation cannot # handle float32 input matrix from sklearn.preprocessing import Normalizer + self.preprocessor = Normalizer(copy=False) @staticmethod - def get_properties(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: - return {'shortname': 'Normalizer', - 'name': 'Normalizer', - 'handles_missing_values': False, - 'handles_nominal_values': False, - 'handles_numerical_features': True, - 'prefers_data_scaled': False, - 'prefers_data_normalized': False, - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': True, - 'is_deterministic': True, - # TODO find out if this is right! 
- 'handles_sparse': True, - 'handles_dense': True, - 'input': (SPARSE, DENSE, UNSIGNED_DATA), - 'output': (INPUT,), - 'preferred_dtype': None} + def get_properties( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: + return { + "shortname": "Normalizer", + "name": "Normalizer", + "handles_missing_values": False, + "handles_nominal_values": False, + "handles_numerical_features": True, + "prefers_data_scaled": False, + "prefers_data_normalized": False, + "handles_regression": True, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": True, + "is_deterministic": True, + # TODO find out if this is right! + "handles_sparse": True, + "handles_dense": True, + "input": (SPARSE, DENSE, UNSIGNED_DATA), + "output": (INPUT,), + "preferred_dtype": None, + } diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/power_transformer.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/power_transformer.py index 759c921caa..dd9ab616ae 100644 --- a/autosklearn/pipeline/components/data_preprocessing/rescaling/power_transformer.py +++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/power_transformer.py @@ -3,10 +3,11 @@ import numpy as np from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE -from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA, INPUT -from autosklearn.pipeline.components.data_preprocessing.rescaling.abstract_rescaling \ - import Rescaling from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.components.data_preprocessing.rescaling.abstract_rescaling import ( # noqa: E501 + Rescaling, +) +from autosklearn.pipeline.constants import DENSE, INPUT, UNSIGNED_DATA class PowerTransformerComponent(Rescaling, AutoSklearnPreprocessingAlgorithm): @@ -15,27 +16,31 @@ def __init__( random_state: Optional[Union[int, np.random.RandomState]] = None, ) -> None: from sklearn.preprocessing import PowerTransformer + self.preprocessor = PowerTransformer(copy=False) @staticmethod - def get_properties(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: - return {'shortname': 'PowerTransformer', - 'name': 'PowerTransformer', - 'handles_missing_values': False, - 'handles_nominal_values': False, - 'handles_numerical_features': True, - 'prefers_data_scaled': False, - 'prefers_data_normalized': False, - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': True, - 'is_deterministic': True, - # TODO find out of this is right! - 'handles_sparse': False, - 'handles_dense': True, - 'input': (DENSE, UNSIGNED_DATA), - 'output': (INPUT,), - 'preferred_dtype': None} + def get_properties( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: + return { + "shortname": "PowerTransformer", + "name": "PowerTransformer", + "handles_missing_values": False, + "handles_nominal_values": False, + "handles_numerical_features": True, + "prefers_data_scaled": False, + "prefers_data_normalized": False, + "handles_regression": True, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": True, + "is_deterministic": True, + # TODO find out of this is right! 
+ "handles_sparse": False, + "handles_dense": True, + "input": (DENSE, UNSIGNED_DATA), + "output": (INPUT,), + "preferred_dtype": None, + } diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/quantile_transformer.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/quantile_transformer.py index b7206fbaaf..2611c0650d 100644 --- a/autosklearn/pipeline/components/data_preprocessing/rescaling/quantile_transformer.py +++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/quantile_transformer.py @@ -1,17 +1,24 @@ from typing import Dict, Optional, Tuple, Union import numpy as np - -from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import UniformIntegerHyperparameter, \ - CategoricalHyperparameter +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformIntegerHyperparameter, +) -from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA, SIGNED_DATA, SPARSE, INPUT -from autosklearn.pipeline.components.data_preprocessing.rescaling.abstract_rescaling \ - import Rescaling -from autosklearn.pipeline.components.base import \ - AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE +from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.components.data_preprocessing.rescaling.abstract_rescaling import ( # noqa: E501 + Rescaling, +) +from autosklearn.pipeline.constants import ( + DENSE, + INPUT, + SIGNED_DATA, + SPARSE, + UNSIGNED_DATA, +) class QuantileTransformerComponent(Rescaling, AutoSklearnPreprocessingAlgorithm): @@ -19,46 +26,51 @@ def __init__( self, n_quantiles: int, output_distribution: str, - random_state: Optional[Union[int, np.random.RandomState]] = None + random_state: Optional[Union[int, np.random.RandomState]] = None, ) -> None: from sklearn.preprocessing import QuantileTransformer + self.n_quantiles = n_quantiles self.output_distribution = output_distribution self.preprocessor = QuantileTransformer( n_quantiles=n_quantiles, output_distribution=output_distribution, copy=False, - random_state=random_state + random_state=random_state, ) @staticmethod - def get_properties(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: - return {'shortname': 'QuantileTransformer', - 'name': 'QuantileTransformer', - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': True, - 'is_deterministic': True, - # TODO find out if this is right! - 'handles_sparse': True, - 'handles_dense': True, - 'input': (SPARSE, DENSE, UNSIGNED_DATA), - 'output': (INPUT, SIGNED_DATA), - 'preferred_dtype': None} + def get_properties( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: + return { + "shortname": "QuantileTransformer", + "name": "QuantileTransformer", + "handles_regression": True, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": True, + "is_deterministic": True, + # TODO find out if this is right! 
+ "handles_sparse": True, + "handles_dense": True, + "input": (SPARSE, DENSE, UNSIGNED_DATA), + "output": (INPUT, SIGNED_DATA), + "preferred_dtype": None, + } @staticmethod - def get_hyperparameter_search_space(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> ConfigurationSpace: + def get_hyperparameter_search_space( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> ConfigurationSpace: cs = ConfigurationSpace() # TODO parametrize like the Random Forest as n_quantiles = n_features^param n_quantiles = UniformIntegerHyperparameter( - 'n_quantiles', lower=10, upper=2000, default_value=1000 + "n_quantiles", lower=10, upper=2000, default_value=1000 ) output_distribution = CategoricalHyperparameter( - 'output_distribution', ['normal', 'uniform'] + "output_distribution", ["normal", "uniform"] ) cs.add_hyperparameters((n_quantiles, output_distribution)) return cs diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/robust_scaler.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/robust_scaler.py index 614b79ee40..af3b4c0558 100644 --- a/autosklearn/pipeline/components/data_preprocessing/rescaling/robust_scaler.py +++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/robust_scaler.py @@ -1,19 +1,23 @@ from typing import Dict, Optional, Tuple, Union import numpy as np - - -from scipy import sparse -from sklearn.exceptions import NotFittedError from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import UniformFloatHyperparameter +from scipy import sparse +from sklearn.exceptions import NotFittedError from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE -from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA, SIGNED_DATA, INPUT, SPARSE -from autosklearn.pipeline.components.data_preprocessing.rescaling.abstract_rescaling \ - import Rescaling -from autosklearn.pipeline.components.base import \ - AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.components.data_preprocessing.rescaling.abstract_rescaling import ( # noqa: E501 + Rescaling, +) +from autosklearn.pipeline.constants import ( + DENSE, + INPUT, + SIGNED_DATA, + SPARSE, + UNSIGNED_DATA, +) class RobustScalerComponent(Rescaling, AutoSklearnPreprocessingAlgorithm): @@ -21,48 +25,51 @@ def __init__( self, q_min: float, q_max: float, - random_state: Optional[Union[int, np.random.RandomState]] = None + random_state: Optional[Union[int, np.random.RandomState]] = None, ) -> None: from sklearn.preprocessing import RobustScaler + self.q_min = q_min self.q_max = q_max self.preprocessor = RobustScaler( - quantile_range=(self.q_min, self.q_max), copy=False, + quantile_range=(self.q_min, self.q_max), + copy=False, ) @staticmethod - def get_properties(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: - return {'shortname': 'RobustScaler', - 'name': 'RobustScaler', - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': True, - 'is_deterministic': True, - # TODO find out if this is right! 
- 'handles_sparse': True, - 'handles_dense': True, - 'input': (SPARSE, DENSE, UNSIGNED_DATA), - 'output': (INPUT, SIGNED_DATA), - 'preferred_dtype': None} + def get_properties( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: + return { + "shortname": "RobustScaler", + "name": "RobustScaler", + "handles_regression": True, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": True, + "is_deterministic": True, + # TODO find out if this is right! + "handles_sparse": True, + "handles_dense": True, + "input": (SPARSE, DENSE, UNSIGNED_DATA), + "output": (INPUT, SIGNED_DATA), + "preferred_dtype": None, + } @staticmethod - def get_hyperparameter_search_space(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> ConfigurationSpace: + def get_hyperparameter_search_space( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> ConfigurationSpace: cs = ConfigurationSpace() - q_min = UniformFloatHyperparameter( - 'q_min', 0.001, 0.3, default_value=0.25 - ) - q_max = UniformFloatHyperparameter( - 'q_max', 0.7, 0.999, default_value=0.75 - ) + q_min = UniformFloatHyperparameter("q_min", 0.001, 0.3, default_value=0.25) + q_max = UniformFloatHyperparameter("q_max", 0.7, 0.999, default_value=0.75) cs.add_hyperparameters((q_min, q_max)) return cs - def fit(self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None - ) -> 'AutoSklearnPreprocessingAlgorithm': + def fit( + self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None + ) -> "AutoSklearnPreprocessingAlgorithm": if self.preprocessor is None: raise NotFittedError() if sparse.isspmatrix(X): diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/standardize.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/standardize.py index adb156ab93..a1da729907 100644 --- a/autosklearn/pipeline/components/data_preprocessing/rescaling/standardize.py +++ b/autosklearn/pipeline/components/data_preprocessing/rescaling/standardize.py @@ -1,52 +1,54 @@ from typing import Dict, Optional, Tuple, Union import numpy as np - from scipy import sparse - from sklearn.exceptions import NotFittedError from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE -from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA, INPUT -from autosklearn.pipeline.components.data_preprocessing.rescaling.abstract_rescaling \ - import Rescaling -from autosklearn.pipeline.components.base import \ - AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.components.data_preprocessing.rescaling.abstract_rescaling import ( # noqa: E501 + Rescaling, +) +from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA class StandardScalerComponent(Rescaling, AutoSklearnPreprocessingAlgorithm): def __init__( - self, - random_state: Optional[Union[int, np.random.RandomState]] = None + self, random_state: Optional[Union[int, np.random.RandomState]] = None ) -> None: from sklearn.preprocessing import StandardScaler + self.preprocessor = StandardScaler(copy=False) @staticmethod - def get_properties(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: - return {'shortname': 'StandardScaler', - 'name': 'StandardScaler', - 'handles_missing_values': False, - 'handles_nominal_values': 
False, - 'handles_numerical_features': True, - 'prefers_data_scaled': False, - 'prefers_data_normalized': False, - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': True, - 'is_deterministic': True, - # TODO find out if this is right! - 'handles_sparse': True, - 'handles_dense': True, - 'input': (SPARSE, DENSE, UNSIGNED_DATA), - 'output': (INPUT,), - 'preferred_dtype': None} - - def fit(self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None - ) -> 'AutoSklearnPreprocessingAlgorithm': + def get_properties( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: + return { + "shortname": "StandardScaler", + "name": "StandardScaler", + "handles_missing_values": False, + "handles_nominal_values": False, + "handles_numerical_features": True, + "prefers_data_scaled": False, + "prefers_data_normalized": False, + "handles_regression": True, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": True, + "is_deterministic": True, + # TODO find out if this is right! + "handles_sparse": True, + "handles_dense": True, + "input": (SPARSE, DENSE, UNSIGNED_DATA), + "output": (INPUT,), + "preferred_dtype": None, + } + + def fit( + self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None + ) -> "AutoSklearnPreprocessingAlgorithm": if self.preprocessor is None: raise NotFittedError() if sparse.isspmatrix(X): diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py index 949ce83298..990ad579ca 100644 --- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py @@ -1,34 +1,35 @@ -from collections import OrderedDict -import os - from typing import Any, Dict, Optional +import os +from collections import OrderedDict + from ConfigSpace import Configuration from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import CategoricalHyperparameter - from sklearn.base import BaseEstimator -from ...base import AutoSklearnPreprocessingAlgorithm, find_components,\ - ThirdPartyComponents, AutoSklearnChoice, _addons - from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE +from ...base import ( + AutoSklearnChoice, + AutoSklearnPreprocessingAlgorithm, + ThirdPartyComponents, + _addons, + find_components, +) + bow_directory = os.path.split(__file__)[0] -_bows = find_components(__package__, - bow_directory, - AutoSklearnPreprocessingAlgorithm) +_bows = find_components(__package__, bow_directory, AutoSklearnPreprocessingAlgorithm) additional_components = ThirdPartyComponents(AutoSklearnPreprocessingAlgorithm) -_addons['data_preprocessing.text_encoding'] = additional_components +_addons["data_preprocessing.text_encoding"] = additional_components -def add_bow(classifier: 'BagOfWordChoice') -> None: +def add_bow(classifier: "BagOfWordChoice") -> None: additional_components.add_component(classifier) class BagOfWordChoice(AutoSklearnChoice): - @classmethod def get_components(cls: BaseEstimator) -> Dict[str, BaseEstimator]: components: Dict[str, BaseEstimator] = OrderedDict() @@ -50,49 +51,53 @@ def get_hyperparameter_search_space( # Compile a list of legal preprocessors for this problem 
available_preprocessors = self.get_available_components( - dataset_properties=dataset_properties, - include=include, exclude=exclude) + dataset_properties=dataset_properties, include=include, exclude=exclude + ) if len(available_preprocessors) == 0: raise ValueError( "No bag of word encoders found, please add any bag of word encoder" - "component.") + "component." + ) if default is None: - defaults = ['bag_of_words_encoding'] + defaults = ["bag_of_words_encoding"] for default_ in defaults: if default_ in available_preprocessors: default = default_ break - preprocessor = CategoricalHyperparameter('__choice__', - list( - available_preprocessors.keys()), - default_value=default) + preprocessor = CategoricalHyperparameter( + "__choice__", list(available_preprocessors.keys()), default_value=default + ) cs.add_hyperparameter(preprocessor) for name in available_preprocessors: - preprocessor_configuration_space = available_preprocessors[name]. \ - get_hyperparameter_search_space(dataset_properties) - parent_hyperparameter = {'parent': preprocessor, 'value': name} - cs.add_configuration_space(name, preprocessor_configuration_space, - parent_hyperparameter=parent_hyperparameter) + preprocessor_configuration_space = available_preprocessors[ + name + ].get_hyperparameter_search_space(dataset_properties) + parent_hyperparameter = {"parent": preprocessor, "value": name} + cs.add_configuration_space( + name, + preprocessor_configuration_space, + parent_hyperparameter=parent_hyperparameter, + ) self.configuration_space = cs self.dataset_properties = dataset_properties return cs - def set_hyperparameters(self, configuration: Configuration, - init_params: Optional[Dict[str, Any]] = None - ) -> 'BagOfWordChoice': + def set_hyperparameters( + self, configuration: Configuration, init_params: Optional[Dict[str, Any]] = None + ) -> "BagOfWordChoice": new_params = {} params = configuration.get_dictionary() - choice = params['__choice__'] - del params['__choice__'] + choice = params["__choice__"] + del params["__choice__"] for param, value in params.items(): - param = param.replace(choice, '').replace(':', '') + param = param.replace(choice, "").replace(":", "") new_params[param] = value if init_params is not None: @@ -102,10 +107,10 @@ def set_hyperparameters(self, configuration: Configuration, # in order to not pass it to the no encoding if choice not in param: continue - param = param.replace(choice, '').replace(':', '') + param = param.replace(choice, "").replace(":", "") new_params[param] = value - new_params['random_state'] = self.random_state + new_params["random_state"] = self.random_state self.new_params = new_params self.choice = self.get_components()[choice](**new_params) diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding.py index c66c67046b..47a80684f7 100644 --- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding.py +++ b/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding.py @@ -1,19 +1,17 @@ from typing import Dict, Optional, Tuple, Union -from ConfigSpace.configuration_space import ConfigurationSpace -import ConfigSpace.hyperparameters as CSH -from ConfigSpace import EqualsCondition +import itertools +import ConfigSpace.hyperparameters as CSH import numpy as np import pandas as pd -import itertools - +from ConfigSpace import EqualsCondition +from ConfigSpace.configuration_space import 
ConfigurationSpace
+from sklearn.feature_extraction.text import CountVectorizer

 from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE
 from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
-from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA, INPUT
-
-from sklearn.feature_extraction.text import CountVectorizer
+from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA


 class BagOfWordEncoder(AutoSklearnPreprocessingAlgorithm):
@@ -23,7 +21,7 @@ def __init__(
         min_df_choice: str = "min_df_absolute",
         min_df_absolute: int = 0,
         min_df_relative: float = 0.01,
-        random_state: Optional[Union[int, np.random.RandomState]] = None
+        random_state: Optional[Union[int, np.random.RandomState]] = None,
     ) -> None:
         self.ngram_range = ngram_range
         self.random_state = random_state
@@ -31,30 +29,38 @@ def __init__(
         self.min_df_absolute = min_df_absolute
         self.min_df_relative = min_df_relative

-    def fit(self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None
-            ) -> 'BagOfWordEncoder':
-
-        if isinstance(X, pd.DataFrame):
-            X.fillna("", inplace=True)
-            # define a CountVectorizer for every feature (implicitly defined by order of columns,
-            # maybe change the list
-            # to a dictionary with features as keys)
-            if self.min_df_choice == "min_df_absolute":
-                self.preprocessor = CountVectorizer(min_df=self.min_df_absolute,
-                                                    ngram_range=(1, self.ngram_range))
-            elif self.min_df_choice == "min_df_relative":
-                self.preprocessor = CountVectorizer(min_df=self.min_df_relative,
-                                                    ngram_range=(1, self.ngram_range))
-            else:
-                raise KeyError()
+    def fit(
+        self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None
+    ) -> "BagOfWordEncoder":
+
+        if not isinstance(X, pd.DataFrame):
+            raise ValueError(
+                "Your text data is not encoded in a pandas.DataFrame\n"
+                "Please make sure to use a pandas.DataFrame and ensure"
+                " that the text features are encoded as strings."
+ ) + + X.fillna("", inplace=True) + + # define a CountVectorizer for used on every feature + if self.min_df_choice == "min_df_absolute": + self.preprocessor = CountVectorizer( + min_df=self.min_df_absolute, + ngram_range=(1, self.ngram_range), + ) - all_text = itertools.chain.from_iterable(X[col] for col in X.columns) - self.preprocessor = self.preprocessor.fit(all_text) + elif self.min_df_choice == "min_df_relative": + self.preprocessor = CountVectorizer( + min_df=self.min_df_relative, + ngram_range=(1, self.ngram_range), + ) else: - raise ValueError("Your text data is not encoded in a pandas.DataFrame\n" - "Please make sure to use a pandas.DataFrame and ensure" - "that the text features are encoded as strings.") + raise KeyError() + + all_text = itertools.chain.from_iterable(X[col] for col in X.columns) + self.preprocessor = self.preprocessor.fit(all_text) + return self def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE: @@ -70,42 +76,55 @@ def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE: return X_transformed @staticmethod - def get_properties(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: - return {'shortname': 'BOW', - 'name': 'Bag Of Word Encoder', - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': True, - 'handles_sparse': True, - 'handles_dense': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (INPUT,), } + def get_properties( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: + return { + "shortname": "BOW", + "name": "Bag Of Word Encoder", + "handles_regression": True, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": True, + "handles_sparse": True, + "handles_dense": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (INPUT,), + } @staticmethod - def get_hyperparameter_search_space(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> ConfigurationSpace: + def get_hyperparameter_search_space( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> ConfigurationSpace: cs = ConfigurationSpace() - hp_ngram_range = CSH.UniformIntegerHyperparameter(name="ngram_range", lower=1, upper=3, - default_value=1) - hp_min_df_choice_bow = CSH.CategoricalHyperparameter("min_df_choice", - choices=["min_df_absolute", - "min_df_relative"]) - hp_min_df_absolute_bow = CSH.UniformIntegerHyperparameter(name="min_df_absolute", lower=0, - upper=10, - default_value=0) - hp_min_df_relative_bow = CSH.UniformFloatHyperparameter(name="min_df_relative", lower=0.01, - upper=1.0, - default_value=0.01, log=True) + hp_ngram_range = CSH.UniformIntegerHyperparameter( + name="ngram_range", lower=1, upper=3, default_value=1 + ) + hp_min_df_choice_bow = CSH.CategoricalHyperparameter( + "min_df_choice", choices=["min_df_absolute", "min_df_relative"] + ) + hp_min_df_absolute_bow = CSH.UniformIntegerHyperparameter( + name="min_df_absolute", lower=0, upper=10, default_value=0 + ) + hp_min_df_relative_bow = CSH.UniformFloatHyperparameter( + name="min_df_relative", lower=0.01, upper=1.0, default_value=0.01, log=True + ) cs.add_hyperparameters( - [hp_ngram_range, hp_min_df_choice_bow, hp_min_df_absolute_bow, hp_min_df_relative_bow]) - - cond_min_df_absolute_bow = EqualsCondition(hp_min_df_absolute_bow, hp_min_df_choice_bow, - 
"min_df_absolute") - cond_min_df_relative_bow = EqualsCondition(hp_min_df_relative_bow, hp_min_df_choice_bow, - "min_df_relative") + [ + hp_ngram_range, + hp_min_df_choice_bow, + hp_min_df_absolute_bow, + hp_min_df_relative_bow, + ] + ) + + cond_min_df_absolute_bow = EqualsCondition( + hp_min_df_absolute_bow, hp_min_df_choice_bow, "min_df_absolute" + ) + cond_min_df_relative_bow = EqualsCondition( + hp_min_df_relative_bow, hp_min_df_choice_bow, "min_df_relative" + ) cs.add_conditions([cond_min_df_absolute_bow, cond_min_df_relative_bow]) # maybe add bigrams ... diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding_distinct.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding_distinct.py index 85851ca72e..22a0be1088 100644 --- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding_distinct.py +++ b/autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding_distinct.py @@ -1,18 +1,16 @@ from typing import Dict, Optional, Tuple, Union -from ConfigSpace.configuration_space import ConfigurationSpace import ConfigSpace.hyperparameters as CSH -from ConfigSpace import EqualsCondition - import numpy as np import pandas as pd +from ConfigSpace import EqualsCondition +from ConfigSpace.configuration_space import ConfigurationSpace from scipy.sparse import hstack +from sklearn.feature_extraction.text import CountVectorizer from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA, INPUT - -from sklearn.feature_extraction.text import CountVectorizer +from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA class BagOfWordEncoder(AutoSklearnPreprocessingAlgorithm): @@ -22,7 +20,7 @@ def __init__( min_df_choice: str = "min_df_absolute", min_df_absolute: int = 0, min_df_relative: float = 0.01, - random_state: Optional[Union[int, np.random.RandomState]] = None + random_state: Optional[Union[int, np.random.RandomState]] = None, ) -> None: self.ngram_range = ngram_range self.random_state = random_state @@ -30,8 +28,9 @@ def __init__( self.min_df_absolute = min_df_absolute self.min_df_relative = min_df_relative - def fit(self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None - ) -> 'BagOfWordEncoder': + def fit( + self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None + ) -> "BagOfWordEncoder": if isinstance(X, pd.DataFrame): X.fillna("", inplace=True) @@ -40,8 +39,9 @@ def fit(self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None self.preprocessor = {} for feature in X.columns: - vectorizer = CountVectorizer(min_df=self.min_df_absolute, - ngram_range=(1, self.ngram_range)).fit(X[feature]) + vectorizer = CountVectorizer( + min_df=self.min_df_absolute, ngram_range=(1, self.ngram_range) + ).fit(X[feature]) self.preprocessor[feature] = vectorizer elif self.min_df_choice == "min_df_relative": @@ -49,15 +49,18 @@ def fit(self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None self.preprocessor = {} for feature in X.columns: - vectorizer = CountVectorizer(min_df=self.min_df_relative, - ngram_range=(1, self.ngram_range)).fit(X[feature]) + vectorizer = CountVectorizer( + min_df=self.min_df_relative, ngram_range=(1, self.ngram_range) + ).fit(X[feature]) self.preprocessor[feature] = vectorizer else: raise KeyError() else: 
-            raise ValueError("Your text data is not encoded in a pandas.DataFrame\n"
-                             "Please make sure to use a pandas.DataFrame and ensure"
-                             "that the text features are encoded as strings.")
+            raise ValueError(
+                "Your text data is not encoded in a pandas.DataFrame\n"
+                "Please make sure to use a pandas.DataFrame and ensure"
+                " that the text features are encoded as strings."
+            )
         return self

     def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE:
@@ -77,42 +80,55 @@ def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE:
         return X_new

     @staticmethod
-    def get_properties(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
-                       ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]:
-        return {'shortname': 'BOW',
-                'name': 'Bag Of Word Encoder',
-                'handles_regression': True,
-                'handles_classification': True,
-                'handles_multiclass': True,
-                'handles_multilabel': True,
-                'handles_multioutput': True,
-                'handles_sparse': True,
-                'handles_dense': True,
-                'input': (DENSE, SPARSE, UNSIGNED_DATA),
-                'output': (INPUT,), }
+    def get_properties(
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+    ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]:
+        return {
+            "shortname": "BOW",
+            "name": "Bag Of Word Encoder",
+            "handles_regression": True,
+            "handles_classification": True,
+            "handles_multiclass": True,
+            "handles_multilabel": True,
+            "handles_multioutput": True,
+            "handles_sparse": True,
+            "handles_dense": True,
+            "input": (DENSE, SPARSE, UNSIGNED_DATA),
+            "output": (INPUT,),
+        }

     @staticmethod
-    def get_hyperparameter_search_space(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
-                                        ) -> ConfigurationSpace:
+    def get_hyperparameter_search_space(
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+    ) -> ConfigurationSpace:
         cs = ConfigurationSpace()
-        hp_ngram_range = CSH.UniformIntegerHyperparameter(name="ngram_range", lower=1, upper=3,
-                                                          default_value=1)
-        hp_min_df_choice_bow = CSH.CategoricalHyperparameter("min_df_choice",
-                                                             choices=["min_df_absolute",
-                                                                      "min_df_relative"])
-        hp_min_df_absolute_bow = CSH.UniformIntegerHyperparameter(name="min_df_absolute", lower=0,
-                                                                  upper=10,
-                                                                  default_value=0)
-        hp_min_df_relative_bow = CSH.UniformFloatHyperparameter(name="min_df_relative", lower=0.01,
-                                                                upper=1.0,
-                                                                default_value=0.01, log=True)
+        hp_ngram_range = CSH.UniformIntegerHyperparameter(
+            name="ngram_range", lower=1, upper=3, default_value=1
+        )
+        hp_min_df_choice_bow = CSH.CategoricalHyperparameter(
+            "min_df_choice", choices=["min_df_absolute", "min_df_relative"]
+        )
+        hp_min_df_absolute_bow = CSH.UniformIntegerHyperparameter(
+            name="min_df_absolute", lower=0, upper=10, default_value=0
+        )
+        hp_min_df_relative_bow = CSH.UniformFloatHyperparameter(
+            name="min_df_relative", lower=0.01, upper=1.0, default_value=0.01, log=True
+        )
         cs.add_hyperparameters(
-            [hp_ngram_range, hp_min_df_choice_bow, hp_min_df_absolute_bow, hp_min_df_relative_bow])
-
-        cond_min_df_absolute_bow = EqualsCondition(hp_min_df_absolute_bow, hp_min_df_choice_bow,
-                                                   "min_df_absolute")
-        cond_min_df_relative_bow = EqualsCondition(hp_min_df_relative_bow, hp_min_df_choice_bow,
-                                                   "min_df_relative")
+            [
+                hp_ngram_range,
+                hp_min_df_choice_bow,
+                hp_min_df_absolute_bow,
+                hp_min_df_relative_bow,
+            ]
+        )
+
+        cond_min_df_absolute_bow = EqualsCondition(
+            hp_min_df_absolute_bow, hp_min_df_choice_bow, "min_df_absolute"
+        )
+        cond_min_df_relative_bow = EqualsCondition(
+            hp_min_df_relative_bow, hp_min_df_choice_bow, "min_df_relative"
+        )
         cs.add_conditions([cond_min_df_absolute_bow,
cond_min_df_relative_bow]) # maybe add bigrams ... diff --git a/autosklearn/pipeline/components/data_preprocessing/text_encoding/tfidf_encoding.py b/autosklearn/pipeline/components/data_preprocessing/text_encoding/tfidf_encoding.py index 0b7ef239f1..aea4a05906 100644 --- a/autosklearn/pipeline/components/data_preprocessing/text_encoding/tfidf_encoding.py +++ b/autosklearn/pipeline/components/data_preprocessing/text_encoding/tfidf_encoding.py @@ -1,18 +1,17 @@ from typing import Dict, Optional, Tuple, Union -from ConfigSpace.configuration_space import ConfigurationSpace -import ConfigSpace.hyperparameters as CSH -from ConfigSpace import EqualsCondition +import itertools +import ConfigSpace.hyperparameters as CSH import numpy as np import pandas as pd -import itertools +from ConfigSpace import EqualsCondition +from ConfigSpace.configuration_space import ConfigurationSpace +from sklearn.feature_extraction.text import TfidfVectorizer from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA, INPUT - -from sklearn.feature_extraction.text import TfidfVectorizer +from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA class TfidfEncoder(AutoSklearnPreprocessingAlgorithm): @@ -23,7 +22,7 @@ def __init__( min_df_choice: str = "min_df_absolute", min_df_absolute: int = 0, min_df_relative: float = 0.01, - random_state: Optional[Union[int, np.random.RandomState]] = None + random_state: Optional[Union[int, np.random.RandomState]] = None, ) -> None: self.ngram_range = ngram_range self.random_state = random_state @@ -32,32 +31,41 @@ def __init__( self.min_df_absolute = min_df_absolute self.min_df_relative = min_df_relative - def fit(self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None - ) -> 'TfidfEncoder': - - if isinstance(X, pd.DataFrame): - X.fillna("", inplace=True) - # define a CountVectorizer for every feature (implicitly defined by order of columns, - # maybe change the list - # to a dictionary with features as keys) - if self.min_df_choice == "min_df_absolute": - self.preprocessor = TfidfVectorizer(min_df=self.min_df_absolute, - use_idf=self.use_idf, - ngram_range=(1, self.ngram_range)) - elif self.min_df_choice == "min_df_relative": - self.preprocessor = TfidfVectorizer(min_df=self.min_df_relative, - use_idf=self.use_idf, - ngram_range=(1, self.ngram_range)) - else: - raise KeyError() + def fit( + self, + X: PIPELINE_DATA_DTYPE, + y: Optional[PIPELINE_DATA_DTYPE] = None, + ) -> "TfidfEncoder": + + if not isinstance(X, pd.DataFrame): + raise ValueError( + "Your text data is not encoded in a pandas.DataFrame\n" + "Please make sure to use a pandas.DataFrame and ensure" + " that the text features are encoded as strings." 
+ ) + + X.fillna("", inplace=True) + + if self.min_df_choice == "min_df_absolute": + self.preprocessor = TfidfVectorizer( + min_df=self.min_df_absolute, + use_idf=self.use_idf, + ngram_range=(1, self.ngram_range), + ) - all_text = itertools.chain.from_iterable(X[col] for col in X.columns) - self.preprocessor = self.preprocessor.fit(all_text) + elif self.min_df_choice == "min_df_relative": + self.preprocessor = TfidfVectorizer( + min_df=self.min_df_relative, + use_idf=self.use_idf, + ngram_range=(1, self.ngram_range), + ) else: - raise ValueError("Your text data is not encoded in a pandas.DataFrame\n" - "Please make sure to use a pandas.DataFrame and ensure" - " that the text features are encoded as strings.") + raise KeyError() + + all_text = itertools.chain.from_iterable(X[col] for col in X.columns) + self.preprocessor = self.preprocessor.fit(all_text) + return self def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE: @@ -73,41 +81,57 @@ def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE: return X_transformed @staticmethod - def get_properties(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: - return {'shortname': 'RBOW', - 'name': 'Relative Bag Of Word Encoder', - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': True, - 'handles_sparse': True, - 'handles_dense': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (INPUT,), } + def get_properties( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: + return { + "shortname": "RBOW", + "name": "Relative Bag Of Word Encoder", + "handles_regression": True, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": True, + "handles_sparse": True, + "handles_dense": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (INPUT,), + } @staticmethod - def get_hyperparameter_search_space(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> ConfigurationSpace: + def get_hyperparameter_search_space( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> ConfigurationSpace: cs = ConfigurationSpace() - hp_ngram_range = CSH.UniformIntegerHyperparameter(name="ngram_range", lower=1, upper=3, - default_value=1) + hp_ngram_range = CSH.UniformIntegerHyperparameter( + name="ngram_range", lower=1, upper=3, default_value=1 + ) hp_use_idf = CSH.CategoricalHyperparameter("use_idf", choices=[False, True]) - hp_min_df_choice = CSH.CategoricalHyperparameter("min_df_choice", - choices=["min_df_absolute", - "min_df_relative"]) - hp_min_df_absolute = CSH.UniformIntegerHyperparameter(name="min_df_absolute", lower=0, - upper=10, default_value=0) - hp_min_df_relative = CSH.UniformFloatHyperparameter(name="min_df_relative", lower=0.01, - upper=1.0, default_value=0.01, log=True) + hp_min_df_choice = CSH.CategoricalHyperparameter( + "min_df_choice", choices=["min_df_absolute", "min_df_relative"] + ) + hp_min_df_absolute = CSH.UniformIntegerHyperparameter( + name="min_df_absolute", lower=0, upper=10, default_value=0 + ) + hp_min_df_relative = CSH.UniformFloatHyperparameter( + name="min_df_relative", lower=0.01, upper=1.0, default_value=0.01, log=True + ) cs.add_hyperparameters( - [hp_ngram_range, hp_use_idf, hp_min_df_choice, hp_min_df_absolute, hp_min_df_relative]) - - cond_min_df_absolute = 
EqualsCondition(hp_min_df_absolute, hp_min_df_choice, - "min_df_absolute") - cond_min_df_relative = EqualsCondition(hp_min_df_relative, hp_min_df_choice, - "min_df_relative") + [ + hp_ngram_range, + hp_use_idf, + hp_min_df_choice, + hp_min_df_absolute, + hp_min_df_relative, + ] + ) + + cond_min_df_absolute = EqualsCondition( + hp_min_df_absolute, hp_min_df_choice, "min_df_absolute" + ) + cond_min_df_relative = EqualsCondition( + hp_min_df_relative, hp_min_df_choice, "min_df_relative" + ) cs.add_conditions([cond_min_df_absolute, cond_min_df_relative]) # maybe add bigrams ... diff --git a/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py b/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py index c6dc42a4df..365ae405a0 100644 --- a/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py +++ b/autosklearn/pipeline/components/data_preprocessing/variance_threshold/variance_threshold.py @@ -1,29 +1,25 @@ from typing import Dict, Optional, Tuple, Union -from ConfigSpace.configuration_space import ConfigurationSpace - import numpy as np +import sklearn.feature_selection +from ConfigSpace.configuration_space import ConfigurationSpace from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE, PIPELINE_DATA_DTYPE from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA, INPUT - -import sklearn.feature_selection +from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA class VarianceThreshold(AutoSklearnPreprocessingAlgorithm): def __init__( - self, - random_state: Optional[Union[int, np.random.RandomState]] = None + self, random_state: Optional[Union[int, np.random.RandomState]] = None ) -> None: # VarianceThreshold does not support fit_transform (as of 0.19.1)! 
self.random_state = random_state - def fit(self, X: PIPELINE_DATA_DTYPE, - y: Optional[PIPELINE_DATA_DTYPE] = None) -> 'VarianceThreshold': - self.preprocessor = sklearn.feature_selection.VarianceThreshold( - threshold=0.0 - ) + def fit( + self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None + ) -> "VarianceThreshold": + self.preprocessor = sklearn.feature_selection.VarianceThreshold(threshold=0.0) self.preprocessor = self.preprocessor.fit(X) return self @@ -33,25 +29,27 @@ def transform(self, X: PIPELINE_DATA_DTYPE) -> PIPELINE_DATA_DTYPE: return self.preprocessor.transform(X) @staticmethod - def get_properties(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: + def get_properties( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: return { - 'shortname': 'Variance Threshold', - 'name': 'Variance Threshold (constant feature removal)', - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': True, - 'is_deterministic': True, - 'handles_sparse': True, - 'handles_dense': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (INPUT,), + "shortname": "Variance Threshold", + "name": "Variance Threshold (constant feature removal)", + "handles_regression": True, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": True, + "is_deterministic": True, + "handles_sparse": True, + "handles_dense": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (INPUT,), } @staticmethod - def get_hyperparameter_search_space(dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None - ) -> ConfigurationSpace: + def get_hyperparameter_search_space( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> ConfigurationSpace: cs = ConfigurationSpace() return cs diff --git a/autosklearn/pipeline/components/feature_preprocessing/__init__.py b/autosklearn/pipeline/components/feature_preprocessing/__init__.py index e124d135d0..cd52d6ad34 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/__init__.py +++ b/autosklearn/pipeline/components/feature_preprocessing/__init__.py @@ -1,18 +1,25 @@ +from typing import Type + import os from collections import OrderedDict -from typing import Type -from ..base import AutoSklearnPreprocessingAlgorithm, find_components, \ - ThirdPartyComponents, AutoSklearnChoice, _addons from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import CategoricalHyperparameter +from ..base import ( + AutoSklearnChoice, + AutoSklearnPreprocessingAlgorithm, + ThirdPartyComponents, + _addons, + find_components, +) + classifier_directory = os.path.split(__file__)[0] -_preprocessors = find_components(__package__, - classifier_directory, - AutoSklearnPreprocessingAlgorithm) +_preprocessors = find_components( + __package__, classifier_directory, AutoSklearnPreprocessingAlgorithm +) additional_components = ThirdPartyComponents(AutoSklearnPreprocessingAlgorithm) -_addons['feature_preprocessing'] = additional_components +_addons["feature_preprocessing"] = additional_components def add_preprocessor(preprocessor: Type[AutoSklearnPreprocessingAlgorithm]) -> None: @@ -20,7 +27,6 @@ def add_preprocessor(preprocessor: Type[AutoSklearnPreprocessingAlgorithm]) -> N class FeaturePreprocessorChoice(AutoSklearnChoice): - 
@classmethod def get_components(cls): components = OrderedDict() @@ -28,23 +34,25 @@ def get_components(cls): components.update(additional_components.components) return components - def get_available_components(self, dataset_properties=None, - include=None, - exclude=None): + def get_available_components( + self, dataset_properties=None, include=None, exclude=None + ): if dataset_properties is None: dataset_properties = {} if include is not None and exclude is not None: raise ValueError( - "The argument include and exclude cannot be used together.") + "The argument include and exclude cannot be used together." + ) available_comp = self.get_components() if include is not None: for incl in include: if incl not in available_comp: - raise ValueError("Trying to include unknown component: " - "%s" % incl) + raise ValueError( + "Trying to include unknown component: " "%s" % incl + ) # TODO check for task type classification and/or regression! @@ -58,38 +66,43 @@ def get_available_components(self, dataset_properties=None, entry = available_comp[name] # Exclude itself to avoid infinite loop - if entry == FeaturePreprocessorChoice or hasattr(entry, 'get_components'): + if entry == FeaturePreprocessorChoice or hasattr(entry, "get_components"): continue - target_type = dataset_properties['target_type'] - if target_type == 'classification': - if entry.get_properties()['handles_classification'] is False: + target_type = dataset_properties["target_type"] + if target_type == "classification": + if entry.get_properties()["handles_classification"] is False: continue - if dataset_properties.get('multiclass') is True and \ - entry.get_properties()['handles_multiclass'] is False: + if ( + dataset_properties.get("multiclass") is True + and entry.get_properties()["handles_multiclass"] is False + ): continue - if dataset_properties.get('multilabel') is True and \ - entry.get_properties()['handles_multilabel'] is False: + if ( + dataset_properties.get("multilabel") is True + and entry.get_properties()["handles_multilabel"] is False + ): continue - elif target_type == 'regression': - if entry.get_properties()['handles_regression'] is False: + elif target_type == "regression": + if entry.get_properties()["handles_regression"] is False: continue - if dataset_properties.get('multioutput') is True and \ - entry.get_properties()['handles_multioutput'] is False: + if ( + dataset_properties.get("multioutput") is True + and entry.get_properties()["handles_multioutput"] is False + ): continue else: - raise ValueError('Unknown target type %s' % target_type) + raise ValueError("Unknown target type %s" % target_type) components_dict[name] = entry return components_dict - def get_hyperparameter_search_space(self, dataset_properties=None, - default=None, - include=None, - exclude=None): + def get_hyperparameter_search_space( + self, dataset_properties=None, default=None, include=None, exclude=None + ): cs = ConfigurationSpace() if dataset_properties is None: @@ -97,32 +110,33 @@ def get_hyperparameter_search_space(self, dataset_properties=None, # Compile a list of legal preprocessors for this problem available_preprocessors = self.get_available_components( - dataset_properties=dataset_properties, - include=include, exclude=exclude) + dataset_properties=dataset_properties, include=include, exclude=exclude + ) if len(available_preprocessors) == 0: - raise ValueError( - "No preprocessors found, please add NoPreprocessing") + raise ValueError("No preprocessors found, please add NoPreprocessing") if default is None: - defaults = 
['no_preprocessing', 'select_percentile', 'pca', - 'truncatedSVD'] + defaults = ["no_preprocessing", "select_percentile", "pca", "truncatedSVD"] for default_ in defaults: if default_ in available_preprocessors: default = default_ break - preprocessor = CategoricalHyperparameter('__choice__', - list( - available_preprocessors.keys()), - default_value=default) + preprocessor = CategoricalHyperparameter( + "__choice__", list(available_preprocessors.keys()), default_value=default + ) cs.add_hyperparameter(preprocessor) for name in available_preprocessors: - preprocessor_configuration_space = available_preprocessors[name]. \ - get_hyperparameter_search_space(dataset_properties) - parent_hyperparameter = {'parent': preprocessor, 'value': name} - cs.add_configuration_space(name, preprocessor_configuration_space, - parent_hyperparameter=parent_hyperparameter) + preprocessor_configuration_space = available_preprocessors[ + name + ].get_hyperparameter_search_space(dataset_properties) + parent_hyperparameter = {"parent": preprocessor, "value": name} + cs.add_configuration_space( + name, + preprocessor_configuration_space, + parent_hyperparameter=parent_hyperparameter, + ) return cs diff --git a/autosklearn/pipeline/components/feature_preprocessing/densifier.py b/autosklearn/pipeline/components/feature_preprocessing/densifier.py index 0f0732f298..f5c88ecadf 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/densifier.py +++ b/autosklearn/pipeline/components/feature_preprocessing/densifier.py @@ -1,7 +1,7 @@ from ConfigSpace.configuration_space import ConfigurationSpace from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import SPARSE, UNSIGNED_DATA, DENSE, INPUT +from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA class Densifier(AutoSklearnPreprocessingAlgorithm): @@ -14,6 +14,7 @@ def fit(self, X, y=None): def transform(self, X): from scipy import sparse + if sparse.issparse(X): return X.todense().getA() else: @@ -21,16 +22,18 @@ def transform(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'RandomTreesEmbedding', - 'name': 'Random Trees Embedding', - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': True, - 'is_deterministic': True, - 'input': (SPARSE, UNSIGNED_DATA), - 'output': (DENSE, INPUT)} + return { + "shortname": "RandomTreesEmbedding", + "name": "Random Trees Embedding", + "handles_regression": True, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": True, + "is_deterministic": True, + "input": (SPARSE, UNSIGNED_DATA), + "output": (DENSE, INPUT), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): diff --git a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py index 622180af8f..dad45795b8 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py +++ b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_classification.py @@ -1,27 +1,43 @@ from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ - UniformIntegerHyperparameter, CategoricalHyperparameter, \ - 
UnParametrizedHyperparameter, Constant - -from autosklearn.pipeline.components.base import \ - AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA, INPUT +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + Constant, + UniformFloatHyperparameter, + UniformIntegerHyperparameter, + UnParametrizedHyperparameter, +) + +from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA from autosklearn.util.common import check_for_bool, check_none class ExtraTreesPreprocessorClassification(AutoSklearnPreprocessingAlgorithm): - - def __init__(self, n_estimators, criterion, min_samples_leaf, - min_samples_split, max_features, bootstrap, max_leaf_nodes, - max_depth, min_weight_fraction_leaf, min_impurity_decrease, - oob_score=False, n_jobs=1, random_state=None, verbose=0, - class_weight=None): + def __init__( + self, + n_estimators, + criterion, + min_samples_leaf, + min_samples_split, + max_features, + bootstrap, + max_leaf_nodes, + max_depth, + min_weight_fraction_leaf, + min_impurity_decrease, + oob_score=False, + n_jobs=1, + random_state=None, + verbose=0, + class_weight=None, + ): self.n_estimators = n_estimators self.estimator_increment = 10 if criterion not in ("gini", "entropy"): - raise ValueError("'criterion' is not in ('gini', 'entropy'): " - "%s" % criterion) + raise ValueError( + "'criterion' is not in ('gini', 'entropy'): " "%s" % criterion + ) self.criterion = criterion self.min_samples_leaf = min_samples_leaf self.min_samples_split = min_samples_split @@ -78,11 +94,12 @@ def fit(self, X, Y, sample_weight=None): n_jobs=self.n_jobs, verbose=self.verbose, random_state=self.random_state, - class_weight=self.class_weight) + class_weight=self.class_weight, + ) estimator.fit(X, Y, sample_weight=sample_weight) - self.preprocessor = SelectFromModel(estimator=estimator, - threshold='mean', - prefit=True) + self.preprocessor = SelectFromModel( + estimator=estimator, threshold="mean", prefit=True + ) return self def transform(self, X): @@ -92,16 +109,18 @@ def transform(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'ETC', - 'name': 'Extra Trees Classifier Preprocessing', - 'handles_regression': False, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': False, - 'is_deterministic': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (INPUT,)} + return { + "shortname": "ETC", + "name": "Extra Trees Classifier Preprocessing", + "handles_regression": False, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": False, + "is_deterministic": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (INPUT,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): @@ -109,28 +128,45 @@ def get_hyperparameter_search_space(dataset_properties=None): n_estimators = Constant("n_estimators", 100) criterion = CategoricalHyperparameter( - "criterion", ["gini", "entropy"], default_value="gini") - max_features = UniformFloatHyperparameter("max_features", 0, 1, - default_value=0.5) + "criterion", ["gini", "entropy"], default_value="gini" + ) + max_features = UniformFloatHyperparameter( + "max_features", 0, 1, default_value=0.5 + ) max_depth = UnParametrizedHyperparameter(name="max_depth", value="None") max_leaf_nodes = 
UnParametrizedHyperparameter("max_leaf_nodes", "None") min_samples_split = UniformIntegerHyperparameter( - "min_samples_split", 2, 20, default_value=2) + "min_samples_split", 2, 20, default_value=2 + ) min_samples_leaf = UniformIntegerHyperparameter( - "min_samples_leaf", 1, 20, default_value=1) + "min_samples_leaf", 1, 20, default_value=1 + ) min_weight_fraction_leaf = UnParametrizedHyperparameter( - 'min_weight_fraction_leaf', 0.) + "min_weight_fraction_leaf", 0.0 + ) min_impurity_decrease = UnParametrizedHyperparameter( - 'min_impurity_decrease', 0.) + "min_impurity_decrease", 0.0 + ) bootstrap = CategoricalHyperparameter( - "bootstrap", ["True", "False"], default_value="False") - - cs.add_hyperparameters([n_estimators, criterion, max_features, - max_depth, max_leaf_nodes, min_samples_split, - min_samples_leaf, min_weight_fraction_leaf, - min_impurity_decrease, bootstrap]) + "bootstrap", ["True", "False"], default_value="False" + ) + + cs.add_hyperparameters( + [ + n_estimators, + criterion, + max_features, + max_depth, + max_leaf_nodes, + min_samples_split, + min_samples_leaf, + min_weight_fraction_leaf, + min_impurity_decrease, + bootstrap, + ] + ) return cs diff --git a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py index e8e28a2736..3287b837c5 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py +++ b/autosklearn/pipeline/components/feature_preprocessing/extra_trees_preproc_for_regression.py @@ -1,29 +1,43 @@ import numpy as np - from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ - UniformIntegerHyperparameter, CategoricalHyperparameter, \ - UnParametrizedHyperparameter, Constant - -from autosklearn.pipeline.components.base import \ - AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA, INPUT +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + Constant, + UniformFloatHyperparameter, + UniformIntegerHyperparameter, + UnParametrizedHyperparameter, +) + +from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA from autosklearn.util.common import check_for_bool, check_none class ExtraTreesPreprocessorRegression(AutoSklearnPreprocessingAlgorithm): - - def __init__(self, n_estimators, criterion, min_samples_leaf, - min_samples_split, max_features, - bootstrap=False, max_leaf_nodes=None, max_depth="None", - min_weight_fraction_leaf=0.0, - oob_score=False, n_jobs=1, random_state=None, verbose=0): + def __init__( + self, + n_estimators, + criterion, + min_samples_leaf, + min_samples_split, + max_features, + bootstrap=False, + max_leaf_nodes=None, + max_depth="None", + min_weight_fraction_leaf=0.0, + oob_score=False, + n_jobs=1, + random_state=None, + verbose=0, + ): self.n_estimators = n_estimators self.estimator_increment = 10 if criterion not in ("mse", "friedman_mse", "mae"): - raise ValueError("'criterion' is not in ('mse', 'friedman_mse', " - "'mae'): %s" % criterion) + raise ValueError( + "'criterion' is not in ('mse', 'friedman_mse', " + "'mae'): %s" % criterion + ) self.criterion = criterion self.min_samples_leaf = min_samples_leaf self.min_samples_split = min_samples_split @@ -64,23 +78,29 @@ def fit(self, X, Y): 
self.min_weight_fraction_leaf = float(self.min_weight_fraction_leaf) num_features = X.shape[1] - max_features = int( - float(self.max_features) * (np.log(num_features) + 1)) + max_features = int(float(self.max_features) * (np.log(num_features) + 1)) # Use at most half of the features max_features = max(1, min(int(X.shape[1] / 2), max_features)) estimator = ExtraTreesRegressor( - n_estimators=self.n_estimators, criterion=self.criterion, - max_depth=self.max_depth, min_samples_split=self.min_samples_split, - min_samples_leaf=self.min_samples_leaf, bootstrap=self.bootstrap, - max_features=max_features, max_leaf_nodes=self.max_leaf_nodes, - oob_score=self.oob_score, n_jobs=self.n_jobs, verbose=self.verbose, + n_estimators=self.n_estimators, + criterion=self.criterion, + max_depth=self.max_depth, + min_samples_split=self.min_samples_split, + min_samples_leaf=self.min_samples_leaf, + bootstrap=self.bootstrap, + max_features=max_features, + max_leaf_nodes=self.max_leaf_nodes, + oob_score=self.oob_score, + n_jobs=self.n_jobs, + verbose=self.verbose, min_weight_fraction_leaf=self.min_weight_fraction_leaf, - random_state=self.random_state) + random_state=self.random_state, + ) estimator.fit(X, Y) - self.preprocessor = SelectFromModel(estimator=estimator, - threshold='mean', - prefit=True) + self.preprocessor = SelectFromModel( + estimator=estimator, threshold="mean", prefit=True + ) return self @@ -91,42 +111,58 @@ def transform(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'ETR', - 'name': 'Extra Trees Regressor Preprocessing', - 'handles_regression': True, - 'handles_classification': False, - 'handles_multiclass': False, - 'handles_multilabel': False, - 'handles_multioutput': True, - 'is_deterministic': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (INPUT,)} + return { + "shortname": "ETR", + "name": "Extra Trees Regressor Preprocessing", + "handles_regression": True, + "handles_classification": False, + "handles_multiclass": False, + "handles_multilabel": False, + "handles_multioutput": True, + "is_deterministic": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (INPUT,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): cs = ConfigurationSpace() n_estimators = Constant("n_estimators", 100) - criterion = CategoricalHyperparameter("criterion", - ["mse", 'friedman_mse', 'mae']) + criterion = CategoricalHyperparameter( + "criterion", ["mse", "friedman_mse", "mae"] + ) max_features = UniformFloatHyperparameter( - "max_features", 0.1, 1.0, default_value=1.0) + "max_features", 0.1, 1.0, default_value=1.0 + ) max_depth = UnParametrizedHyperparameter(name="max_depth", value="None") max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None") min_samples_split = UniformIntegerHyperparameter( - "min_samples_split", 2, 20, default_value=2) + "min_samples_split", 2, 20, default_value=2 + ) min_samples_leaf = UniformIntegerHyperparameter( - "min_samples_leaf", 1, 20, default_value=1) - min_weight_fraction_leaf = Constant('min_weight_fraction_leaf', 0.) 
+ "min_samples_leaf", 1, 20, default_value=1 + ) + min_weight_fraction_leaf = Constant("min_weight_fraction_leaf", 0.0) bootstrap = CategoricalHyperparameter( - "bootstrap", ["True", "False"], default_value="False") - - cs.add_hyperparameters([n_estimators, criterion, max_features, max_depth, - max_leaf_nodes, min_samples_split, - min_samples_leaf, min_weight_fraction_leaf, - bootstrap]) + "bootstrap", ["True", "False"], default_value="False" + ) + + cs.add_hyperparameters( + [ + n_estimators, + criterion, + max_features, + max_depth, + max_leaf_nodes, + min_samples_split, + min_samples_leaf, + min_weight_fraction_leaf, + bootstrap, + ] + ) return cs diff --git a/autosklearn/pipeline/components/feature_preprocessing/fast_ica.py b/autosklearn/pipeline/components/feature_preprocessing/fast_ica.py index 549d708506..695ff3c2cc 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/fast_ica.py +++ b/autosklearn/pipeline/components/feature_preprocessing/fast_ica.py @@ -1,19 +1,19 @@ import warnings -from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import CategoricalHyperparameter, \ - UniformIntegerHyperparameter from ConfigSpace.conditions import EqualsCondition +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformIntegerHyperparameter, +) -from autosklearn.pipeline.components.base import \ - AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import INPUT, UNSIGNED_DATA, DENSE +from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.constants import DENSE, INPUT, UNSIGNED_DATA from autosklearn.util.common import check_for_bool, check_none class FastICA(AutoSklearnPreprocessingAlgorithm): - def __init__(self, algorithm, whiten, fun, n_components=None, - random_state=None): + def __init__(self, algorithm, whiten, fun, n_components=None, random_state=None): self.algorithm = algorithm self.whiten = whiten self.fun = fun @@ -31,18 +31,25 @@ def fit(self, X, Y=None): self.n_components = int(self.n_components) self.preprocessor = sklearn.decomposition.FastICA( - n_components=self.n_components, algorithm=self.algorithm, - fun=self.fun, whiten=self.whiten, random_state=self.random_state + n_components=self.n_components, + algorithm=self.algorithm, + fun=self.fun, + whiten=self.whiten, + random_state=self.random_state, ) # Make the RuntimeWarning an Exception! 
with warnings.catch_warnings(): - warnings.filterwarnings("error", message='array must not contain infs or NaNs') + warnings.filterwarnings( + "error", message="array must not contain infs or NaNs" + ) try: self.preprocessor.fit(X) except ValueError as e: - if 'array must not contain infs or NaNs' in e.args[0]: - raise ValueError("Bug in scikit-learn: " - "https://github.com/scikit-learn/scikit-learn/pull/2738") + if "array must not contain infs or NaNs" in e.args[0]: + raise ValueError( + "Bug in scikit-learn: " + "https://github.com/scikit-learn/scikit-learn/pull/2738" + ) return self @@ -53,25 +60,31 @@ def transform(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'FastICA', - 'name': 'Fast Independent Component Analysis', - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': True, - 'is_deterministic': False, - 'input': (DENSE, UNSIGNED_DATA), - 'output': (INPUT, UNSIGNED_DATA)} + return { + "shortname": "FastICA", + "name": "Fast Independent Component Analysis", + "handles_regression": True, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": True, + "is_deterministic": False, + "input": (DENSE, UNSIGNED_DATA), + "output": (INPUT, UNSIGNED_DATA), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): cs = ConfigurationSpace() - n_components = UniformIntegerHyperparameter("n_components", 10, 2000, default_value=100) - algorithm = CategoricalHyperparameter('algorithm', ['parallel', 'deflation'], 'parallel') - whiten = CategoricalHyperparameter('whiten', ['False', 'True'], 'False') - fun = CategoricalHyperparameter('fun', ['logcosh', 'exp', 'cube'], 'logcosh') + n_components = UniformIntegerHyperparameter( + "n_components", 10, 2000, default_value=100 + ) + algorithm = CategoricalHyperparameter( + "algorithm", ["parallel", "deflation"], "parallel" + ) + whiten = CategoricalHyperparameter("whiten", ["False", "True"], "False") + fun = CategoricalHyperparameter("fun", ["logcosh", "exp", "cube"], "logcosh") cs.add_hyperparameters([n_components, algorithm, whiten, fun]) cs.add_condition(EqualsCondition(n_components, whiten, "True")) diff --git a/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py b/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py index e23ff1b865..d51242de21 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py +++ b/autosklearn/pipeline/components/feature_preprocessing/feature_agglomeration.py @@ -1,28 +1,28 @@ import numpy as np - from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import CategoricalHyperparameter, \ - UniformIntegerHyperparameter -from ConfigSpace.forbidden import ForbiddenInClause, \ - ForbiddenAndConjunction, ForbiddenEqualsClause +from ConfigSpace.forbidden import ( + ForbiddenAndConjunction, + ForbiddenEqualsClause, + ForbiddenInClause, +) +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformIntegerHyperparameter, +) -from autosklearn.pipeline.components.base import \ - AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA, INPUT +from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.constants import DENSE, INPUT, UNSIGNED_DATA 
class FeatureAgglomeration(AutoSklearnPreprocessingAlgorithm): - def __init__(self, n_clusters, affinity, linkage, pooling_func, - random_state=None): + def __init__(self, n_clusters, affinity, linkage, pooling_func, random_state=None): self.n_clusters = n_clusters self.affinity = affinity self.linkage = linkage self.pooling_func = pooling_func self.random_state = random_state - self.pooling_func_mapping = dict(mean=np.mean, - median=np.median, - max=np.max) + self.pooling_func_mapping = dict(mean=np.mean, median=np.median, max=np.max) def fit(self, X, Y=None): import sklearn.cluster @@ -34,8 +34,11 @@ def fit(self, X, Y=None): self.pooling_func = self.pooling_func_mapping[self.pooling_func] self.preprocessor = sklearn.cluster.FeatureAgglomeration( - n_clusters=n_clusters, affinity=self.affinity, - linkage=self.linkage, pooling_func=self.pooling_func) + n_clusters=n_clusters, + affinity=self.affinity, + linkage=self.linkage, + pooling_func=self.pooling_func, + ) self.preprocessor.fit(X) return self @@ -46,32 +49,38 @@ def transform(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'Feature Agglomeration', - 'name': 'Feature Agglomeration', - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': True, - 'is_deterministic': True, - 'input': (DENSE, UNSIGNED_DATA), - 'output': (INPUT,)} + return { + "shortname": "Feature Agglomeration", + "name": "Feature Agglomeration", + "handles_regression": True, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": True, + "is_deterministic": True, + "input": (DENSE, UNSIGNED_DATA), + "output": (INPUT,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): cs = ConfigurationSpace() n_clusters = UniformIntegerHyperparameter("n_clusters", 2, 400, 25) affinity = CategoricalHyperparameter( - "affinity", ["euclidean", "manhattan", "cosine"], "euclidean") + "affinity", ["euclidean", "manhattan", "cosine"], "euclidean" + ) linkage = CategoricalHyperparameter( - "linkage", ["ward", "complete", "average"], "ward") + "linkage", ["ward", "complete", "average"], "ward" + ) pooling_func = CategoricalHyperparameter( - "pooling_func", ["mean", "median", "max"]) + "pooling_func", ["mean", "median", "max"] + ) cs.add_hyperparameters([n_clusters, affinity, linkage, pooling_func]) affinity_and_linkage = ForbiddenAndConjunction( ForbiddenInClause(affinity, ["manhattan", "cosine"]), - ForbiddenEqualsClause(linkage, "ward")) + ForbiddenEqualsClause(linkage, "ward"), + ) cs.add_forbidden_clause(affinity_and_linkage) return cs diff --git a/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py b/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py index 7ed0086248..4e96bfb1c2 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py +++ b/autosklearn/pipeline/components/feature_preprocessing/kernel_pca.py @@ -1,20 +1,22 @@ import warnings import numpy as np - -from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import CategoricalHyperparameter, \ - UniformIntegerHyperparameter, UniformFloatHyperparameter from ConfigSpace.conditions import EqualsCondition, InCondition +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformFloatHyperparameter, + UniformIntegerHyperparameter, 
+) -from autosklearn.pipeline.components.base import \ - AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import SPARSE, DENSE, UNSIGNED_DATA +from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA class KernelPCA(AutoSklearnPreprocessingAlgorithm): - def __init__(self, n_components, kernel, degree=3, gamma=0.25, coef0=0.0, - random_state=None): + def __init__( + self, n_components, kernel, degree=3, gamma=0.25, coef0=0.0, random_state=None + ): self.n_components = n_components self.kernel = kernel self.degree = degree @@ -32,9 +34,14 @@ def fit(self, X, Y=None): self.coef0 = float(self.coef0) self.preprocessor = sklearn.decomposition.KernelPCA( - n_components=self.n_components, kernel=self.kernel, - degree=self.degree, gamma=self.gamma, coef0=self.coef0, - remove_zero_eig=True, random_state=self.random_state) + n_components=self.n_components, + kernel=self.kernel, + degree=self.degree, + gamma=self.gamma, + coef0=self.coef0, + remove_zero_eig=True, + random_state=self.random_state, + ) if scipy.sparse.issparse(X): X = X.astype(np.float64) with warnings.catch_warnings(): @@ -43,7 +50,7 @@ def fit(self, X, Y=None): # Raise an informative error message, equation is based ~line 249 in # kernel_pca.py in scikit-learn if len(self.preprocessor.alphas_ / self.preprocessor.lambdas_) == 0: - raise ValueError('KernelPCA removed all features!') + raise ValueError("KernelPCA removed all features!") return self def transform(self, X): @@ -61,29 +68,35 @@ def transform(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'KernelPCA', - 'name': 'Kernel Principal Component Analysis', - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': True, - 'is_deterministic': False, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (DENSE, UNSIGNED_DATA)} + return { + "shortname": "KernelPCA", + "name": "Kernel Principal Component Analysis", + "handles_regression": True, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": True, + "is_deterministic": False, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (DENSE, UNSIGNED_DATA), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): n_components = UniformIntegerHyperparameter( - "n_components", 10, 2000, default_value=100) - kernel = CategoricalHyperparameter('kernel', ['poly', 'rbf', 'sigmoid', 'cosine'], 'rbf') + "n_components", 10, 2000, default_value=100 + ) + kernel = CategoricalHyperparameter( + "kernel", ["poly", "rbf", "sigmoid", "cosine"], "rbf" + ) gamma = UniformFloatHyperparameter( "gamma", - 3.0517578125e-05, 8, + 3.0517578125e-05, + 8, log=True, default_value=0.01, ) - degree = UniformIntegerHyperparameter('degree', 2, 5, 3) + degree = UniformIntegerHyperparameter("degree", 2, 5, 3) coef0 = UniformFloatHyperparameter("coef0", -1, 1, default_value=0) cs = ConfigurationSpace() cs.add_hyperparameters([n_components, kernel, degree, gamma, coef0]) diff --git a/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py b/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py index 12ff57c21d..a81e9ddd78 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py +++ b/autosklearn/pipeline/components/feature_preprocessing/kitchen_sinks.py @@ -1,21 +1,22 @@ 
from typing import Optional, Union -from numpy.random import RandomState from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ - UniformIntegerHyperparameter +from ConfigSpace.hyperparameters import ( + UniformFloatHyperparameter, + UniformIntegerHyperparameter, +) +from numpy.random import RandomState from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import SPARSE, DENSE, UNSIGNED_DATA, INPUT +from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA class RandomKitchenSinks(AutoSklearnPreprocessingAlgorithm): - def __init__( self, gamma: float, n_components: int, - random_state: Optional[Union[int, RandomState]] = None + random_state: Optional[Union[int, RandomState]] = None, ) -> None: """ Parameters @@ -42,7 +43,7 @@ def fit(self, X, Y=None): self.preprocessor = sklearn.kernel_approximation.RBFSampler( gamma=self.gamma, n_components=self.n_components, - random_state=self.random_state + random_state=self.random_state, ) self.preprocessor.fit(X) return self @@ -54,23 +55,27 @@ def transform(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'KitchenSink', - 'name': 'Random Kitchen Sinks', - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': True, - 'is_deterministic': True, - 'input': (SPARSE, DENSE, UNSIGNED_DATA), - 'output': (INPUT, UNSIGNED_DATA)} + return { + "shortname": "KitchenSink", + "name": "Random Kitchen Sinks", + "handles_regression": True, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": True, + "is_deterministic": True, + "input": (SPARSE, DENSE, UNSIGNED_DATA), + "output": (INPUT, UNSIGNED_DATA), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): gamma = UniformFloatHyperparameter( - "gamma", 3.0517578125e-05, 8, default_value=1.0, log=True) + "gamma", 3.0517578125e-05, 8, default_value=1.0, log=True + ) n_components = UniformIntegerHyperparameter( - "n_components", 50, 10000, default_value=100, log=True) + "n_components", 50, 10000, default_value=100, log=True + ) cs = ConfigurationSpace() cs.add_hyperparameters([gamma, n_components]) return cs diff --git a/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py b/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py index 6e6de1a998..546c8742ad 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py +++ b/autosklearn/pipeline/components/feature_preprocessing/liblinear_svc_preprocessor.py @@ -1,19 +1,31 @@ from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ - CategoricalHyperparameter, Constant -from ConfigSpace.forbidden import ForbiddenEqualsClause, \ - ForbiddenAndConjunction +from ConfigSpace.forbidden import ForbiddenAndConjunction, ForbiddenEqualsClause +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + Constant, + UniformFloatHyperparameter, +) from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import SPARSE, DENSE, UNSIGNED_DATA, INPUT +from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA from autosklearn.util.common 
import check_for_bool, check_none class LibLinear_Preprocessor(AutoSklearnPreprocessingAlgorithm): # Liblinear is not deterministic as it uses a RNG inside - def __init__(self, penalty, loss, dual, tol, C, multi_class, - fit_intercept, intercept_scaling, class_weight=None, - random_state=None): + def __init__( + self, + penalty, + loss, + dual, + tol, + C, + multi_class, + fit_intercept, + intercept_scaling, + class_weight=None, + random_state=None, + ): self.penalty = penalty self.loss = loss self.dual = dual @@ -39,21 +51,23 @@ def fit(self, X, Y): if check_none(self.class_weight): self.class_weight = None - estimator = sklearn.svm.LinearSVC(penalty=self.penalty, - loss=self.loss, - dual=self.dual, - tol=self.tol, - C=self.C, - class_weight=self.class_weight, - fit_intercept=self.fit_intercept, - intercept_scaling=self.intercept_scaling, - multi_class=self.multi_class, - random_state=self.random_state) + estimator = sklearn.svm.LinearSVC( + penalty=self.penalty, + loss=self.loss, + dual=self.dual, + tol=self.tol, + C=self.C, + class_weight=self.class_weight, + fit_intercept=self.fit_intercept, + intercept_scaling=self.intercept_scaling, + multi_class=self.multi_class, + random_state=self.random_state, + ) estimator.fit(X, Y) - self.preprocessor = SelectFromModel(estimator=estimator, - threshold='mean', - prefit=True) + self.preprocessor = SelectFromModel( + estimator=estimator, threshold="mean", prefit=True + ) return self @@ -64,15 +78,17 @@ def transform(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'LinearSVC Preprocessor', - 'name': 'Liblinear Support Vector Classification Preprocessing', - 'handles_regression': False, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': False, - 'handles_multioutput': False, - 'input': (SPARSE, DENSE, UNSIGNED_DATA), - 'output': (INPUT,)} + return { + "shortname": "LinearSVC Preprocessor", + "name": "Liblinear Support Vector Classification Preprocessing", + "handles_regression": False, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": False, + "handles_multioutput": False, + "input": (SPARSE, DENSE, UNSIGNED_DATA), + "output": (INPUT,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): @@ -80,22 +96,25 @@ def get_hyperparameter_search_space(dataset_properties=None): penalty = Constant("penalty", "l1") loss = CategoricalHyperparameter( - "loss", ["hinge", "squared_hinge"], default_value="squared_hinge") + "loss", ["hinge", "squared_hinge"], default_value="squared_hinge" + ) dual = Constant("dual", "False") # This is set ad-hoc - tol = UniformFloatHyperparameter("tol", 1e-5, 1e-1, default_value=1e-4, log=True) + tol = UniformFloatHyperparameter( + "tol", 1e-5, 1e-1, default_value=1e-4, log=True + ) C = UniformFloatHyperparameter("C", 0.03125, 32768, log=True, default_value=1.0) multi_class = Constant("multi_class", "ovr") # These are set ad-hoc fit_intercept = Constant("fit_intercept", "True") intercept_scaling = Constant("intercept_scaling", 1) - cs.add_hyperparameters([penalty, loss, dual, tol, C, multi_class, - fit_intercept, intercept_scaling]) + cs.add_hyperparameters( + [penalty, loss, dual, tol, C, multi_class, fit_intercept, intercept_scaling] + ) penalty_and_loss = ForbiddenAndConjunction( - ForbiddenEqualsClause(penalty, "l1"), - ForbiddenEqualsClause(loss, "hinge") + ForbiddenEqualsClause(penalty, "l1"), ForbiddenEqualsClause(loss, "hinge") ) cs.add_forbidden_clause(penalty_and_loss) return cs 
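Editorial note, not part of the patch: the model-based feature selectors reformatted above (ExtraTreesPreprocessorClassification, ExtraTreesPreprocessorRegression, LibLinear_Preprocessor) all reduce to the same scikit-learn pattern — fit a supervised estimator, then wrap it in SelectFromModel(threshold="mean", prefit=True) so only features whose importance reaches the mean importance are kept. A minimal, self-contained sketch of that pattern follows; the dataset and estimator settings are illustrative placeholders, not values taken from this diff.

from sklearn.datasets import make_classification
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectFromModel

# Toy data standing in for whatever X, Y the pipeline passes to fit()
X, y = make_classification(n_samples=200, n_features=20, random_state=0)

# Fit the embedded estimator first, exactly as the components above do
estimator = ExtraTreesClassifier(n_estimators=100, random_state=0).fit(X, y)

# prefit=True reuses the already-fitted estimator; threshold="mean" keeps
# only the columns whose feature_importances_ are >= the mean importance
selector = SelectFromModel(estimator=estimator, threshold="mean", prefit=True)
X_reduced = selector.transform(X)
print(X_reduced.shape)  # fewer columns than X

As the fit() bodies above show, each component stores this selector as self.preprocessor, and later transform() calls delegate to it.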
diff --git a/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py b/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py index 92e949b46d..550872d551 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py +++ b/autosklearn/pipeline/components/feature_preprocessing/no_preprocessing.py @@ -1,16 +1,15 @@ from ConfigSpace.configuration_space import ConfigurationSpace from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import SPARSE, DENSE, UNSIGNED_DATA, INPUT +from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA class NoPreprocessing(AutoSklearnPreprocessingAlgorithm): - def __init__(self, random_state): - """ This preprocessors does not change the data """ + """This preprocessors does not change the data""" def fit(self, X, Y=None): - self.preprocessor = 'passthrough' + self.preprocessor = "passthrough" self.fitted_ = True return self @@ -21,16 +20,18 @@ def transform(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'no', - 'name': 'NoPreprocessing', - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': True, - 'is_deterministic': True, - 'input': (SPARSE, DENSE, UNSIGNED_DATA), - 'output': (INPUT,)} + return { + "shortname": "no", + "name": "NoPreprocessing", + "handles_regression": True, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": True, + "is_deterministic": True, + "input": (SPARSE, DENSE, UNSIGNED_DATA), + "output": (INPUT,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): diff --git a/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py b/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py index d450d8f09f..097f59e0f1 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py +++ b/autosklearn/pipeline/components/feature_preprocessing/nystroem_sampler.py @@ -1,17 +1,26 @@ import numpy as np - +from ConfigSpace.conditions import EqualsCondition, InCondition from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ - UniformIntegerHyperparameter, CategoricalHyperparameter -from ConfigSpace.conditions import InCondition, EqualsCondition +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformFloatHyperparameter, + UniformIntegerHyperparameter, +) from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import SPARSE, DENSE, UNSIGNED_DATA, INPUT, SIGNED_DATA +from autosklearn.pipeline.constants import ( + DENSE, + INPUT, + SIGNED_DATA, + SPARSE, + UNSIGNED_DATA, +) class Nystroem(AutoSklearnPreprocessingAlgorithm): - def __init__(self, kernel, n_components, gamma=1.0, degree=3, - coef0=1, random_state=None): + def __init__( + self, kernel, n_components, gamma=1.0, degree=3, coef0=1, random_state=None + ): self.kernel = kernel self.n_components = n_components self.gamma = gamma @@ -29,13 +38,17 @@ def fit(self, X, Y=None): self.coef0 = float(self.coef0) self.preprocessor = sklearn.kernel_approximation.Nystroem( - kernel=self.kernel, n_components=self.n_components, - gamma=self.gamma, degree=self.degree, coef0=self.coef0, - random_state=self.random_state) + 
kernel=self.kernel, + n_components=self.n_components, + gamma=self.gamma, + degree=self.degree, + coef0=self.coef0, + random_state=self.random_state, + ) # Because the pipeline guarantees that each feature is positive, # clip all values below zero to zero - if self.kernel == 'chi2': + if self.kernel == "chi2": if scipy.sparse.issparse(X): X.data[X.data < 0] = 0.0 else: @@ -49,7 +62,7 @@ def transform(self, X): # Because the pipeline guarantees that each feature is positive, # clip all values below zero to zero - if self.kernel == 'chi2': + if self.kernel == "chi2": if scipy.sparse.issparse(X): X.data[X.data < 0] = 0.0 else: @@ -64,38 +77,43 @@ def get_properties(dataset_properties=None): data_type = UNSIGNED_DATA if dataset_properties is not None: - signed = dataset_properties.get('signed') + signed = dataset_properties.get("signed") if signed is not None: data_type = SIGNED_DATA if signed is True else UNSIGNED_DATA - return {'shortname': 'Nystroem', - 'name': 'Nystroem kernel approximation', - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': True, - 'is_deterministic': True, - 'input': (SPARSE, DENSE, data_type), - 'output': (INPUT, UNSIGNED_DATA)} + return { + "shortname": "Nystroem", + "name": "Nystroem kernel approximation", + "handles_regression": True, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": True, + "is_deterministic": True, + "input": (SPARSE, DENSE, data_type), + "output": (INPUT, UNSIGNED_DATA), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): - if dataset_properties is not None and \ - (dataset_properties.get("sparse") is True or - dataset_properties.get("signed") is False): + if dataset_properties is not None and ( + dataset_properties.get("sparse") is True + or dataset_properties.get("signed") is False + ): allow_chi2 = False else: allow_chi2 = True - possible_kernels = ['poly', 'rbf', 'sigmoid', 'cosine'] + possible_kernels = ["poly", "rbf", "sigmoid", "cosine"] if allow_chi2: possible_kernels.append("chi2") - kernel = CategoricalHyperparameter('kernel', possible_kernels, 'rbf') + kernel = CategoricalHyperparameter("kernel", possible_kernels, "rbf") n_components = UniformIntegerHyperparameter( - "n_components", 50, 10000, default_value=100, log=True) - gamma = UniformFloatHyperparameter("gamma", 3.0517578125e-05, 8, - log=True, default_value=0.1) - degree = UniformIntegerHyperparameter('degree', 2, 5, 3) + "n_components", 50, 10000, default_value=100, log=True + ) + gamma = UniformFloatHyperparameter( + "gamma", 3.0517578125e-05, 8, log=True, default_value=0.1 + ) + degree = UniformIntegerHyperparameter("degree", 2, 5, 3) coef0 = UniformFloatHyperparameter("coef0", -1, 1, default_value=0) cs = ConfigurationSpace() diff --git a/autosklearn/pipeline/components/feature_preprocessing/pca.py b/autosklearn/pipeline/components/feature_preprocessing/pca.py index ae992520fa..a1ad9f3981 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/pca.py +++ b/autosklearn/pipeline/components/feature_preprocessing/pca.py @@ -1,8 +1,9 @@ import numpy as np - from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ - CategoricalHyperparameter +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformFloatHyperparameter, +) from autosklearn.pipeline.components.base import 
AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA @@ -18,12 +19,13 @@ def __init__(self, keep_variance, whiten, random_state=None): def fit(self, X, Y=None): import sklearn.decomposition + n_components = float(self.keep_variance) self.whiten = check_for_bool(self.whiten) - self.preprocessor = sklearn.decomposition.PCA(n_components=n_components, - whiten=self.whiten, - copy=True) + self.preprocessor = sklearn.decomposition.PCA( + n_components=n_components, whiten=self.whiten, copy=True + ) self.preprocessor.fit(X) if not np.isfinite(self.preprocessor.components_).all(): @@ -38,24 +40,28 @@ def transform(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'PCA', - 'name': 'Principle Component Analysis', - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': True, - # TODO document that we have to be very careful - 'is_deterministic': False, - 'input': (DENSE, UNSIGNED_DATA), - 'output': (DENSE, UNSIGNED_DATA)} + return { + "shortname": "PCA", + "name": "Principle Component Analysis", + "handles_regression": True, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": True, + # TODO document that we have to be very careful + "is_deterministic": False, + "input": (DENSE, UNSIGNED_DATA), + "output": (DENSE, UNSIGNED_DATA), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): keep_variance = UniformFloatHyperparameter( - "keep_variance", 0.5, 0.9999, default_value=0.9999) + "keep_variance", 0.5, 0.9999, default_value=0.9999 + ) whiten = CategoricalHyperparameter( - "whiten", ["False", "True"], default_value="False") + "whiten", ["False", "True"], default_value="False" + ) cs = ConfigurationSpace() cs.add_hyperparameters([keep_variance, whiten]) return cs diff --git a/autosklearn/pipeline/components/feature_preprocessing/polynomial.py b/autosklearn/pipeline/components/feature_preprocessing/polynomial.py index 478040c497..bd5312bba0 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/polynomial.py +++ b/autosklearn/pipeline/components/feature_preprocessing/polynomial.py @@ -1,10 +1,11 @@ from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import CategoricalHyperparameter, \ - UniformIntegerHyperparameter +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformIntegerHyperparameter, +) -from autosklearn.pipeline.components.base import \ - AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA, INPUT +from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA from autosklearn.util.common import check_for_bool @@ -25,8 +26,10 @@ def fit(self, X, Y): self.include_bias = check_for_bool(self.include_bias) self.preprocessor = sklearn.preprocessing.PolynomialFeatures( - degree=self.degree, interaction_only=self.interaction_only, - include_bias=self.include_bias) + degree=self.degree, + interaction_only=self.interaction_only, + include_bias=self.include_bias, + ) self.preprocessor.fit(X, Y) return self @@ -37,25 +40,29 @@ def transform(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'PolynomialFeatures', - 'name': 'PolynomialFeatures', - 'handles_regression': 
True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': True, - 'is_deterministic': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (INPUT,)} + return { + "shortname": "PolynomialFeatures", + "name": "PolynomialFeatures", + "handles_regression": True, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": True, + "is_deterministic": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (INPUT,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): # More than degree 3 is too expensive! degree = UniformIntegerHyperparameter("degree", 2, 3, 2) - interaction_only = CategoricalHyperparameter("interaction_only", - ["False", "True"], "False") - include_bias = CategoricalHyperparameter("include_bias", - ["True", "False"], "True") + interaction_only = CategoricalHyperparameter( + "interaction_only", ["False", "True"], "False" + ) + include_bias = CategoricalHyperparameter( + "include_bias", ["True", "False"], "True" + ) cs = ConfigurationSpace() cs.add_hyperparameters([degree, interaction_only, include_bias]) diff --git a/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py b/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py index a5e9ff1b8c..9daed1ae97 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py +++ b/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py @@ -1,17 +1,30 @@ from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import UniformIntegerHyperparameter, \ - UnParametrizedHyperparameter, Constant, CategoricalHyperparameter +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + Constant, + UniformIntegerHyperparameter, + UnParametrizedHyperparameter, +) from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA, SIGNED_DATA -from autosklearn.util.common import check_none, check_for_bool +from autosklearn.pipeline.constants import DENSE, SIGNED_DATA, SPARSE, UNSIGNED_DATA +from autosklearn.util.common import check_for_bool, check_none class RandomTreesEmbedding(AutoSklearnPreprocessingAlgorithm): - - def __init__(self, n_estimators, max_depth, min_samples_split, - min_samples_leaf, min_weight_fraction_leaf, max_leaf_nodes, - bootstrap, sparse_output=True, n_jobs=1, random_state=None): + def __init__( + self, + n_estimators, + max_depth, + min_samples_split, + min_samples_leaf, + min_weight_fraction_leaf, + max_leaf_nodes, + bootstrap, + sparse_output=True, + n_jobs=1, + random_state=None, + ): self.n_estimators = n_estimators self.max_depth = max_depth self.min_samples_split = min_samples_split @@ -48,7 +61,7 @@ def _fit(self, X, Y=None): max_leaf_nodes=self.max_leaf_nodes, sparse_output=self.sparse_output, n_jobs=self.n_jobs, - random_state=self.random_state + random_state=self.random_state, ) self.preprocessor.fit(X, Y) return self @@ -67,37 +80,48 @@ def transform(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'RandomTreesEmbedding', - 'name': 'Random Trees Embedding', - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': True, - 'is_deterministic': True, - 'input': (DENSE, SPARSE, 
UNSIGNED_DATA), - 'output': (SPARSE, SIGNED_DATA)} + return { + "shortname": "RandomTreesEmbedding", + "name": "Random Trees Embedding", + "handles_regression": True, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": True, + "is_deterministic": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (SPARSE, SIGNED_DATA), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): - n_estimators = UniformIntegerHyperparameter(name="n_estimators", - lower=10, upper=100, - default_value=10) - max_depth = UniformIntegerHyperparameter(name="max_depth", - lower=2, upper=10, - default_value=5) - min_samples_split = UniformIntegerHyperparameter(name="min_samples_split", - lower=2, upper=20, - default_value=2) - min_samples_leaf = UniformIntegerHyperparameter(name="min_samples_leaf", - lower=1, upper=20, - default_value=1) - min_weight_fraction_leaf = Constant('min_weight_fraction_leaf', 1.0) - max_leaf_nodes = UnParametrizedHyperparameter(name="max_leaf_nodes", - value="None") - bootstrap = CategoricalHyperparameter('bootstrap', ['True', 'False']) + n_estimators = UniformIntegerHyperparameter( + name="n_estimators", lower=10, upper=100, default_value=10 + ) + max_depth = UniformIntegerHyperparameter( + name="max_depth", lower=2, upper=10, default_value=5 + ) + min_samples_split = UniformIntegerHyperparameter( + name="min_samples_split", lower=2, upper=20, default_value=2 + ) + min_samples_leaf = UniformIntegerHyperparameter( + name="min_samples_leaf", lower=1, upper=20, default_value=1 + ) + min_weight_fraction_leaf = Constant("min_weight_fraction_leaf", 1.0) + max_leaf_nodes = UnParametrizedHyperparameter( + name="max_leaf_nodes", value="None" + ) + bootstrap = CategoricalHyperparameter("bootstrap", ["True", "False"]) cs = ConfigurationSpace() - cs.add_hyperparameters([n_estimators, max_depth, min_samples_split, - min_samples_leaf, min_weight_fraction_leaf, - max_leaf_nodes, bootstrap]) + cs.add_hyperparameters( + [ + n_estimators, + max_depth, + min_samples_split, + min_samples_leaf, + min_weight_fraction_leaf, + max_leaf_nodes, + bootstrap, + ] + ) return cs diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_percentile.py b/autosklearn/pipeline/components/feature_preprocessing/select_percentile.py index c928e2f471..66f760bfb0 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/select_percentile.py +++ b/autosklearn/pipeline/components/feature_preprocessing/select_percentile.py @@ -1,11 +1,10 @@ class SelectPercentileBase(object): - def fit(self, X, y): import sklearn.feature_selection self.preprocessor = sklearn.feature_selection.SelectPercentile( - score_func=self.score_func, - percentile=self.percentile) + score_func=self.score_func, percentile=self.percentile + ) self.preprocessor.fit(X, y) return self diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py index f6a3a1152c..3caa50b46d 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py +++ b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_classification.py @@ -1,19 +1,30 @@ -from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import \ - UniformFloatHyperparameter, CategoricalHyperparameter, Constant from functools import partial -from 
autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.components.feature_preprocessing.select_percentile import \ - SelectPercentileBase -from autosklearn.pipeline.constants import SPARSE, DENSE, INPUT, UNSIGNED_DATA, SIGNED_DATA - - -class SelectPercentileClassification(SelectPercentileBase, - AutoSklearnPreprocessingAlgorithm): +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + Constant, + UniformFloatHyperparameter, +) +from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.components.feature_preprocessing.select_percentile import ( + SelectPercentileBase, +) +from autosklearn.pipeline.constants import ( + DENSE, + INPUT, + SIGNED_DATA, + SPARSE, + UNSIGNED_DATA, +) + + +class SelectPercentileClassification( + SelectPercentileBase, AutoSklearnPreprocessingAlgorithm +): def __init__(self, percentile, score_func="chi2", random_state=None): - """ Parameters: + """Parameters: random state : ignored score_func : callable, Function taking two arrays X and y, and @@ -28,11 +39,15 @@ def __init__(self, percentile, score_func="chi2", random_state=None): elif score_func == "f_classif": self.score_func = sklearn.feature_selection.f_classif elif score_func == "mutual_info": - self.score_func = partial(sklearn.feature_selection.mutual_info_classif, - random_state=self.random_state) + self.score_func = partial( + sklearn.feature_selection.mutual_info_classif, + random_state=self.random_state, + ) else: - raise ValueError("score_func must be in ('chi2, 'f_classif', 'mutual_info'), " - "but is: %s" % score_func) + raise ValueError( + "score_func must be in ('chi2, 'f_classif', 'mutual_info'), " + "but is: %s" % score_func + ) def fit(self, X, y): import scipy.sparse @@ -41,7 +56,7 @@ def fit(self, X, y): self.preprocessor = sklearn.feature_selection.SelectPercentile( score_func=self.score_func, percentile=self.percentile, - ) + ) # Because the pipeline guarantees that each feature is positive, # clip all values below zero to zero @@ -70,44 +85,45 @@ def transform(self, X): raise NotImplementedError() Xt = self.preprocessor.transform(X) if Xt.shape[1] == 0: - raise ValueError( - "%s removed all features." % self.__class__.__name__) + raise ValueError("%s removed all features." 
% self.__class__.__name__) return Xt @staticmethod def get_properties(dataset_properties=None): data_type = UNSIGNED_DATA if dataset_properties is not None: - signed = dataset_properties.get('signed') + signed = dataset_properties.get("signed") if signed is not None: data_type = SIGNED_DATA if signed is True else UNSIGNED_DATA - return {'shortname': 'SPC', - 'name': 'Select Percentile Classification', - 'handles_regression': False, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': False, - 'handles_multioutput': False, - 'is_deterministic': True, - 'input': (SPARSE, DENSE, data_type), - 'output': (INPUT,)} + return { + "shortname": "SPC", + "name": "Select Percentile Classification", + "handles_regression": False, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": False, + "handles_multioutput": False, + "is_deterministic": True, + "input": (SPARSE, DENSE, data_type), + "output": (INPUT,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): percentile = UniformFloatHyperparameter( - name="percentile", lower=1, upper=99, default_value=50) + name="percentile", lower=1, upper=99, default_value=50 + ) score_func = CategoricalHyperparameter( name="score_func", choices=["chi2", "f_classif", "mutual_info"], - default_value="chi2" + default_value="chi2", ) if dataset_properties is not None: # Chi2 can handle sparse data, so we respect this - if 'sparse' in dataset_properties and dataset_properties['sparse']: - score_func = Constant( - name="score_func", value="chi2") + if "sparse" in dataset_properties and dataset_properties["sparse"]: + score_func = Constant(name="score_func", value="chi2") cs = ConfigurationSpace() cs.add_hyperparameters([percentile, score_func]) diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py index 79b528c095..e9343fead4 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py +++ b/autosklearn/pipeline/components/feature_preprocessing/select_percentile_regression.py @@ -1,18 +1,23 @@ -from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import UniformFloatHyperparameter, CategoricalHyperparameter from functools import partial -from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.components.feature_preprocessing.select_percentile import \ - SelectPercentileBase -from autosklearn.pipeline.constants import SPARSE, DENSE, UNSIGNED_DATA, INPUT +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformFloatHyperparameter, +) +from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.components.feature_preprocessing.select_percentile import ( + SelectPercentileBase, +) +from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA -class SelectPercentileRegression(SelectPercentileBase, - AutoSklearnPreprocessingAlgorithm): +class SelectPercentileRegression( + SelectPercentileBase, AutoSklearnPreprocessingAlgorithm +): def __init__(self, percentile, score_func="f_regression", random_state=None): - """ Parameters: + """Parameters: random state : ignored score_func : callable, Function taking two arrays X and y, and @@ -25,31 +30,37 @@ def __init__(self, 
percentile, score_func="f_regression", random_state=None): if score_func == "f_regression": self.score_func = sklearn.feature_selection.f_regression elif score_func == "mutual_info": - self.score_func = partial(sklearn.feature_selection.mutual_info_regression, - random_state=self.random_state) + self.score_func = partial( + sklearn.feature_selection.mutual_info_regression, + random_state=self.random_state, + ) else: raise ValueError("Don't know this scoring function: %s" % score_func) @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'SPR', - 'name': 'Select Percentile Regression', - 'handles_regression': True, - 'handles_classification': False, - 'handles_multiclass': False, - 'handles_multilabel': False, - 'handles_multioutput': False, - 'is_deterministic': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (INPUT,)} + return { + "shortname": "SPR", + "name": "Select Percentile Regression", + "handles_regression": True, + "handles_classification": False, + "handles_multiclass": False, + "handles_multilabel": False, + "handles_multioutput": False, + "is_deterministic": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (INPUT,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): percentile = UniformFloatHyperparameter( - "percentile", lower=1, upper=99, default_value=50) + "percentile", lower=1, upper=99, default_value=50 + ) score_func = CategoricalHyperparameter( - name="score_func", choices=["f_regression", "mutual_info"]) + name="score_func", choices=["f_regression", "mutual_info"] + ) cs = ConfigurationSpace() cs.add_hyperparameters([percentile, score_func]) diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py b/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py index de6e950f0b..0c4768d000 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py +++ b/autosklearn/pipeline/components/feature_preprocessing/select_rates_classification.py @@ -1,17 +1,24 @@ -from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ - CategoricalHyperparameter -from ConfigSpace import NotEqualsCondition from functools import partial -from autosklearn.pipeline.components.base import \ - AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import SIGNED_DATA, UNSIGNED_DATA, SPARSE, DENSE, INPUT +from ConfigSpace import NotEqualsCondition +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformFloatHyperparameter, +) + +from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.constants import ( + DENSE, + INPUT, + SIGNED_DATA, + SPARSE, + UNSIGNED_DATA, +) class SelectClassificationRates(AutoSklearnPreprocessingAlgorithm): - def __init__(self, alpha, mode='fpr', - score_func="chi2", random_state=None): + def __init__(self, alpha, mode="fpr", score_func="chi2", random_state=None): import sklearn.feature_selection self.random_state = random_state # We don't use this @@ -23,14 +30,18 @@ def __init__(self, alpha, mode='fpr', elif score_func == "f_classif": self.score_func = sklearn.feature_selection.f_classif elif score_func == "mutual_info_classif": - self.score_func = partial(sklearn.feature_selection.mutual_info_classif, - random_state=self.random_state) + self.score_func = 
partial( + sklearn.feature_selection.mutual_info_classif, + random_state=self.random_state, + ) # mutual info classif constantly crashes without mode percentile - self.mode = 'percentile' + self.mode = "percentile" else: - raise ValueError("score_func must be in ('chi2, 'f_classif', 'mutual_info_classif') " - "for classification " - "but is: %s " % (score_func)) + raise ValueError( + "score_func must be in ('chi2, 'f_classif', 'mutual_info_classif') " + "for classification " + "but is: %s " % (score_func) + ) def fit(self, X, y): import scipy.sparse @@ -39,7 +50,8 @@ def fit(self, X, y): self.alpha = float(self.alpha) self.preprocessor = sklearn.feature_selection.GenericUnivariateSelect( - score_func=self.score_func, param=self.alpha, mode=self.mode) + score_func=self.score_func, param=self.alpha, mode=self.mode + ) # Because the pipeline guarantees that each feature is positive, # clip all values below zero to zero @@ -69,16 +81,16 @@ def transform(self, X): try: Xt = self.preprocessor.transform(X) except ValueError as e: - if "zero-size array to reduction operation maximum which has no " \ - "identity" in e.message: - raise ValueError( - "%s removed all features." % self.__class__.__name__) + if ( + "zero-size array to reduction operation maximum which has no " + "identity" in e.message + ): + raise ValueError("%s removed all features." % self.__class__.__name__) else: raise e if Xt.shape[1] == 0: - raise ValueError( - "%s removed all features." % self.__class__.__name__) + raise ValueError("%s removed all features." % self.__class__.__name__) return Xt @staticmethod @@ -86,37 +98,39 @@ def get_properties(dataset_properties=None): data_type = UNSIGNED_DATA if dataset_properties is not None: - signed = dataset_properties.get('signed') + signed = dataset_properties.get("signed") if signed is not None: data_type = SIGNED_DATA if signed is True else UNSIGNED_DATA - return {'shortname': 'SR', - 'name': 'Univariate Feature Selection based on rates', - 'handles_regression': False, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': False, - 'handles_multioutput': False, - 'is_deterministic': True, - 'input': (SPARSE, DENSE, data_type), - 'output': (INPUT,)} + return { + "shortname": "SR", + "name": "Univariate Feature Selection based on rates", + "handles_regression": False, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": False, + "handles_multioutput": False, + "is_deterministic": True, + "input": (SPARSE, DENSE, data_type), + "output": (INPUT,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): alpha = UniformFloatHyperparameter( - name="alpha", lower=0.01, upper=0.5, default_value=0.1) + name="alpha", lower=0.01, upper=0.5, default_value=0.1 + ) - if dataset_properties is not None and dataset_properties.get('sparse'): - choices = ['chi2', 'mutual_info_classif'] + if dataset_properties is not None and dataset_properties.get("sparse"): + choices = ["chi2", "mutual_info_classif"] else: - choices = ['chi2', 'f_classif', 'mutual_info_classif'] + choices = ["chi2", "f_classif", "mutual_info_classif"] score_func = CategoricalHyperparameter( - name="score_func", - choices=choices, - default_value="chi2") + name="score_func", choices=choices, default_value="chi2" + ) - mode = CategoricalHyperparameter('mode', ['fpr', 'fdr', 'fwe'], 'fpr') + mode = CategoricalHyperparameter("mode", ["fpr", "fdr", "fwe"], "fpr") cs = ConfigurationSpace() cs.add_hyperparameter(alpha) @@ -125,7 +139,7 @@ def 
get_hyperparameter_search_space(dataset_properties=None): # mutual_info_classif constantly crashes if mode is not percentile # as a WA, fix the mode for this score - cond = NotEqualsCondition(mode, score_func, 'mutual_info_classif') + cond = NotEqualsCondition(mode, score_func, "mutual_info_classif") cs.add_condition(cond) return cs diff --git a/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py b/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py index b5bfd2a103..ffec19e6ec 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py +++ b/autosklearn/pipeline/components/feature_preprocessing/select_rates_regression.py @@ -1,17 +1,20 @@ -from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ - CategoricalHyperparameter -from ConfigSpace import NotEqualsCondition from functools import partial -from autosklearn.pipeline.components.base import \ - AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import UNSIGNED_DATA, SPARSE, DENSE, INPUT +from ConfigSpace import NotEqualsCondition +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformFloatHyperparameter, +) + +from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA class SelectRegressionRates(AutoSklearnPreprocessingAlgorithm): - def __init__(self, alpha, mode='percentile', - score_func="f_regression", random_state=None): + def __init__( + self, alpha, mode="percentile", score_func="f_regression", random_state=None + ): import sklearn.feature_selection self.random_state = random_state # We don't use this @@ -21,14 +24,18 @@ def __init__(self, alpha, mode='percentile', if score_func == "f_regression": self.score_func = sklearn.feature_selection.f_regression elif score_func == "mutual_info_regression": - self.score_func = partial(sklearn.feature_selection.mutual_info_regression, - random_state=self.random_state) + self.score_func = partial( + sklearn.feature_selection.mutual_info_regression, + random_state=self.random_state, + ) # Mutual info consistently crashes if percentile is not the mode - self.mode = 'percentile' + self.mode = "percentile" else: - raise ValueError("score_func must be in ('f_regression, 'mutual_info_regression') " - "for task=regression " - "but is: %s " % (score_func)) + raise ValueError( + "score_func must be in ('f_regression, 'mutual_info_regression') " + "for task=regression " + "but is: %s " % (score_func) + ) def fit(self, X, y): import sklearn.feature_selection @@ -36,7 +43,8 @@ def fit(self, X, y): self.alpha = float(self.alpha) self.preprocessor = sklearn.feature_selection.GenericUnivariateSelect( - score_func=self.score_func, param=self.alpha, mode=self.mode) + score_func=self.score_func, param=self.alpha, mode=self.mode + ) self.preprocessor.fit(X, y) return self @@ -48,47 +56,49 @@ def transform(self, X): try: Xt = self.preprocessor.transform(X) except ValueError as e: - if "zero-size array to reduction operation maximum which has no " \ - "identity" in e.message: - raise ValueError( - "%s removed all features." % self.__class__.__name__) + if ( + "zero-size array to reduction operation maximum which has no " + "identity" in e.message + ): + raise ValueError("%s removed all features." 
% self.__class__.__name__) else: raise e if Xt.shape[1] == 0: - raise ValueError( - "%s removed all features." % self.__class__.__name__) + raise ValueError("%s removed all features." % self.__class__.__name__) return Xt @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'SR', - 'name': 'Univariate Feature Selection based on rates', - 'handles_regression': True, - 'handles_classification': False, - 'handles_multiclass': True, - 'handles_multilabel': False, - 'handles_multioutput': False, - 'is_deterministic': True, - 'input': (SPARSE, DENSE, UNSIGNED_DATA), - 'output': (INPUT,)} + return { + "shortname": "SR", + "name": "Univariate Feature Selection based on rates", + "handles_regression": True, + "handles_classification": False, + "handles_multiclass": True, + "handles_multilabel": False, + "handles_multioutput": False, + "is_deterministic": True, + "input": (SPARSE, DENSE, UNSIGNED_DATA), + "output": (INPUT,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): alpha = UniformFloatHyperparameter( - name="alpha", lower=0.01, upper=0.5, default_value=0.1) + name="alpha", lower=0.01, upper=0.5, default_value=0.1 + ) - if dataset_properties is not None and dataset_properties.get('sparse'): - choices = ['mutual_info_regression', 'f_regression'] + if dataset_properties is not None and dataset_properties.get("sparse"): + choices = ["mutual_info_regression", "f_regression"] else: - choices = ['f_regression'] + choices = ["f_regression"] score_func = CategoricalHyperparameter( - name="score_func", - choices=choices, - default_value="f_regression") + name="score_func", choices=choices, default_value="f_regression" + ) - mode = CategoricalHyperparameter('mode', ['fpr', 'fdr', 'fwe'], 'fpr') + mode = CategoricalHyperparameter("mode", ["fpr", "fdr", "fwe"], "fpr") cs = ConfigurationSpace() cs.add_hyperparameter(alpha) @@ -96,8 +106,8 @@ def get_hyperparameter_search_space(dataset_properties=None): cs.add_hyperparameter(mode) # Mutual info consistently crashes if percentile is not the mode - if 'mutual_info_regression' in choices: - cond = NotEqualsCondition(mode, score_func, 'mutual_info_regression') + if "mutual_info_regression" in choices: + cond = NotEqualsCondition(mode, score_func, "mutual_info_regression") cs.add_condition(cond) return cs diff --git a/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py b/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py index 078b2b4a2d..4d6f6b7ca9 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py +++ b/autosklearn/pipeline/components/feature_preprocessing/truncatedSVD.py @@ -2,7 +2,7 @@ from ConfigSpace.hyperparameters import UniformIntegerHyperparameter from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import SPARSE, UNSIGNED_DATA, DENSE, INPUT +from autosklearn.pipeline.constants import DENSE, INPUT, SPARSE, UNSIGNED_DATA class TruncatedSVD(AutoSklearnPreprocessingAlgorithm): @@ -17,7 +17,8 @@ def fit(self, X, Y): self.target_dim = int(self.target_dim) target_dim = min(self.target_dim, X.shape[1] - 1) self.preprocessor = sklearn.decomposition.TruncatedSVD( - target_dim, algorithm='randomized', random_state=self.random_state) + target_dim, algorithm="randomized", random_state=self.random_state + ) # TODO: remove when migrating to sklearn 0.16 # Circumvents a bug in sklearn # 
https://github.com/scikit-learn/scikit-learn/commit/f08b8c8e52663167819f242f605db39f3b5a6d0c @@ -33,21 +34,24 @@ def transform(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'TSVD', - 'name': 'Truncated Singular Value Decomposition', - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': True, - 'is_deterministic': True, - 'input': (SPARSE, UNSIGNED_DATA), - 'output': (DENSE, INPUT)} + return { + "shortname": "TSVD", + "name": "Truncated Singular Value Decomposition", + "handles_regression": True, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": True, + "is_deterministic": True, + "input": (SPARSE, UNSIGNED_DATA), + "output": (DENSE, INPUT), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): target_dim = UniformIntegerHyperparameter( - "target_dim", 10, 256, default_value=128) + "target_dim", 10, 256, default_value=128 + ) cs = ConfigurationSpace() cs.add_hyperparameter(target_dim) return cs diff --git a/autosklearn/pipeline/components/regression/__init__.py b/autosklearn/pipeline/components/regression/__init__.py index 651b49b602..73033467a7 100644 --- a/autosklearn/pipeline/components/regression/__init__.py +++ b/autosklearn/pipeline/components/regression/__init__.py @@ -1,18 +1,25 @@ -from collections import OrderedDict from typing import Type + import os +from collections import OrderedDict -from ..base import AutoSklearnRegressionAlgorithm, find_components, \ - ThirdPartyComponents, AutoSklearnChoice, _addons from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import CategoricalHyperparameter +from ..base import ( + AutoSklearnChoice, + AutoSklearnRegressionAlgorithm, + ThirdPartyComponents, + _addons, + find_components, +) + regressor_directory = os.path.split(__file__)[0] -_regressors = find_components(__package__, - regressor_directory, - AutoSklearnRegressionAlgorithm) +_regressors = find_components( + __package__, regressor_directory, AutoSklearnRegressionAlgorithm +) additional_components = ThirdPartyComponents(AutoSklearnRegressionAlgorithm) -_addons['regression'] = additional_components +_addons["regression"] = additional_components def add_regressor(regressor: Type[AutoSklearnRegressionAlgorithm]) -> None: @@ -20,7 +27,6 @@ def add_regressor(regressor: Type[AutoSklearnRegressionAlgorithm]) -> None: class RegressorChoice(AutoSklearnChoice): - @classmethod def get_components(cls): components = OrderedDict() @@ -29,10 +35,9 @@ def get_components(cls): return components @classmethod - def get_available_components(cls, - dataset_properties=None, - include=None, - exclude=None): + def get_available_components( + cls, dataset_properties=None, include=None, exclude=None + ): available_comp = cls.get_components() components_dict = OrderedDict() if dataset_properties is None: @@ -40,13 +45,15 @@ def get_available_components(cls, if include is not None and exclude is not None: raise ValueError( - "The argument include and exclude cannot be used together.") + "The argument include and exclude cannot be used together." 
+ ) if include is not None: for incl in include: if incl not in available_comp: - raise ValueError("Trying to include unknown component: " - "%s" % incl) + raise ValueError( + "Trying to include unknown component: " "%s" % incl + ) for name in available_comp: if include is not None and name not in include: @@ -60,36 +67,39 @@ def get_available_components(cls, if entry == RegressorChoice: continue - if entry.get_properties()['handles_regression'] is False: + if entry.get_properties()["handles_regression"] is False: continue - if dataset_properties.get('multioutput') is True and \ - entry.get_properties()['handles_multioutput'] is False: + if ( + dataset_properties.get("multioutput") is True + and entry.get_properties()["handles_multioutput"] is False + ): continue components_dict[name] = entry return components_dict - def get_hyperparameter_search_space(self, dataset_properties=None, - default=None, - include=None, - exclude=None): + def get_hyperparameter_search_space( + self, dataset_properties=None, default=None, include=None, exclude=None + ): if include is not None and exclude is not None: - raise ValueError("The argument include and exclude cannot be used together.") + raise ValueError( + "The argument include and exclude cannot be used together." + ) cs = ConfigurationSpace() # Compile a list of all estimator objects for this problem available_estimators = self.get_available_components( - dataset_properties=dataset_properties, - include=include, - exclude=exclude) + dataset_properties=dataset_properties, include=include, exclude=exclude + ) if len(available_estimators) == 0: raise ValueError("No regressors found") if default is None: - defaults = ['random_forest', 'support_vector_regression'] + \ - list(available_estimators.keys()) + defaults = ["random_forest", "support_vector_regression"] + list( + available_estimators.keys() + ) for default_ in defaults: if default_ in available_estimators: if include is not None and default_ not in include: @@ -99,21 +109,25 @@ def get_hyperparameter_search_space(self, dataset_properties=None, default = default_ break - estimator = CategoricalHyperparameter('__choice__', - list(available_estimators.keys()), - default_value=default) + estimator = CategoricalHyperparameter( + "__choice__", list(available_estimators.keys()), default_value=default + ) cs.add_hyperparameter(estimator) for estimator_name in available_estimators.keys(): - estimator_configuration_space = available_estimators[estimator_name].\ - get_hyperparameter_search_space(dataset_properties) - parent_hyperparameter = {'parent': estimator, 'value': estimator_name} - cs.add_configuration_space(estimator_name, estimator_configuration_space, - parent_hyperparameter=parent_hyperparameter) + estimator_configuration_space = available_estimators[ + estimator_name + ].get_hyperparameter_search_space(dataset_properties) + parent_hyperparameter = {"parent": estimator, "value": estimator_name} + cs.add_configuration_space( + estimator_name, + estimator_configuration_space, + parent_hyperparameter=parent_hyperparameter, + ) return cs def estimator_supports_iterative_fit(self): - return hasattr(self.choice, 'iterative_fit') + return hasattr(self.choice, "iterative_fit") def get_max_iter(self): if self.estimator_supports_iterative_fit(): diff --git a/autosklearn/pipeline/components/regression/adaboost.py b/autosklearn/pipeline/components/regression/adaboost.py index 2eb58ae2ea..e78a57e6a2 100644 --- a/autosklearn/pipeline/components/regression/adaboost.py +++ 
b/autosklearn/pipeline/components/regression/adaboost.py @@ -1,9 +1,12 @@ from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ - UniformIntegerHyperparameter, CategoricalHyperparameter +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformFloatHyperparameter, + UniformIntegerHyperparameter, +) from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm -from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA, PREDICTIONS, SPARSE +from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA class AdaboostRegressor(AutoSklearnRegressionAlgorithm): @@ -22,15 +25,14 @@ def fit(self, X, y): self.n_estimators = int(self.n_estimators) self.learning_rate = float(self.learning_rate) self.max_depth = int(self.max_depth) - base_estimator = sklearn.tree.DecisionTreeRegressor( - max_depth=self.max_depth) + base_estimator = sklearn.tree.DecisionTreeRegressor(max_depth=self.max_depth) self.estimator = sklearn.ensemble.AdaBoostRegressor( base_estimator=base_estimator, n_estimators=self.n_estimators, learning_rate=self.learning_rate, loss=self.loss, - random_state=self.random_state + random_state=self.random_state, ) if y.ndim == 2 and y.shape[1] == 1: @@ -46,16 +48,18 @@ def predict(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'AB', - 'name': 'AdaBoost Regressor', - 'handles_regression': True, - 'handles_classification': False, - 'handles_multiclass': False, - 'handles_multilabel': False, - 'handles_multioutput': False, - 'is_deterministic': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (PREDICTIONS, )} + return { + "shortname": "AB", + "name": "AdaBoost Regressor", + "handles_regression": True, + "handles_classification": False, + "handles_multiclass": False, + "handles_multilabel": False, + "handles_multioutput": False, + "is_deterministic": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (PREDICTIONS,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): @@ -63,16 +67,19 @@ def get_hyperparameter_search_space(dataset_properties=None): # base_estimator = Constant(name="base_estimator", value="None") n_estimators = UniformIntegerHyperparameter( - name="n_estimators", lower=50, upper=500, default_value=50, - log=False) + name="n_estimators", lower=50, upper=500, default_value=50, log=False + ) learning_rate = UniformFloatHyperparameter( - name="learning_rate", lower=0.01, upper=2, default_value=0.1, - log=True) + name="learning_rate", lower=0.01, upper=2, default_value=0.1, log=True + ) loss = CategoricalHyperparameter( - name="loss", choices=["linear", "square", "exponential"], - default_value="linear") + name="loss", + choices=["linear", "square", "exponential"], + default_value="linear", + ) max_depth = UniformIntegerHyperparameter( - name="max_depth", lower=1, upper=10, default_value=1, log=False) + name="max_depth", lower=1, upper=10, default_value=1, log=False + ) cs.add_hyperparameters([n_estimators, learning_rate, loss, max_depth]) return cs diff --git a/autosklearn/pipeline/components/regression/ard_regression.py b/autosklearn/pipeline/components/regression/ard_regression.py index 46dcac5d93..219cb775af 100644 --- a/autosklearn/pipeline/components/regression/ard_regression.py +++ b/autosklearn/pipeline/components/regression/ard_regression.py @@ -1,15 +1,27 @@ from ConfigSpace.configuration_space import ConfigurationSpace -from 
ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ - UnParametrizedHyperparameter +from ConfigSpace.hyperparameters import ( + UniformFloatHyperparameter, + UnParametrizedHyperparameter, +) from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm -from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA, PREDICTIONS +from autosklearn.pipeline.constants import DENSE, PREDICTIONS, UNSIGNED_DATA from autosklearn.util.common import check_for_bool class ARDRegression(AutoSklearnRegressionAlgorithm): - def __init__(self, n_iter, tol, alpha_1, alpha_2, lambda_1, lambda_2, - threshold_lambda, fit_intercept, random_state=None): + def __init__( + self, + n_iter, + tol, + alpha_1, + alpha_2, + lambda_1, + lambda_2, + threshold_lambda, + fit_intercept, + random_state=None, + ): self.random_state = random_state self.estimator = None @@ -46,7 +58,7 @@ def fit(self, X, y): fit_intercept=True, normalize=False, copy_X=False, - verbose=False + verbose=False, ) if y.ndim == 2 and y.shape[1] == 1: @@ -62,43 +74,71 @@ def predict(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'ARD', - 'name': 'ARD Regression', - 'handles_regression': True, - 'handles_classification': False, - 'handles_multiclass': False, - 'handles_multilabel': False, - 'handles_multioutput': False, - 'prefers_data_normalized': True, - 'is_deterministic': True, - 'input': (DENSE, UNSIGNED_DATA), - 'output': (PREDICTIONS,)} + return { + "shortname": "ARD", + "name": "ARD Regression", + "handles_regression": True, + "handles_classification": False, + "handles_multiclass": False, + "handles_multilabel": False, + "handles_multioutput": False, + "prefers_data_normalized": True, + "is_deterministic": True, + "input": (DENSE, UNSIGNED_DATA), + "output": (PREDICTIONS,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): cs = ConfigurationSpace() n_iter = UnParametrizedHyperparameter("n_iter", value=300) - tol = UniformFloatHyperparameter("tol", 10 ** -5, 10 ** -1, - default_value=10 ** -3, log=True) - alpha_1 = UniformFloatHyperparameter(name="alpha_1", lower=10 ** -10, - upper=10 ** -3, default_value=10 ** -6) - alpha_2 = UniformFloatHyperparameter(name="alpha_2", log=True, - lower=10 ** -10, upper=10 ** -3, - default_value=10 ** -6) - lambda_1 = UniformFloatHyperparameter(name="lambda_1", log=True, - lower=10 ** -10, upper=10 ** -3, - default_value=10 ** -6) - lambda_2 = UniformFloatHyperparameter(name="lambda_2", log=True, - lower=10 ** -10, upper=10 ** -3, - default_value=10 ** -6) - threshold_lambda = UniformFloatHyperparameter(name="threshold_lambda", - log=True, - lower=10 ** 3, - upper=10 ** 5, - default_value=10 ** 4) + tol = UniformFloatHyperparameter( + "tol", 10**-5, 10**-1, default_value=10**-3, log=True + ) + alpha_1 = UniformFloatHyperparameter( + name="alpha_1", lower=10**-10, upper=10**-3, default_value=10**-6 + ) + alpha_2 = UniformFloatHyperparameter( + name="alpha_2", + log=True, + lower=10**-10, + upper=10**-3, + default_value=10**-6, + ) + lambda_1 = UniformFloatHyperparameter( + name="lambda_1", + log=True, + lower=10**-10, + upper=10**-3, + default_value=10**-6, + ) + lambda_2 = UniformFloatHyperparameter( + name="lambda_2", + log=True, + lower=10**-10, + upper=10**-3, + default_value=10**-6, + ) + threshold_lambda = UniformFloatHyperparameter( + name="threshold_lambda", + log=True, + lower=10**3, + upper=10**5, + default_value=10**4, + ) fit_intercept = UnParametrizedHyperparameter("fit_intercept", "True") - 
cs.add_hyperparameters([n_iter, tol, alpha_1, alpha_2, lambda_1, - lambda_2, threshold_lambda, fit_intercept]) + cs.add_hyperparameters( + [ + n_iter, + tol, + alpha_1, + alpha_2, + lambda_1, + lambda_2, + threshold_lambda, + fit_intercept, + ] + ) return cs diff --git a/autosklearn/pipeline/components/regression/decision_tree.py b/autosklearn/pipeline/components/regression/decision_tree.py index 5ecbd254be..db59767587 100644 --- a/autosklearn/pipeline/components/regression/decision_tree.py +++ b/autosklearn/pipeline/components/regression/decision_tree.py @@ -1,20 +1,31 @@ import numpy as np - from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ - UniformIntegerHyperparameter, CategoricalHyperparameter, \ - UnParametrizedHyperparameter, Constant +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + Constant, + UniformFloatHyperparameter, + UniformIntegerHyperparameter, + UnParametrizedHyperparameter, +) -from autosklearn.pipeline.components.base import \ - AutoSklearnRegressionAlgorithm -from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA, PREDICTIONS, SPARSE +from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm +from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA from autosklearn.util.common import check_none class DecisionTree(AutoSklearnRegressionAlgorithm): - def __init__(self, criterion, max_features, max_depth_factor, - min_samples_split, min_samples_leaf, min_weight_fraction_leaf, - max_leaf_nodes, min_impurity_decrease, random_state=None): + def __init__( + self, + criterion, + max_features, + max_depth_factor, + min_samples_split, + min_samples_leaf, + min_weight_fraction_leaf, + max_leaf_nodes, + min_impurity_decrease, + random_state=None, + ): self.criterion = criterion self.max_features = max_features self.max_depth_factor = max_depth_factor @@ -36,8 +47,8 @@ def fit(self, X, y, sample_weight=None): num_features = X.shape[1] self.max_depth_factor = int(self.max_depth_factor) max_depth_factor = max( - 1, - int(np.round(self.max_depth_factor * num_features, 0))) + 1, int(np.round(self.max_depth_factor * num_features, 0)) + ) self.min_samples_split = int(self.min_samples_split) self.min_samples_leaf = int(self.min_samples_leaf) if check_none(self.max_leaf_nodes): @@ -55,7 +66,8 @@ def fit(self, X, y, sample_weight=None): max_leaf_nodes=self.max_leaf_nodes, min_weight_fraction_leaf=self.min_weight_fraction_leaf, min_impurity_decrease=self.min_impurity_decrease, - random_state=self.random_state) + random_state=self.random_state, + ) if y.ndim == 2 and y.shape[1] == 1: y = y.flatten() @@ -70,38 +82,53 @@ def predict(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'DT', - 'name': 'Decision Tree Classifier', - 'handles_regression': True, - 'handles_classification': False, - 'handles_multiclass': False, - 'handles_multilabel': False, - 'handles_multioutput': True, - 'is_deterministic': False, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (PREDICTIONS,)} + return { + "shortname": "DT", + "name": "Decision Tree Classifier", + "handles_regression": True, + "handles_classification": False, + "handles_multiclass": False, + "handles_multilabel": False, + "handles_multioutput": True, + "is_deterministic": False, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (PREDICTIONS,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): cs = 
ConfigurationSpace() - criterion = CategoricalHyperparameter('criterion', - ['mse', 'friedman_mse', 'mae']) - max_features = Constant('max_features', 1.0) + criterion = CategoricalHyperparameter( + "criterion", ["mse", "friedman_mse", "mae"] + ) + max_features = Constant("max_features", 1.0) max_depth_factor = UniformFloatHyperparameter( - 'max_depth_factor', 0., 2., default_value=0.5) + "max_depth_factor", 0.0, 2.0, default_value=0.5 + ) min_samples_split = UniformIntegerHyperparameter( - "min_samples_split", 2, 20, default_value=2) + "min_samples_split", 2, 20, default_value=2 + ) min_samples_leaf = UniformIntegerHyperparameter( - "min_samples_leaf", 1, 20, default_value=1) + "min_samples_leaf", 1, 20, default_value=1 + ) min_weight_fraction_leaf = Constant("min_weight_fraction_leaf", 0.0) max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None") min_impurity_decrease = UnParametrizedHyperparameter( - 'min_impurity_decrease', 0.0) + "min_impurity_decrease", 0.0 + ) - cs.add_hyperparameters([criterion, max_features, max_depth_factor, - min_samples_split, min_samples_leaf, - min_weight_fraction_leaf, max_leaf_nodes, - min_impurity_decrease]) + cs.add_hyperparameters( + [ + criterion, + max_features, + max_depth_factor, + min_samples_split, + min_samples_leaf, + min_weight_fraction_leaf, + max_leaf_nodes, + min_impurity_decrease, + ] + ) return cs diff --git a/autosklearn/pipeline/components/regression/extra_trees.py b/autosklearn/pipeline/components/regression/extra_trees.py index a676f0483d..c4646a2709 100644 --- a/autosklearn/pipeline/components/regression/extra_trees.py +++ b/autosklearn/pipeline/components/regression/extra_trees.py @@ -1,12 +1,16 @@ from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ - UniformIntegerHyperparameter, CategoricalHyperparameter, UnParametrizedHyperparameter +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformFloatHyperparameter, + UniformIntegerHyperparameter, + UnParametrizedHyperparameter, +) from autosklearn.pipeline.components.base import ( AutoSklearnRegressionAlgorithm, IterativeComponent, ) -from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA, PREDICTIONS, SPARSE +from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA from autosklearn.util.common import check_for_bool, check_none @@ -14,10 +18,22 @@ class ExtraTreesRegressor( IterativeComponent, AutoSklearnRegressionAlgorithm, ): - def __init__(self, criterion, min_samples_leaf, - min_samples_split, max_features, bootstrap, max_leaf_nodes, - max_depth, min_weight_fraction_leaf, min_impurity_decrease, - oob_score=False, n_jobs=1, random_state=None, verbose=0): + def __init__( + self, + criterion, + min_samples_leaf, + min_samples_split, + max_features, + bootstrap, + max_leaf_nodes, + max_depth, + min_weight_fraction_leaf, + min_impurity_decrease, + oob_score=False, + n_jobs=1, + random_state=None, + verbose=0, + ): self.n_estimators = self.get_max_iter() self.criterion = criterion @@ -53,7 +69,8 @@ def iterative_fit(self, X, y, n_iter=1, refit=False): if self.criterion not in ("mse", "friedman_mse", "mae"): raise ValueError( "'criterion' is not in ('mse', 'friedman_mse', " - "'mae): %s" % self.criterion) + "'mae): %s" % self.criterion + ) if check_none(self.max_depth): self.max_depth = None @@ -75,25 +92,28 @@ def iterative_fit(self, X, y, n_iter=1, refit=False): self.n_jobs = int(self.n_jobs) self.verbose = int(self.verbose) - 
self.estimator = ETR(n_estimators=n_iter, - criterion=self.criterion, - max_depth=self.max_depth, - min_samples_split=self.min_samples_split, - min_samples_leaf=self.min_samples_leaf, - bootstrap=self.bootstrap, - max_features=self.max_features, - max_leaf_nodes=self.max_leaf_nodes, - min_weight_fraction_leaf=self.min_weight_fraction_leaf, - min_impurity_decrease=self.min_impurity_decrease, - oob_score=self.oob_score, - n_jobs=self.n_jobs, - verbose=self.verbose, - random_state=self.random_state, - warm_start=True) + self.estimator = ETR( + n_estimators=n_iter, + criterion=self.criterion, + max_depth=self.max_depth, + min_samples_split=self.min_samples_split, + min_samples_leaf=self.min_samples_leaf, + bootstrap=self.bootstrap, + max_features=self.max_features, + max_leaf_nodes=self.max_leaf_nodes, + min_weight_fraction_leaf=self.min_weight_fraction_leaf, + min_impurity_decrease=self.min_impurity_decrease, + oob_score=self.oob_score, + n_jobs=self.n_jobs, + verbose=self.verbose, + random_state=self.random_state, + warm_start=True, + ) else: self.estimator.n_estimators += n_iter - self.estimator.n_estimators = min(self.estimator.n_estimators, - self.n_estimators) + self.estimator.n_estimators = min( + self.estimator.n_estimators, self.n_estimators + ) if y.ndim == 2 and y.shape[1] == 1: y = y.flatten() @@ -114,44 +134,62 @@ def predict(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'ET', - 'name': 'Extra Trees Regressor', - 'handles_regression': True, - 'handles_classification': False, - 'handles_multiclass': False, - 'handles_multilabel': False, - 'handles_multioutput': True, - 'is_deterministic': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (PREDICTIONS,)} + return { + "shortname": "ET", + "name": "Extra Trees Regressor", + "handles_regression": True, + "handles_classification": False, + "handles_multiclass": False, + "handles_multilabel": False, + "handles_multioutput": True, + "is_deterministic": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (PREDICTIONS,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): cs = ConfigurationSpace() - criterion = CategoricalHyperparameter("criterion", - ['mse', 'friedman_mse', 'mae']) + criterion = CategoricalHyperparameter( + "criterion", ["mse", "friedman_mse", "mae"] + ) max_features = UniformFloatHyperparameter( - "max_features", 0.1, 1.0, default_value=1) + "max_features", 0.1, 1.0, default_value=1 + ) max_depth = UnParametrizedHyperparameter(name="max_depth", value="None") - min_weight_fraction_leaf = UnParametrizedHyperparameter('min_weight_fraction_leaf', 0.) 
+ min_weight_fraction_leaf = UnParametrizedHyperparameter( + "min_weight_fraction_leaf", 0.0 + ) max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None") min_samples_split = UniformIntegerHyperparameter( - "min_samples_split", 2, 20, default_value=2) + "min_samples_split", 2, 20, default_value=2 + ) min_samples_leaf = UniformIntegerHyperparameter( - "min_samples_leaf", 1, 20, default_value=1) + "min_samples_leaf", 1, 20, default_value=1 + ) min_impurity_decrease = UnParametrizedHyperparameter( - 'min_impurity_decrease', 0.0 + "min_impurity_decrease", 0.0 ) bootstrap = CategoricalHyperparameter( - "bootstrap", ["True", "False"], default_value="False") + "bootstrap", ["True", "False"], default_value="False" + ) - cs.add_hyperparameters([criterion, max_features, - max_depth, max_leaf_nodes, min_samples_split, - min_samples_leaf, min_impurity_decrease, min_weight_fraction_leaf, - bootstrap]) + cs.add_hyperparameters( + [ + criterion, + max_features, + max_depth, + max_leaf_nodes, + min_samples_split, + min_samples_leaf, + min_impurity_decrease, + min_weight_fraction_leaf, + bootstrap, + ] + ) return cs diff --git a/autosklearn/pipeline/components/regression/gaussian_process.py b/autosklearn/pipeline/components/regression/gaussian_process.py index c587b13b0e..1acf238cd1 100644 --- a/autosklearn/pipeline/components/regression/gaussian_process.py +++ b/autosklearn/pipeline/components/regression/gaussian_process.py @@ -2,7 +2,7 @@ from ConfigSpace.hyperparameters import UniformFloatHyperparameter from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm -from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA, PREDICTIONS +from autosklearn.pipeline.constants import DENSE, PREDICTIONS, UNSIGNED_DATA class GaussianProcess(AutoSklearnRegressionAlgorithm): @@ -22,19 +22,19 @@ def fit(self, X, y): n_features = X.shape[1] kernel = sklearn.gaussian_process.kernels.RBF( - length_scale=[1.0]*n_features, - length_scale_bounds=[(self.thetaL, self.thetaU)]*n_features + length_scale=[1.0] * n_features, + length_scale_bounds=[(self.thetaL, self.thetaU)] * n_features, ) # Instanciate a Gaussian Process model self.estimator = sklearn.gaussian_process.GaussianProcessRegressor( kernel=kernel, n_restarts_optimizer=10, - optimizer='fmin_l_bfgs_b', + optimizer="fmin_l_bfgs_b", alpha=self.alpha, copy_X_train=True, random_state=self.random_state, - normalize_y=True + normalize_y=True, ) if y.ndim == 2 and y.shape[1] == 1: @@ -51,25 +51,30 @@ def predict(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'GP', - 'name': 'Gaussian Process', - 'handles_regression': True, - 'handles_classification': False, - 'handles_multiclass': False, - 'handles_multilabel': False, - 'handles_multioutput': True, - 'is_deterministic': True, - 'input': (DENSE, UNSIGNED_DATA), - 'output': (PREDICTIONS,)} + return { + "shortname": "GP", + "name": "Gaussian Process", + "handles_regression": True, + "handles_classification": False, + "handles_multiclass": False, + "handles_multilabel": False, + "handles_multioutput": True, + "is_deterministic": True, + "input": (DENSE, UNSIGNED_DATA), + "output": (PREDICTIONS,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): alpha = UniformFloatHyperparameter( - name="alpha", lower=1e-14, upper=1.0, default_value=1e-8, log=True) + name="alpha", lower=1e-14, upper=1.0, default_value=1e-8, log=True + ) thetaL = UniformFloatHyperparameter( - name="thetaL", lower=1e-10, upper=1e-3, 
default_value=1e-6, log=True) + name="thetaL", lower=1e-10, upper=1e-3, default_value=1e-6, log=True + ) thetaU = UniformFloatHyperparameter( - name="thetaU", lower=1.0, upper=100000, default_value=100000.0, log=True) + name="thetaU", lower=1.0, upper=100000, default_value=100000.0, log=True + ) cs = ConfigurationSpace() cs.add_hyperparameters([alpha, thetaL, thetaU]) diff --git a/autosklearn/pipeline/components/regression/gradient_boosting.py b/autosklearn/pipeline/components/regression/gradient_boosting.py index ad57596b9a..b7503f5fd0 100644 --- a/autosklearn/pipeline/components/regression/gradient_boosting.py +++ b/autosklearn/pipeline/components/regression/gradient_boosting.py @@ -1,16 +1,19 @@ import numpy as np - -from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ - UniformIntegerHyperparameter, CategoricalHyperparameter, Constant, \ - UnParametrizedHyperparameter from ConfigSpace.conditions import EqualsCondition, InCondition +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + Constant, + UniformFloatHyperparameter, + UniformIntegerHyperparameter, + UnParametrizedHyperparameter, +) from autosklearn.pipeline.components.base import ( AutoSklearnRegressionAlgorithm, IterativeComponent, ) -from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA, PREDICTIONS +from autosklearn.pipeline.constants import DENSE, PREDICTIONS, UNSIGNED_DATA from autosklearn.util.common import check_none @@ -18,10 +21,23 @@ class GradientBoosting( IterativeComponent, AutoSklearnRegressionAlgorithm, ): - def __init__(self, loss, learning_rate, min_samples_leaf, max_depth, - max_leaf_nodes, max_bins, l2_regularization, early_stop, tol, scoring, - n_iter_no_change=0, validation_fraction=None, random_state=None, - verbose=0): + def __init__( + self, + loss, + learning_rate, + min_samples_leaf, + max_depth, + max_leaf_nodes, + max_bins, + l2_regularization, + early_stop, + tol, + scoring, + n_iter_no_change=0, + validation_fraction=None, + random_state=None, + verbose=0, + ): self.loss = loss self.learning_rate = learning_rate self.max_iter = self.get_max_iter() @@ -48,7 +64,7 @@ def get_current_iter(self): return self.estimator.n_iter_ def iterative_fit(self, X, y, n_iter=2, refit=False): - """ Set n_iter=2 for the same reason as for SGD """ + """Set n_iter=2 for the same reason as for SGD""" import sklearn.ensemble from sklearn.experimental import enable_hist_gradient_boosting # noqa @@ -106,8 +122,7 @@ def iterative_fit(self, X, y, n_iter=2, refit=False): ) else: self.estimator.max_iter += n_iter - self.estimator.max_iter = min(self.estimator.max_iter, - self.max_iter) + self.estimator.max_iter = min(self.estimator.max_iter, self.max_iter) if y.ndim == 2 and y.shape[1] == 1: y = y.flatten() @@ -125,7 +140,7 @@ def iterative_fit(self, X, y, n_iter=2, refit=False): def configuration_fully_fitted(self): if self.estimator is None: return False - elif not hasattr(self, 'fully_fit_'): + elif not hasattr(self, "fully_fit_"): return False else: return self.fully_fit_ @@ -137,54 +152,79 @@ def predict(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'GB', - 'name': 'Gradient Boosting Regressor', - 'handles_regression': True, - 'handles_classification': False, - 'handles_multiclass': False, - 'handles_multilabel': False, - 'handles_multioutput': False, - 'is_deterministic': True, - 'input': (DENSE, 
UNSIGNED_DATA), - 'output': (PREDICTIONS,)} + return { + "shortname": "GB", + "name": "Gradient Boosting Regressor", + "handles_regression": True, + "handles_classification": False, + "handles_multiclass": False, + "handles_multilabel": False, + "handles_multioutput": False, + "is_deterministic": True, + "input": (DENSE, UNSIGNED_DATA), + "output": (PREDICTIONS,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): cs = ConfigurationSpace() loss = CategoricalHyperparameter( - "loss", ["least_squares"], default_value="least_squares") + "loss", ["least_squares"], default_value="least_squares" + ) learning_rate = UniformFloatHyperparameter( - name="learning_rate", lower=0.01, upper=1, default_value=0.1, log=True) + name="learning_rate", lower=0.01, upper=1, default_value=0.1, log=True + ) min_samples_leaf = UniformIntegerHyperparameter( - name="min_samples_leaf", lower=1, upper=200, default_value=20, log=True) - max_depth = UnParametrizedHyperparameter( - name="max_depth", value="None") + name="min_samples_leaf", lower=1, upper=200, default_value=20, log=True + ) + max_depth = UnParametrizedHyperparameter(name="max_depth", value="None") max_leaf_nodes = UniformIntegerHyperparameter( - name="max_leaf_nodes", lower=3, upper=2047, default_value=31, log=True) + name="max_leaf_nodes", lower=3, upper=2047, default_value=31, log=True + ) max_bins = Constant("max_bins", 255) l2_regularization = UniformFloatHyperparameter( - name="l2_regularization", lower=1E-10, upper=1, default_value=1E-10, log=True) + name="l2_regularization", + lower=1e-10, + upper=1, + default_value=1e-10, + log=True, + ) early_stop = CategoricalHyperparameter( - name="early_stop", choices=["off", "valid", "train"], default_value="off") - tol = UnParametrizedHyperparameter( - name="tol", value=1e-7) - scoring = UnParametrizedHyperparameter( - name="scoring", value="loss") + name="early_stop", choices=["off", "valid", "train"], default_value="off" + ) + tol = UnParametrizedHyperparameter(name="tol", value=1e-7) + scoring = UnParametrizedHyperparameter(name="scoring", value="loss") n_iter_no_change = UniformIntegerHyperparameter( - name="n_iter_no_change", lower=1, upper=20, default_value=10) + name="n_iter_no_change", lower=1, upper=20, default_value=10 + ) validation_fraction = UniformFloatHyperparameter( - name="validation_fraction", lower=0.01, upper=0.4, default_value=0.1) - - cs.add_hyperparameters([loss, learning_rate, min_samples_leaf, - max_depth, max_leaf_nodes, max_bins, l2_regularization, - early_stop, tol, scoring, n_iter_no_change, - validation_fraction]) + name="validation_fraction", lower=0.01, upper=0.4, default_value=0.1 + ) + + cs.add_hyperparameters( + [ + loss, + learning_rate, + min_samples_leaf, + max_depth, + max_leaf_nodes, + max_bins, + l2_regularization, + early_stop, + tol, + scoring, + n_iter_no_change, + validation_fraction, + ] + ) n_iter_no_change_cond = InCondition( - n_iter_no_change, early_stop, ["valid", "train"]) + n_iter_no_change, early_stop, ["valid", "train"] + ) validation_fraction_cond = EqualsCondition( - validation_fraction, early_stop, "valid") + validation_fraction, early_stop, "valid" + ) cs.add_conditions([n_iter_no_change_cond, validation_fraction_cond]) diff --git a/autosklearn/pipeline/components/regression/k_nearest_neighbors.py b/autosklearn/pipeline/components/regression/k_nearest_neighbors.py index e4943e2ca5..83c13cd191 100644 --- a/autosklearn/pipeline/components/regression/k_nearest_neighbors.py +++ 
b/autosklearn/pipeline/components/regression/k_nearest_neighbors.py @@ -1,9 +1,11 @@ from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import CategoricalHyperparameter, \ - UniformIntegerHyperparameter +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformIntegerHyperparameter, +) from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm -from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA, PREDICTIONS, SPARSE +from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA class KNearestNeighborsRegressor(AutoSklearnRegressionAlgorithm): @@ -19,11 +21,9 @@ def fit(self, X, y): self.n_neighbors = int(self.n_neighbors) self.p = int(self.p) - self.estimator = \ - sklearn.neighbors.KNeighborsRegressor( - n_neighbors=self.n_neighbors, - weights=self.weights, - p=self.p) + self.estimator = sklearn.neighbors.KNeighborsRegressor( + n_neighbors=self.n_neighbors, weights=self.weights, p=self.p + ) if y.ndim == 2 and y.shape[1] == 1: y = y.flatten() @@ -38,25 +38,29 @@ def predict(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'KNN', - 'name': 'K-Nearest Neighbor Classification', - 'handles_regression': True, - 'handles_classification': False, - 'handles_multiclass': False, - 'handles_multilabel': False, - 'handles_multioutput': True, - 'is_deterministic': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (PREDICTIONS,)} + return { + "shortname": "KNN", + "name": "K-Nearest Neighbor Classification", + "handles_regression": True, + "handles_classification": False, + "handles_multiclass": False, + "handles_multilabel": False, + "handles_multioutput": True, + "is_deterministic": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (PREDICTIONS,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): cs = ConfigurationSpace() n_neighbors = UniformIntegerHyperparameter( - name="n_neighbors", lower=1, upper=100, log=True, default_value=1) + name="n_neighbors", lower=1, upper=100, log=True, default_value=1 + ) weights = CategoricalHyperparameter( - name="weights", choices=["uniform", "distance"], default_value="uniform") + name="weights", choices=["uniform", "distance"], default_value="uniform" + ) p = CategoricalHyperparameter(name="p", choices=[1, 2], default_value=2) cs.add_hyperparameters([n_neighbors, weights, p]) diff --git a/autosklearn/pipeline/components/regression/liblinear_svr.py b/autosklearn/pipeline/components/regression/liblinear_svr.py index 73c1550ff3..e129331298 100644 --- a/autosklearn/pipeline/components/regression/liblinear_svr.py +++ b/autosklearn/pipeline/components/regression/liblinear_svr.py @@ -1,18 +1,29 @@ from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ - CategoricalHyperparameter, Constant -from ConfigSpace.forbidden import ForbiddenEqualsClause, \ - ForbiddenAndConjunction +from ConfigSpace.forbidden import ForbiddenAndConjunction, ForbiddenEqualsClause +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + Constant, + UniformFloatHyperparameter, +) from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm -from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA, PREDICTIONS, SPARSE +from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA from autosklearn.util.common import check_for_bool class 
LibLinear_SVR(AutoSklearnRegressionAlgorithm): # Liblinear is not deterministic as it uses a RNG inside - def __init__(self, loss, epsilon, dual, tol, C, fit_intercept, - intercept_scaling, random_state=None): + def __init__( + self, + loss, + epsilon, + dual, + tol, + C, + fit_intercept, + intercept_scaling, + random_state=None, + ): self.epsilon = epsilon self.loss = loss self.dual = dual @@ -34,14 +45,16 @@ def fit(self, X, y): self.fit_intercept = check_for_bool(self.fit_intercept) self.intercept_scaling = float(self.intercept_scaling) - self.estimator = sklearn.svm.LinearSVR(epsilon=self.epsilon, - loss=self.loss, - dual=self.dual, - tol=self.tol, - C=self.C, - fit_intercept=self.fit_intercept, - intercept_scaling=self.intercept_scaling, - random_state=self.random_state) + self.estimator = sklearn.svm.LinearSVR( + epsilon=self.epsilon, + loss=self.loss, + dual=self.dual, + tol=self.tol, + C=self.C, + fit_intercept=self.fit_intercept, + intercept_scaling=self.intercept_scaling, + random_state=self.random_state, + ) if y.ndim == 2 and y.shape[1] == 1: y = y.flatten() @@ -56,41 +69,47 @@ def predict(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'Liblinear-SVR', - 'name': 'Liblinear Support Vector Regression', - 'handles_regression': True, - 'handles_classification': False, - 'handles_multiclass': False, - 'handles_multilabel': False, - 'handles_multioutput': False, - 'is_deterministic': False, - 'input': (SPARSE, DENSE, UNSIGNED_DATA), - 'output': (PREDICTIONS,)} + return { + "shortname": "Liblinear-SVR", + "name": "Liblinear Support Vector Regression", + "handles_regression": True, + "handles_classification": False, + "handles_multiclass": False, + "handles_multilabel": False, + "handles_multioutput": False, + "is_deterministic": False, + "input": (SPARSE, DENSE, UNSIGNED_DATA), + "output": (PREDICTIONS,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): cs = ConfigurationSpace() - C = UniformFloatHyperparameter( - "C", 0.03125, 32768, log=True, default_value=1.0) + C = UniformFloatHyperparameter("C", 0.03125, 32768, log=True, default_value=1.0) loss = CategoricalHyperparameter( - "loss", ["epsilon_insensitive", "squared_epsilon_insensitive"], - default_value="squared_epsilon_insensitive") + "loss", + ["epsilon_insensitive", "squared_epsilon_insensitive"], + default_value="squared_epsilon_insensitive", + ) # Random Guess epsilon = UniformFloatHyperparameter( - name="epsilon", lower=0.001, upper=1, default_value=0.1, log=True) + name="epsilon", lower=0.001, upper=1, default_value=0.1, log=True + ) dual = Constant("dual", "False") # These are set ad-hoc tol = UniformFloatHyperparameter( - "tol", 1e-5, 1e-1, default_value=1e-4, log=True) + "tol", 1e-5, 1e-1, default_value=1e-4, log=True + ) fit_intercept = Constant("fit_intercept", "True") intercept_scaling = Constant("intercept_scaling", 1) - cs.add_hyperparameters([C, loss, epsilon, dual, tol, fit_intercept, - intercept_scaling]) + cs.add_hyperparameters( + [C, loss, epsilon, dual, tol, fit_intercept, intercept_scaling] + ) dual_and_loss = ForbiddenAndConjunction( ForbiddenEqualsClause(dual, "False"), - ForbiddenEqualsClause(loss, "epsilon_insensitive") + ForbiddenEqualsClause(loss, "epsilon_insensitive"), ) cs.add_forbidden_clause(dual_and_loss) diff --git a/autosklearn/pipeline/components/regression/libsvm_svr.py b/autosklearn/pipeline/components/regression/libsvm_svr.py index 6b6c70415c..d4173d7f01 100644 --- 
a/autosklearn/pipeline/components/regression/libsvm_svr.py +++ b/autosklearn/pipeline/components/regression/libsvm_svr.py @@ -1,20 +1,35 @@ import resource import sys +from ConfigSpace.conditions import EqualsCondition, InCondition from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.conditions import InCondition, EqualsCondition -from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ - UniformIntegerHyperparameter, CategoricalHyperparameter, \ - UnParametrizedHyperparameter +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformFloatHyperparameter, + UniformIntegerHyperparameter, + UnParametrizedHyperparameter, +) + from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm -from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA, PREDICTIONS, SPARSE +from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA from autosklearn.util.common import check_for_bool, check_none class LibSVM_SVR(AutoSklearnRegressionAlgorithm): - def __init__(self, kernel, C, epsilon, tol, shrinking, gamma=0.1, - degree=3, coef0=0.0, verbose=False, - max_iter=-1, random_state=None): + def __init__( + self, + kernel, + C, + epsilon, + tol, + shrinking, + gamma=0.1, + degree=3, + coef0=0.0, + verbose=False, + max_iter=-1, + random_state=None, + ): self.kernel = kernel self.C = C self.epsilon = epsilon @@ -31,9 +46,9 @@ def __init__(self, kernel, C, epsilon, tol, shrinking, gamma=0.1, def fit(self, X, y): import sklearn.svm - # Calculate the size of the kernel cache (in MB) for sklearn's LibSVM. The cache size is - # calculated as 2/3 of the available memory (which is calculated as the memory limit minus - # the used memory) + # Calculate the size of the kernel cache (in MB) for sklearn's LibSVM. 
+ # The cache size is calculated as 2/3 of the available memory + # (which is calculated as the memory limit minus the used memory) try: # Retrieve memory limits imposed on the process soft, hard = resource.getrlimit(resource.RLIMIT_AS) @@ -45,9 +60,9 @@ def fit(self, X, y): # Retrieve memory used by this process maxrss = resource.getrusage(resource.RUSAGE_SELF)[2] / 1024 - # In MacOS, the MaxRSS output of resource.getrusage in bytes; on other platforms, - # it's in kilobytes - if sys.platform == 'darwin': + # In MacOS, the MaxRSS output of resource.getrusage in bytes; + # on other platforms, it's in kilobytes + if sys.platform == "darwin": maxrss = maxrss / 1024 cache_size = (soft - maxrss) / 1.5 @@ -83,7 +98,7 @@ def fit(self, X, y): coef0=self.coef0, cache_size=cache_size, verbose=self.verbose, - max_iter=self.max_iter + max_iter=self.max_iter, ) self.scaler = sklearn.preprocessing.StandardScaler(copy=True) @@ -119,56 +134,70 @@ def predict(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'SVR', - 'name': 'Support Vector Regression', - 'handles_regression': True, - 'handles_classification': False, - 'handles_multiclass': False, - 'handles_multilabel': False, - 'handles_multioutput': False, - 'prefers_data_normalized': True, - 'is_deterministic': True, - 'input': (SPARSE, DENSE, UNSIGNED_DATA), - 'output': (PREDICTIONS,)} + return { + "shortname": "SVR", + "name": "Support Vector Regression", + "handles_regression": True, + "handles_classification": False, + "handles_multiclass": False, + "handles_multilabel": False, + "handles_multioutput": False, + "prefers_data_normalized": True, + "is_deterministic": True, + "input": (SPARSE, DENSE, UNSIGNED_DATA), + "output": (PREDICTIONS,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): C = UniformFloatHyperparameter( - name="C", lower=0.03125, upper=32768, log=True, default_value=1.0) + name="C", lower=0.03125, upper=32768, log=True, default_value=1.0 + ) # Random Guess - epsilon = UniformFloatHyperparameter(name="epsilon", lower=0.001, - upper=1, default_value=0.1, - log=True) + epsilon = UniformFloatHyperparameter( + name="epsilon", lower=0.001, upper=1, default_value=0.1, log=True + ) kernel = CategoricalHyperparameter( - name="kernel", choices=['linear', 'poly', 'rbf', 'sigmoid'], - default_value="rbf") + name="kernel", + choices=["linear", "poly", "rbf", "sigmoid"], + default_value="rbf", + ) degree = UniformIntegerHyperparameter( - name="degree", lower=2, upper=5, default_value=3) + name="degree", lower=2, upper=5, default_value=3 + ) gamma = UniformFloatHyperparameter( - name="gamma", lower=3.0517578125e-05, upper=8, log=True, default_value=0.1) + name="gamma", lower=3.0517578125e-05, upper=8, log=True, default_value=0.1 + ) # TODO this is totally ad-hoc coef0 = UniformFloatHyperparameter( - name="coef0", lower=-1, upper=1, default_value=0) + name="coef0", lower=-1, upper=1, default_value=0 + ) # probability is no hyperparameter, but an argument to the SVM algo shrinking = CategoricalHyperparameter( - name="shrinking", choices=["True", "False"], default_value="True") + name="shrinking", choices=["True", "False"], default_value="True" + ) tol = UniformFloatHyperparameter( - name="tol", lower=1e-5, upper=1e-1, default_value=1e-3, log=True) + name="tol", lower=1e-5, upper=1e-1, default_value=1e-3, log=True + ) max_iter = UnParametrizedHyperparameter("max_iter", -1) cs = ConfigurationSpace() - cs.add_hyperparameters([C, kernel, degree, gamma, coef0, shrinking, - tol, 
max_iter, epsilon]) + cs.add_hyperparameters( + [C, kernel, degree, gamma, coef0, shrinking, tol, max_iter, epsilon] + ) degree_depends_on_poly = EqualsCondition(degree, kernel, "poly") - gamma_depends_on_kernel = InCondition(child=gamma, parent=kernel, - values=('poly', 'rbf')) - coef0_depends_on_kernel = InCondition(child=coef0, parent=kernel, - values=('poly', 'sigmoid')) - cs.add_conditions([degree_depends_on_poly, gamma_depends_on_kernel, - coef0_depends_on_kernel]) + gamma_depends_on_kernel = InCondition( + child=gamma, parent=kernel, values=("poly", "rbf") + ) + coef0_depends_on_kernel = InCondition( + child=coef0, parent=kernel, values=("poly", "sigmoid") + ) + cs.add_conditions( + [degree_depends_on_poly, gamma_depends_on_kernel, coef0_depends_on_kernel] + ) return cs diff --git a/autosklearn/pipeline/components/regression/mlp.py b/autosklearn/pipeline/components/regression/mlp.py index 8eec40a2cc..645c29403a 100644 --- a/autosklearn/pipeline/components/regression/mlp.py +++ b/autosklearn/pipeline/components/regression/mlp.py @@ -1,27 +1,43 @@ import numpy as np - -from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ - UniformIntegerHyperparameter, UnParametrizedHyperparameter, Constant, \ - CategoricalHyperparameter from ConfigSpace.conditions import InCondition +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + Constant, + UniformFloatHyperparameter, + UniformIntegerHyperparameter, + UnParametrizedHyperparameter, +) from autosklearn.pipeline.components.base import ( AutoSklearnRegressionAlgorithm, IterativeComponent, ) -from autosklearn.pipeline.constants import SPARSE, DENSE, UNSIGNED_DATA, PREDICTIONS +from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA from autosklearn.util.common import check_for_bool -class MLPRegressor( - IterativeComponent, - AutoSklearnRegressionAlgorithm -): - def __init__(self, hidden_layer_depth, num_nodes_per_layer, activation, alpha, - learning_rate_init, early_stopping, solver, batch_size, - n_iter_no_change, tol, shuffle, beta_1, beta_2, epsilon, - validation_fraction=None, random_state=None, verbose=0): +class MLPRegressor(IterativeComponent, AutoSklearnRegressionAlgorithm): + def __init__( + self, + hidden_layer_depth, + num_nodes_per_layer, + activation, + alpha, + learning_rate_init, + early_stopping, + solver, + batch_size, + n_iter_no_change, + tol, + shuffle, + beta_1, + beta_2, + epsilon, + validation_fraction=None, + random_state=None, + verbose=0, + ): self.hidden_layer_depth = hidden_layer_depth self.num_nodes_per_layer = num_nodes_per_layer self.max_iter = self.get_max_iter() @@ -52,11 +68,10 @@ def get_current_iter(self): return self.estimator.n_iter_ def iterative_fit(self, X, y, n_iter=2, refit=False): - """ - Set n_iter=2 for the same reason as for SGD - """ - from sklearn.neural_network import MLPRegressor + """Set n_iter=2 for the same reason as for SGD""" import sklearn.preprocessing + from sklearn.neural_network import MLPRegressor + n_iter = max(n_iter, 2) if refit: @@ -69,8 +84,9 @@ def iterative_fit(self, X, y, n_iter=2, refit=False): self.max_iter = int(self.max_iter) self.hidden_layer_depth = int(self.hidden_layer_depth) self.num_nodes_per_layer = int(self.num_nodes_per_layer) - self.hidden_layer_sizes = tuple(self.num_nodes_per_layer - for i in range(self.hidden_layer_depth)) + self.hidden_layer_sizes = tuple( + 
self.num_nodes_per_layer for i in range(self.hidden_layer_depth) + ) self.activation = str(self.activation) self.alpha = float(self.alpha) self.learning_rate_init = float(self.learning_rate_init) @@ -86,7 +102,9 @@ def iterative_fit(self, X, y, n_iter=2, refit=False): self.n_iter_no_change = int(self.n_iter_no_change) self.early_stopping_val = True else: - raise ValueError("Set early stopping to unknown value %s" % self.early_stopping) + raise ValueError( + "Set early stopping to unknown value %s" % self.early_stopping + ) # elif self.early_stopping == "off": # self.validation_fraction = 0 # self.tol = 10000 @@ -172,7 +190,7 @@ def iterative_fit(self, X, y, n_iter=2, refit=False): def configuration_fully_fitted(self): if self.estimator is None: return False - elif not hasattr(self, '_fully_fit'): + elif not hasattr(self, "_fully_fit"): return False else: return self._fully_fit @@ -193,43 +211,56 @@ def predict(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'MLP', - 'name': 'Multilayer Percepton', - 'handles_regression': True, - 'handles_classification': False, - 'handles_multiclass': False, - 'handles_multilabel': False, - 'handles_multioutput': False, - 'is_deterministic': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (PREDICTIONS,)} + return { + "shortname": "MLP", + "name": "Multilayer Percepton", + "handles_regression": True, + "handles_classification": False, + "handles_multiclass": False, + "handles_multilabel": False, + "handles_multioutput": False, + "is_deterministic": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (PREDICTIONS,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): cs = ConfigurationSpace() - hidden_layer_depth = UniformIntegerHyperparameter(name="hidden_layer_depth", - lower=1, upper=3, default_value=1) - num_nodes_per_layer = UniformIntegerHyperparameter(name="num_nodes_per_layer", - lower=16, upper=264, default_value=32, - log=True) - activation = CategoricalHyperparameter(name="activation", choices=['tanh', 'relu'], - default_value='tanh') - alpha = UniformFloatHyperparameter(name="alpha", lower=1e-7, upper=1e-1, default_value=1e-4, - log=True) - - learning_rate_init = UniformFloatHyperparameter(name="learning_rate_init", - lower=1e-4, upper=0.5, default_value=1e-3, - log=True) + hidden_layer_depth = UniformIntegerHyperparameter( + name="hidden_layer_depth", lower=1, upper=3, default_value=1 + ) + num_nodes_per_layer = UniformIntegerHyperparameter( + name="num_nodes_per_layer", lower=16, upper=264, default_value=32, log=True + ) + activation = CategoricalHyperparameter( + name="activation", choices=["tanh", "relu"], default_value="tanh" + ) + alpha = UniformFloatHyperparameter( + name="alpha", lower=1e-7, upper=1e-1, default_value=1e-4, log=True + ) + + learning_rate_init = UniformFloatHyperparameter( + name="learning_rate_init", + lower=1e-4, + upper=0.5, + default_value=1e-3, + log=True, + ) # Not allowing to turn off early stopping - early_stopping = CategoricalHyperparameter(name="early_stopping", - choices=["valid", "train"], # , "off"], - default_value="valid") + early_stopping = CategoricalHyperparameter( + name="early_stopping", + choices=["valid", "train"], # , "off"], + default_value="valid", + ) # Constants - n_iter_no_change = Constant(name="n_iter_no_change", value=32) # default=10 is too low + n_iter_no_change = Constant( + name="n_iter_no_change", value=32 + ) # default=10 is too low validation_fraction = Constant(name="validation_fraction", 
value=0.1) tol = UnParametrizedHyperparameter(name="tol", value=1e-4) - solver = Constant(name="solver", value='adam') + solver = Constant(name="solver", value="adam") # Relying on sklearn defaults for now batch_size = UnParametrizedHyperparameter(name="batch_size", value="auto") @@ -247,17 +278,33 @@ def get_hyperparameter_search_space(dataset_properties=None): # max_fun --> only used when solver=lbfgs # activation=["identity", "logistic"] --> not useful for classification - cs.add_hyperparameters([hidden_layer_depth, num_nodes_per_layer, - activation, alpha, - learning_rate_init, early_stopping, - n_iter_no_change, validation_fraction, tol, - solver, batch_size, shuffle, - beta_1, beta_2, epsilon]) - - validation_fraction_cond = InCondition(validation_fraction, early_stopping, ["valid"]) + cs.add_hyperparameters( + [ + hidden_layer_depth, + num_nodes_per_layer, + activation, + alpha, + learning_rate_init, + early_stopping, + n_iter_no_change, + validation_fraction, + tol, + solver, + batch_size, + shuffle, + beta_1, + beta_2, + epsilon, + ] + ) + + validation_fraction_cond = InCondition( + validation_fraction, early_stopping, ["valid"] + ) cs.add_conditions([validation_fraction_cond]) # We always use early stopping - # n_iter_no_change_cond = InCondition(n_iter_no_change, early_stopping, ["valid", "train"]) + # n_iter_no_change_cond = \ + # InCondition(n_iter_no_change, early_stopping, ["valid", "train"]) # tol_cond = InCondition(n_iter_no_change, early_stopping, ["valid", "train"]) # cs.add_conditions([n_iter_no_change_cond, tol_cond]) diff --git a/autosklearn/pipeline/components/regression/random_forest.py b/autosklearn/pipeline/components/regression/random_forest.py index eeaddb9e1a..128113fc43 100644 --- a/autosklearn/pipeline/components/regression/random_forest.py +++ b/autosklearn/pipeline/components/regression/random_forest.py @@ -1,12 +1,16 @@ from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ - UniformIntegerHyperparameter, CategoricalHyperparameter, UnParametrizedHyperparameter +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformFloatHyperparameter, + UniformIntegerHyperparameter, + UnParametrizedHyperparameter, +) from autosklearn.pipeline.components.base import ( AutoSklearnRegressionAlgorithm, IterativeComponent, ) -from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA, PREDICTIONS, SPARSE +from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA from autosklearn.util.common import check_for_bool, check_none @@ -14,10 +18,20 @@ class RandomForest( IterativeComponent, AutoSklearnRegressionAlgorithm, ): - def __init__(self, criterion, max_features, - max_depth, min_samples_split, min_samples_leaf, - min_weight_fraction_leaf, bootstrap, max_leaf_nodes, - min_impurity_decrease, random_state=None, n_jobs=1): + def __init__( + self, + criterion, + max_features, + max_depth, + min_samples_split, + min_samples_leaf, + min_weight_fraction_leaf, + bootstrap, + max_leaf_nodes, + min_impurity_decrease, + random_state=None, + n_jobs=1, + ): self.n_estimators = self.get_max_iter() self.criterion = criterion self.max_features = max_features @@ -79,11 +93,13 @@ def iterative_fit(self, X, y, n_iter=1, refit=False): min_impurity_decrease=self.min_impurity_decrease, random_state=self.random_state, n_jobs=self.n_jobs, - warm_start=True) + warm_start=True, + ) else: self.estimator.n_estimators += n_iter - self.estimator.n_estimators = 
min(self.estimator.n_estimators, - self.n_estimators) + self.estimator.n_estimators = min( + self.estimator.n_estimators, self.n_estimators + ) if y.ndim == 2 and y.shape[1] == 1: y = y.flatten() @@ -104,45 +120,63 @@ def predict(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'RF', - 'name': 'Random Forest Regressor', - 'handles_regression': True, - 'handles_classification': False, - 'handles_multiclass': False, - 'handles_multilabel': False, - 'handles_multioutput': True, - 'prefers_data_normalized': False, - 'is_deterministic': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (PREDICTIONS,)} + return { + "shortname": "RF", + "name": "Random Forest Regressor", + "handles_regression": True, + "handles_classification": False, + "handles_multiclass": False, + "handles_multilabel": False, + "handles_multioutput": True, + "prefers_data_normalized": False, + "is_deterministic": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (PREDICTIONS,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): cs = ConfigurationSpace() - criterion = CategoricalHyperparameter("criterion", - ['mse', 'friedman_mse', 'mae']) + criterion = CategoricalHyperparameter( + "criterion", ["mse", "friedman_mse", "mae"] + ) # In contrast to the random forest classifier we want to use more max_features # and therefore have this not on a sqrt scale max_features = UniformFloatHyperparameter( - "max_features", 0.1, 1.0, default_value=1.0) + "max_features", 0.1, 1.0, default_value=1.0 + ) max_depth = UnParametrizedHyperparameter("max_depth", "None") min_samples_split = UniformIntegerHyperparameter( - "min_samples_split", 2, 20, default_value=2) + "min_samples_split", 2, 20, default_value=2 + ) min_samples_leaf = UniformIntegerHyperparameter( - "min_samples_leaf", 1, 20, default_value=1) - min_weight_fraction_leaf = \ - UnParametrizedHyperparameter("min_weight_fraction_leaf", 0.) 
+ "min_samples_leaf", 1, 20, default_value=1 + ) + min_weight_fraction_leaf = UnParametrizedHyperparameter( + "min_weight_fraction_leaf", 0.0 + ) max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None") min_impurity_decrease = UnParametrizedHyperparameter( - 'min_impurity_decrease', 0.0) + "min_impurity_decrease", 0.0 + ) bootstrap = CategoricalHyperparameter( - "bootstrap", ["True", "False"], default_value="True") - - cs.add_hyperparameters([criterion, max_features, - max_depth, min_samples_split, min_samples_leaf, - min_weight_fraction_leaf, max_leaf_nodes, - min_impurity_decrease, bootstrap]) + "bootstrap", ["True", "False"], default_value="True" + ) + + cs.add_hyperparameters( + [ + criterion, + max_features, + max_depth, + min_samples_split, + min_samples_leaf, + min_weight_fraction_leaf, + max_leaf_nodes, + min_impurity_decrease, + bootstrap, + ] + ) return cs diff --git a/autosklearn/pipeline/components/regression/sgd.py b/autosklearn/pipeline/components/regression/sgd.py index 8b3e7dbd34..3b3f939fa8 100644 --- a/autosklearn/pipeline/components/regression/sgd.py +++ b/autosklearn/pipeline/components/regression/sgd.py @@ -1,13 +1,16 @@ +from ConfigSpace.conditions import EqualsCondition, InCondition from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ - CategoricalHyperparameter, UnParametrizedHyperparameter -from ConfigSpace.conditions import InCondition, EqualsCondition +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformFloatHyperparameter, + UnParametrizedHyperparameter, +) from autosklearn.pipeline.components.base import ( AutoSklearnRegressionAlgorithm, IterativeComponent, ) -from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA, PREDICTIONS, SPARSE +from autosklearn.pipeline.constants import DENSE, PREDICTIONS, SPARSE, UNSIGNED_DATA from autosklearn.util.common import check_for_bool @@ -15,9 +18,21 @@ class SGD( IterativeComponent, AutoSklearnRegressionAlgorithm, ): - def __init__(self, loss, penalty, alpha, fit_intercept, tol, - learning_rate, l1_ratio=0.15, epsilon=0.1, - eta0=0.01, power_t=0.5, average=False, random_state=None): + def __init__( + self, + loss, + penalty, + alpha, + fit_intercept, + tol, + learning_rate, + l1_ratio=0.15, + epsilon=0.1, + eta0=0.01, + power_t=0.5, + average=False, + random_state=None, + ): self.max_iter = self.get_max_iter() self.loss = loss self.penalty = penalty @@ -43,8 +58,8 @@ def get_current_iter(self): return self.n_iter_ def iterative_fit(self, X, y, n_iter=2, refit=False): - from sklearn.linear_model import SGDRegressor import sklearn.preprocessing + from sklearn.linear_model import SGDRegressor # Need to fit at least two iterations, otherwise early stopping will not # work because we cannot determine whether the algorithm actually @@ -62,32 +77,31 @@ def iterative_fit(self, X, y, n_iter=2, refit=False): self.fully_fit_ = False self.alpha = float(self.alpha) - self.l1_ratio = float( - self.l1_ratio) if self.l1_ratio is not None else 0.15 - self.epsilon = float( - self.epsilon) if self.epsilon is not None else 0.1 + self.l1_ratio = float(self.l1_ratio) if self.l1_ratio is not None else 0.15 + self.epsilon = float(self.epsilon) if self.epsilon is not None else 0.1 self.eta0 = float(self.eta0) - self.power_t = float( - self.power_t) if self.power_t is not None else 0.25 + self.power_t = float(self.power_t) if self.power_t is not None else 0.25 self.average = check_for_bool(self.average) 
self.fit_intercept = check_for_bool(self.fit_intercept) self.tol = float(self.tol) - self.estimator = SGDRegressor(loss=self.loss, - penalty=self.penalty, - alpha=self.alpha, - fit_intercept=self.fit_intercept, - max_iter=n_iter, - tol=self.tol, - learning_rate=self.learning_rate, - l1_ratio=self.l1_ratio, - epsilon=self.epsilon, - eta0=self.eta0, - power_t=self.power_t, - shuffle=True, - average=self.average, - random_state=self.random_state, - warm_start=True) + self.estimator = SGDRegressor( + loss=self.loss, + penalty=self.penalty, + alpha=self.alpha, + fit_intercept=self.fit_intercept, + max_iter=n_iter, + tol=self.tol, + learning_rate=self.learning_rate, + l1_ratio=self.l1_ratio, + epsilon=self.epsilon, + eta0=self.eta0, + power_t=self.power_t, + shuffle=True, + average=self.average, + random_state=self.random_state, + warm_start=True, + ) self.scaler = sklearn.preprocessing.StandardScaler(copy=True) @@ -119,7 +133,8 @@ def iterative_fit(self, X, y, n_iter=2, refit=False): self.estimator._validate_params() self.estimator._partial_fit( - X, y_scaled, + X, + y_scaled, alpha=self.estimator.alpha, C=1.0, loss=self.estimator.loss, @@ -127,11 +142,14 @@ def iterative_fit(self, X, y, n_iter=2, refit=False): max_iter=n_iter, sample_weight=None, coef_init=None, - intercept_init=None + intercept_init=None, ) self.n_iter_ += self.estimator.n_iter_ - if self.estimator.max_iter >= self.max_iter or self.estimator.max_iter > self.n_iter_: + if ( + self.estimator.max_iter >= self.max_iter + or self.estimator.max_iter > self.n_iter_ + ): self.fully_fit_ = True return self @@ -139,7 +157,7 @@ def iterative_fit(self, X, y, n_iter=2, refit=False): def configuration_fully_fitted(self): if self.estimator is None: return False - elif not hasattr(self, 'fully_fit_'): + elif not hasattr(self, "fully_fit_"): return False else: return self.fully_fit_ @@ -152,18 +170,19 @@ def predict(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'SGD Regressor', - 'name': 'Stochastic Gradient Descent Regressor', - 'handles_regression': True, - 'handles_classification': False, - 'handles_multiclass': False, - 'handles_multilabel': False, - 'handles_multioutput': False, - 'is_deterministic': True, - 'handles_sparse': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (PREDICTIONS,), - } + return { + "shortname": "SGD Regressor", + "name": "Stochastic Gradient Descent Regressor", + "handles_regression": True, + "handles_classification": False, + "handles_multiclass": False, + "handles_multilabel": False, + "handles_multioutput": False, + "is_deterministic": True, + "handles_sparse": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (PREDICTIONS,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): @@ -171,34 +190,58 @@ def get_hyperparameter_search_space(dataset_properties=None): loss = CategoricalHyperparameter( "loss", - ["squared_loss", "huber", "epsilon_insensitive", "squared_epsilon_insensitive"], + [ + "squared_loss", + "huber", + "epsilon_insensitive", + "squared_epsilon_insensitive", + ], default_value="squared_loss", - ) + ) penalty = CategoricalHyperparameter( - "penalty", ["l1", "l2", "elasticnet"], default_value="l2") + "penalty", ["l1", "l2", "elasticnet"], default_value="l2" + ) alpha = UniformFloatHyperparameter( - "alpha", 1e-7, 1e-1, log=True, default_value=0.0001) + "alpha", 1e-7, 1e-1, log=True, default_value=0.0001 + ) l1_ratio = UniformFloatHyperparameter( - "l1_ratio", 1e-9, 1., log=True, default_value=0.15) - 
fit_intercept = UnParametrizedHyperparameter( - "fit_intercept", "True") + "l1_ratio", 1e-9, 1.0, log=True, default_value=0.15 + ) + fit_intercept = UnParametrizedHyperparameter("fit_intercept", "True") tol = UniformFloatHyperparameter( - "tol", 1e-5, 1e-1, default_value=1e-4, log=True) + "tol", 1e-5, 1e-1, default_value=1e-4, log=True + ) epsilon = UniformFloatHyperparameter( - "epsilon", 1e-5, 1e-1, default_value=0.1, log=True) + "epsilon", 1e-5, 1e-1, default_value=0.1, log=True + ) learning_rate = CategoricalHyperparameter( - "learning_rate", ["optimal", "invscaling", "constant"], - default_value="invscaling") + "learning_rate", + ["optimal", "invscaling", "constant"], + default_value="invscaling", + ) eta0 = UniformFloatHyperparameter( - "eta0", 1e-7, 1e-1, default_value=0.01, log=True) - power_t = UniformFloatHyperparameter( - "power_t", 1e-5, 1, default_value=0.25) + "eta0", 1e-7, 1e-1, default_value=0.01, log=True + ) + power_t = UniformFloatHyperparameter("power_t", 1e-5, 1, default_value=0.25) average = CategoricalHyperparameter( - "average", ["False", "True"], default_value="False") - - cs.add_hyperparameters([loss, penalty, alpha, l1_ratio, fit_intercept, - tol, epsilon, learning_rate, eta0, - power_t, average]) + "average", ["False", "True"], default_value="False" + ) + + cs.add_hyperparameters( + [ + loss, + penalty, + alpha, + l1_ratio, + fit_intercept, + tol, + epsilon, + learning_rate, + eta0, + power_t, + average, + ] + ) # TODO add passive/aggressive here, although not properly documented? elasticnet = EqualsCondition(l1_ratio, penalty, "elasticnet") @@ -206,17 +249,16 @@ def get_hyperparameter_search_space(dataset_properties=None): epsilon, loss, ["huber", "epsilon_insensitive", "squared_epsilon_insensitive"], - ) + ) # eta0 is only relevant if learning_rate!='optimal' according to code # https://github.com/scikit-learn/scikit-learn/blob/0.19.X/sklearn/ # linear_model/sgd_fast.pyx#L603 - eta0_in_inv_con = InCondition(eta0, learning_rate, ["invscaling", - "constant"]) - power_t_condition = EqualsCondition(power_t, learning_rate, - "invscaling") + eta0_in_inv_con = InCondition(eta0, learning_rate, ["invscaling", "constant"]) + power_t_condition = EqualsCondition(power_t, learning_rate, "invscaling") - cs.add_conditions([elasticnet, epsilon_condition, power_t_condition, - eta0_in_inv_con]) + cs.add_conditions( + [elasticnet, epsilon_condition, power_t_condition, eta0_in_inv_con] + ) return cs diff --git a/autosklearn/pipeline/constants.py b/autosklearn/pipeline/constants.py index 924baa185a..9dea960847 100644 --- a/autosklearn/pipeline/constants.py +++ b/autosklearn/pipeline/constants.py @@ -7,24 +7,29 @@ MULTIOUTPUT_REGRESSION = 5 REGRESSION_TASKS = [REGRESSION, MULTIOUTPUT_REGRESSION] -CLASSIFICATION_TASKS = [BINARY_CLASSIFICATION, MULTICLASS_CLASSIFICATION, - MULTILABEL_CLASSIFICATION] +CLASSIFICATION_TASKS = [ + BINARY_CLASSIFICATION, + MULTICLASS_CLASSIFICATION, + MULTILABEL_CLASSIFICATION, +] TASK_TYPES = REGRESSION_TASKS + CLASSIFICATION_TASKS -TASK_TYPES_TO_STRING = \ - {BINARY_CLASSIFICATION: "binary.classification", - MULTICLASS_CLASSIFICATION: "multiclass.classification", - MULTILABEL_CLASSIFICATION: "multilabel.classification", - REGRESSION: "regression", - MULTIOUTPUT_REGRESSION: "multioutput.regression"} +TASK_TYPES_TO_STRING = { + BINARY_CLASSIFICATION: "binary.classification", + MULTICLASS_CLASSIFICATION: "multiclass.classification", + MULTILABEL_CLASSIFICATION: "multilabel.classification", + REGRESSION: "regression", + 
MULTIOUTPUT_REGRESSION: "multioutput.regression", +} -STRING_TO_TASK_TYPES = \ - {"binary.classification": BINARY_CLASSIFICATION, - "multiclass.classification": MULTICLASS_CLASSIFICATION, - "multilabel.classification": MULTILABEL_CLASSIFICATION, - "regression": REGRESSION, - "multioutput.regression": MULTIOUTPUT_REGRESSION} +STRING_TO_TASK_TYPES = { + "binary.classification": BINARY_CLASSIFICATION, + "multiclass.classification": MULTICLASS_CLASSIFICATION, + "multilabel.classification": MULTILABEL_CLASSIFICATION, + "regression": REGRESSION, + "multioutput.regression": MULTIOUTPUT_REGRESSION, +} DENSE = 6 SPARSE = 7 @@ -34,10 +39,11 @@ SIGNED_DATA = 10 UNSIGNED_DATA = 11 -DATASET_PROPERTIES_TO_STRING = \ - {DENSE: 'dense', - SPARSE: 'sparse', - PREDICTIONS: 'predictions', - INPUT: 'input', - SIGNED_DATA: 'signed data', - UNSIGNED_DATA: 'unsigned data'} +DATASET_PROPERTIES_TO_STRING = { + DENSE: "dense", + SPARSE: "sparse", + PREDICTIONS: "predictions", + INPUT: "input", + SIGNED_DATA: "signed data", + UNSIGNED_DATA: "unsigned data", +} diff --git a/autosklearn/pipeline/create_searchspace_util.py b/autosklearn/pipeline/create_searchspace_util.py index 8c2ac33f0f..dff69acc6e 100644 --- a/autosklearn/pipeline/create_searchspace_util.py +++ b/autosklearn/pipeline/create_searchspace_util.py @@ -1,18 +1,21 @@ import itertools import numpy as np +from ConfigSpace.forbidden import ForbiddenAndConjunction, ForbiddenEqualsClause -from ConfigSpace.forbidden import ForbiddenAndConjunction -from ConfigSpace.forbidden import ForbiddenEqualsClause +from autosklearn.pipeline.constants import ( + DENSE, + INPUT, + PREDICTIONS, + SIGNED_DATA, + SPARSE, + UNSIGNED_DATA, +) -from autosklearn.pipeline.constants import \ - SIGNED_DATA, UNSIGNED_DATA, PREDICTIONS, INPUT, DENSE, SPARSE - -def get_match_array(pipeline, dataset_properties, - include=None, exclude=None): - sparse = dataset_properties.get('sparse') - signed = dataset_properties.get('signed') +def get_match_array(pipeline, dataset_properties, include=None, exclude=None): + sparse = dataset_properties.get("sparse") + signed = dataset_properties.get("signed") # Duck typing, not sure if it's good... 
node_i_is_choice = [] @@ -24,18 +27,24 @@ def get_match_array(pipeline, dataset_properties, is_choice = hasattr(node, "get_available_components") node_i_is_choice.append(is_choice) - node_include = include.get( - node_name) if include is not None else None - node_exclude = exclude.get( - node_name) if exclude is not None else None + node_include = include.get(node_name) if include is not None else None + node_exclude = exclude.get(node_name) if exclude is not None else None if is_choice: - node_i_choices_names.append(list(node.get_available_components( - dataset_properties, include=node_include, - exclude=node_exclude).keys())) - node_i_choices.append(list(node.get_available_components( - dataset_properties, include=node_include, - exclude=node_exclude).values())) + node_i_choices_names.append( + list( + node.get_available_components( + dataset_properties, include=node_include, exclude=node_exclude + ).keys() + ) + ) + node_i_choices.append( + list( + node.get_available_components( + dataset_properties, include=node_include, exclude=node_exclude + ).values() + ) + ) else: node_i_choices.append([node]) @@ -47,20 +56,24 @@ def get_match_array(pipeline, dataset_properties, pipeline_idxs = [range(dim) for dim in matches_dimensions] for pipeline_instantiation_idxs in itertools.product(*pipeline_idxs): - pipeline_instantiation = [node_i_choices[i][idx] for i, idx in - enumerate(pipeline_instantiation_idxs)] + pipeline_instantiation = [ + node_i_choices[i][idx] for i, idx in enumerate(pipeline_instantiation_idxs) + ] data_is_sparse = sparse dataset_is_signed = signed for node in pipeline_instantiation: - node_input = node.get_properties()['input'] - node_output = node.get_properties()['output'] + node_input = node.get_properties()["input"] + node_output = node.get_properties()["output"] # First check if these two instantiations of this node can work # together. 
Do this in multiple if statements to maintain # readability - if (data_is_sparse and SPARSE not in node_input) or \ - not data_is_sparse and DENSE not in node_input: + if ( + (data_is_sparse and SPARSE not in node_input) + or not data_is_sparse + and DENSE not in node_input + ): matches[pipeline_instantiation_idxs] = 0 break # No need to check if the node can handle SIGNED_DATA; this is @@ -69,10 +82,16 @@ def get_match_array(pipeline, dataset_properties, matches[pipeline_instantiation_idxs] = 0 break - if (INPUT in node_output and DENSE not in node_output and SPARSE not in node_output) \ - or PREDICTIONS in node_output \ - or (not data_is_sparse and DENSE in node_input and DENSE in node_output) \ - or (data_is_sparse and SPARSE in node_input and SPARSE in node_output): + if ( + ( + INPUT in node_output + and DENSE not in node_output + and SPARSE not in node_output + ) + or PREDICTIONS in node_output + or (not data_is_sparse and DENSE in node_input and DENSE in node_output) + or (data_is_sparse and SPARSE in node_input and SPARSE in node_output) + ): # Don't change the data_is_sparse flag pass elif data_is_sparse and DENSE in node_output: @@ -87,8 +106,11 @@ def get_match_array(pipeline, dataset_properties, if PREDICTIONS in node_output: pass - elif (INPUT in node_output and SIGNED_DATA not in node_output and - UNSIGNED_DATA not in node_output): + elif ( + INPUT in node_output + and SIGNED_DATA not in node_output + and UNSIGNED_DATA not in node_output + ): pass elif SIGNED_DATA in node_output: dataset_is_signed = True @@ -103,27 +125,32 @@ def get_match_array(pipeline, dataset_properties, return matches -def find_active_choices(matches, node, node_idx, dataset_properties, include=None, exclude=None): +def find_active_choices( + matches, node, node_idx, dataset_properties, include=None, exclude=None +): if not hasattr(node, "get_available_components"): raise ValueError() - available_components = node.get_available_components(dataset_properties, - include=include, - exclude=exclude) - assert matches.shape[node_idx] == len(available_components), \ - (matches.shape[node_idx], len(available_components)) + available_components = node.get_available_components( + dataset_properties, include=include, exclude=exclude + ) + assert matches.shape[node_idx] == len(available_components), ( + matches.shape[node_idx], + len(available_components), + ) choices = [] for c_idx, component in enumerate(available_components): - slices = tuple(slice(None) if idx != node_idx else slice(c_idx, c_idx+1) - for idx in range(len(matches.shape))) + slices = tuple( + slice(None) if idx != node_idx else slice(c_idx, c_idx + 1) + for idx in range(len(matches.shape)) + ) if np.sum(matches[slices]) > 0: choices.append(component) return choices -def add_forbidden(conf_space, pipeline, matches, dataset_properties, - include, exclude): +def add_forbidden(conf_space, pipeline, matches, dataset_properties, include, exclude): # Not sure if this works for 3D node_i_is_choice = [] node_i_choices_names = [] @@ -134,18 +161,20 @@ def add_forbidden(conf_space, pipeline, matches, dataset_properties, is_choice = hasattr(node, "get_available_components") node_i_is_choice.append(is_choice) - node_include = include.get( - node_name) if include is not None else None - node_exclude = exclude.get( - node_name) if exclude is not None else None + node_include = include.get(node_name) if include is not None else None + node_exclude = exclude.get(node_name) if exclude is not None else None if is_choice: - 
node_i_choices_names.append(node.get_available_components( - dataset_properties, include=node_include, - exclude=node_exclude).keys()) - node_i_choices.append(node.get_available_components( - dataset_properties, include=node_include, - exclude=node_exclude).values()) + node_i_choices_names.append( + node.get_available_components( + dataset_properties, include=node_include, exclude=node_exclude + ).keys() + ) + node_i_choices.append( + node.get_available_components( + dataset_properties, include=node_include, exclude=node_exclude + ).values() + ) else: node_i_choices_names.append([node_name]) @@ -185,8 +214,8 @@ def add_forbidden(conf_space, pipeline, matches, dataset_properties, for idx in indices: node = all_nodes[idx] available_components = node.get_available_components( - dataset_properties, - include=node_i_choices_names[idx]) + dataset_properties, include=node_i_choices_names[idx] + ) assert len(available_components) > 0, len(available_components) skip_array_shape.append(len(available_components)) num_node_choices.append(range(len(available_components))) @@ -198,9 +227,11 @@ def add_forbidden(conf_space, pipeline, matches, dataset_properties, for node_idx, choice_idx in enumerate(product): node_idx += start_idx slices_ = tuple( - slice(None) if idx != node_idx else - slice(choice_idx, choice_idx + 1) for idx in - range(len(matches.shape))) + slice(None) + if idx != node_idx + else slice(choice_idx, choice_idx + 1) + for idx in range(len(matches.shape)) + ) if np.sum(matches[slices_]) == 0: skip_array[product] = 1 @@ -210,10 +241,13 @@ def add_forbidden(conf_space, pipeline, matches, dataset_properties, continue slices = tuple( - slice(None) if idx not in indices else - slice(product[idx - start_idx], - product[idx - start_idx] + 1) for idx in - range(len(matches.shape))) + slice(None) + if idx not in indices + else slice( + product[idx - start_idx], product[idx - start_idx] + 1 + ) + for idx in range(len(matches.shape)) + ) # This prints the affected nodes # print [node_choice_names[i][product[i]] @@ -221,9 +255,12 @@ def add_forbidden(conf_space, pipeline, matches, dataset_properties, # np.sum(matches[slices]) if np.sum(matches[slices]) == 0: - constraint = tuple([(node_names[i], - node_choice_names[i][product[i]]) - for i in range(len(product))]) + constraint = tuple( + [ + (node_names[i], node_choice_names[i][product[i]]) + for i in range(len(product)) + ] + ) # Check if a more general constraint/forbidden clause # was already added @@ -231,8 +268,12 @@ def add_forbidden(conf_space, pipeline, matches, dataset_properties, for constraint_length in range(2, len(constraint)): constr_starts = len(constraint) - constraint_length + 1 for constraint_start_idx in range(constr_starts): - constraint_end_idx = constraint_start_idx + constraint_length - sub_constraint = constraint[constraint_start_idx:constraint_end_idx] + constraint_end_idx = ( + constraint_start_idx + constraint_length + ) + sub_constraint = constraint[ + constraint_start_idx:constraint_end_idx + ] if sub_constraint in constraints: continue_ = True break @@ -246,9 +287,13 @@ def add_forbidden(conf_space, pipeline, matches, dataset_properties, forbiddens = [] for i in range(len(product)): forbiddens.append( - ForbiddenEqualsClause(conf_space.get_hyperparameter( - node_names[i] + ":__choice__"), - node_choice_names[i][product[i]])) + ForbiddenEqualsClause( + conf_space.get_hyperparameter( + node_names[i] + ":__choice__" + ), + node_choice_names[i][product[i]], + ) + ) forbidden = ForbiddenAndConjunction(*forbiddens) 
conf_space.add_forbidden_clause(forbidden) diff --git a/autosklearn/pipeline/implementations/CategoryShift.py b/autosklearn/pipeline/implementations/CategoryShift.py index 3eee659d3f..4c504cf666 100644 --- a/autosklearn/pipeline/implementations/CategoryShift.py +++ b/autosklearn/pipeline/implementations/CategoryShift.py @@ -1,12 +1,11 @@ import numpy as np from scipy import sparse -from sklearn.utils import check_array from sklearn.base import BaseEstimator, TransformerMixin +from sklearn.utils import check_array class CategoryShift(BaseEstimator, TransformerMixin): - """ Add 3 to every category. - """ + """Add 3 to every category.""" def __init__(self, random_state=None): self.random_state = random_state @@ -15,14 +14,16 @@ def _convert_and_check_X(self, X): X_data = X.data if sparse.issparse(X) else X # Check if data is numeric and positive - if X_data.dtype.kind not in set('buif') or np.nanmin(X_data) < 0: - raise ValueError('Categories should be non-negative numbers. ' - 'NOTE: floats will be casted to integers.') + if X_data.dtype.kind not in set("buif") or np.nanmin(X_data) < 0: + raise ValueError( + "Categories should be non-negative numbers. " + "NOTE: floats will be casted to integers." + ) # Use check_array to make sure we are using the right kind of sparse array # Notice that we cannot convert the array to integer right now. That would get # rid of the np.nans and we need them later on for the imputation. - X = check_array(X, accept_sparse='csc', force_all_finite=False, copy=True) + X = check_array(X, accept_sparse="csc", force_all_finite=False, copy=True) return X def fit(self, X, y=None): diff --git a/autosklearn/pipeline/implementations/MinorityCoalescer.py b/autosklearn/pipeline/implementations/MinorityCoalescer.py index 989cb4a9c9..e9bed0349e 100644 --- a/autosklearn/pipeline/implementations/MinorityCoalescer.py +++ b/autosklearn/pipeline/implementations/MinorityCoalescer.py @@ -4,7 +4,7 @@ class MinorityCoalescer(BaseEstimator, TransformerMixin): - """ Group together categories which occurence is less than a specified + """Group together categories which occurence is less than a specified minimum fraction. Coalesced categories get index of one. 
""" @@ -31,7 +31,8 @@ def fit(self, X, y=None): indptr_start = X.indptr[column] indptr_end = X.indptr[column + 1] unique, counts = np.unique( - X.data[indptr_start:indptr_end], return_counts=True) + X.data[indptr_start:indptr_end], return_counts=True + ) colsize = indptr_end - indptr_start else: unique, counts = np.unique(X[:, column], return_counts=True) @@ -61,11 +62,15 @@ def transform(self, X): indptr_start = X.indptr[column] indptr_end = X.indptr[column + 1] X.data[indptr_start:indptr_end][ - X.data[indptr_start:indptr_end] == unique_value] = 1 + X.data[indptr_start:indptr_end] == unique_value + ] = 1 else: unique = np.unique(X[:, column]) - unique_values = [unique_value for unique_value in unique - if unique_value not in self.do_not_coalesce_[column]] + unique_values = [ + unique_value + for unique_value in unique + if unique_value not in self.do_not_coalesce_[column] + ] mask = np.isin(X[:, column], unique_values) X[mask, column] = 1 return X diff --git a/autosklearn/pipeline/implementations/SparseOneHotEncoder.py b/autosklearn/pipeline/implementations/SparseOneHotEncoder.py index beee99261b..18d491596c 100644 --- a/autosklearn/pipeline/implementations/SparseOneHotEncoder.py +++ b/autosklearn/pipeline/implementations/SparseOneHotEncoder.py @@ -43,8 +43,7 @@ def fit(self, X, y=None): def _check_X(self, X): if not sparse.issparse(X): raise TypeError("SparseOneHotEncoder requires X to be sparse") - X = check_array(X, accept_sparse='csc', force_all_finite=False, - dtype=np.int32) + X = check_array(X, accept_sparse="csc", force_all_finite=False, dtype=np.int32) if X.min() < 0: raise ValueError("X needs to contain only non-negative integers.") @@ -63,15 +62,17 @@ def fit_transform(self, X, y=None): row_indices = X.indices column_indices = [] for i in range(len(X.indptr) - 1): - nbr = X.indptr[i+1] - X.indptr[i] + nbr = X.indptr[i + 1] - X.indptr[i] column_indices_ = [indices[i]] * nbr - column_indices_ += X.data[X.indptr[i]:X.indptr[i+1]] + column_indices_ += X.data[X.indptr[i] : X.indptr[i + 1]] column_indices.extend(column_indices_) data = np.ones(X.data.size) - out = sparse.coo_matrix((data, (row_indices, column_indices)), - shape=(n_samples, indices[-1]), - dtype=np.int32).tocsc() + out = sparse.coo_matrix( + (data, (row_indices, column_indices)), + shape=(n_samples, indices[-1]), + dtype=np.int32, + ).tocsc() mask = np.array(out.sum(axis=0)).ravel() != 0 active_features = np.where(mask)[0] @@ -85,9 +86,10 @@ def transform(self, X): n_samples, n_features = X.shape indices = self.feature_indices_ if n_features != indices.shape[0] - 1: - raise ValueError("X has different shape than during fitting." - " Expected %d, got %d." - % (indices.shape[0] - 1, n_features)) + raise ValueError( + "X has different shape than during fitting." + " Expected %d, got %d." 
% (indices.shape[0] - 1, n_features) + ) n_values_check = X.max(axis=0).toarray().flatten() + 1 @@ -99,7 +101,7 @@ def transform(self, X): for i, n_value_check in enumerate(n_values_check): if (n_value_check - 1) >= self.n_values_[i]: indptr_start = X.indptr[i] - indptr_end = X.indptr[i+1] + indptr_end = X.indptr[i + 1] zeros_mask = X.data[indptr_start:indptr_end] >= self.n_values_[i] X.data[indptr_start:indptr_end][zeros_mask] = 0 @@ -108,13 +110,15 @@ def transform(self, X): for i in range(len(X.indptr) - 1): nbr = X.indptr[i + 1] - X.indptr[i] column_indices_ = [indices[i]] * nbr - column_indices_ += X.data[X.indptr[i]:X.indptr[i + 1]] + column_indices_ += X.data[X.indptr[i] : X.indptr[i + 1]] column_indices.extend(column_indices_) data = np.ones(X.data.size) - out = sparse.coo_matrix((data, (row_indices, column_indices)), - shape=(n_samples, indices[-1]), - dtype=np.int32).tocsc() + out = sparse.coo_matrix( + (data, (row_indices, column_indices)), + shape=(n_samples, indices[-1]), + dtype=np.int32, + ).tocsc() out = out[:, self.active_features_] return out.tocsr() diff --git a/autosklearn/pipeline/implementations/__init__.py b/autosklearn/pipeline/implementations/__init__.py index 8f0ce6cb7c..92bf78f389 100644 --- a/autosklearn/pipeline/implementations/__init__.py +++ b/autosklearn/pipeline/implementations/__init__.py @@ -1 +1 @@ -__author__ = 'feurerm' +__author__ = "feurerm" diff --git a/autosklearn/pipeline/implementations/util.py b/autosklearn/pipeline/implementations/util.py index cacf3a9b5d..4ebc01ad39 100644 --- a/autosklearn/pipeline/implementations/util.py +++ b/autosklearn/pipeline/implementations/util.py @@ -18,7 +18,7 @@ def softmax(df): def convert_multioutput_multiclass_to_multilabel(probas): - """ Converts the model predicted probabilities to useable format. + """Converts the model predicted probabilities to useable format. In some cases, models predicted_proba can output an array of shape (2, n_samples, n_labels) where the 2 stands for the probability of positive @@ -55,8 +55,10 @@ def convert_multioutput_multiclass_to_multilabel(probas): # In case multioutput-multiclass input was used, where we have # a probability for each class elif n_probabilities > 2: - raise ValueError('Multioutput-Multiclass supported by ' - 'scikit-learn, but not by auto-sklearn!') + raise ValueError( + "Multioutput-Multiclass supported by " + "scikit-learn, but not by auto-sklearn!" 
+ ) else: RuntimeError(f"Unkown predict_proba output={probas}") @@ -64,7 +66,7 @@ def convert_multioutput_multiclass_to_multilabel(probas): elif isinstance(probas, np.ndarray): if len(probas.shape) > 2: - raise ValueError('New unsupported sklearn output!') + raise ValueError("New unsupported sklearn output!") else: return probas diff --git a/autosklearn/pipeline/regression.py b/autosklearn/pipeline/regression.py index af2885be4d..638f8ae3cb 100644 --- a/autosklearn/pipeline/regression.py +++ b/autosklearn/pipeline/regression.py @@ -1,21 +1,19 @@ +from typing import Optional, Union + import copy from itertools import product -from typing import Optional, Union import numpy as np +from ConfigSpace.configuration_space import Configuration, ConfigurationSpace +from ConfigSpace.forbidden import ForbiddenAndConjunction, ForbiddenEqualsClause from sklearn.base import RegressorMixin -from ConfigSpace.forbidden import ForbiddenEqualsClause, ForbiddenAndConjunction - -from autosklearn.pipeline.components.data_preprocessing import DataPreprocessorChoice - - -from ConfigSpace.configuration_space import ConfigurationSpace, Configuration -from autosklearn.pipeline.components import regression as \ - regression_components -from autosklearn.pipeline.components import feature_preprocessing as \ - feature_preprocessing_components from autosklearn.pipeline.base import BasePipeline +from autosklearn.pipeline.components import ( + feature_preprocessing as feature_preprocessing_components, +) +from autosklearn.pipeline.components import regression as regression_components +from autosklearn.pipeline.components.data_preprocessing import DataPreprocessorChoice from autosklearn.pipeline.constants import SPARSE @@ -66,6 +64,7 @@ class SimpleRegressionPipeline(RegressorMixin, BasePipeline): -------- """ + def __init__( self, config: Optional[Configuration] = None, @@ -74,30 +73,34 @@ def __init__( include=None, exclude=None, random_state: Optional[Union[int, np.random.RandomState]] = None, - init_params=None + init_params=None, ): self._output_dtype = np.float32 if dataset_properties is None: dataset_properties = dict() - if 'target_type' not in dataset_properties: - dataset_properties['target_type'] = 'regression' + if "target_type" not in dataset_properties: + dataset_properties["target_type"] = "regression" super().__init__( - config=config, steps=steps, + config=config, + steps=steps, dataset_properties=dataset_properties, - include=include, exclude=exclude, random_state=random_state, - init_params=init_params) + include=include, + exclude=exclude, + random_state=random_state, + init_params=init_params, + ) def fit_estimator(self, X, y, **fit_params): self.y_max_ = np.nanmax(y) self.y_min_ = np.nanmin(y) - return super(SimpleRegressionPipeline, self).fit_estimator( - X, y, **fit_params) + return super(SimpleRegressionPipeline, self).fit_estimator(X, y, **fit_params) def iterative_fit(self, X, y, n_iter=1, **fit_params): self.y_max_ = np.nanmax(y) self.y_min_ = np.nanmin(y) return super(SimpleRegressionPipeline, self).iterative_fit( - X, y, n_iter=n_iter, **fit_params) + X, y, n_iter=n_iter, **fit_params + ) def predict(self, X, batch_size=None): y = super().predict(X, batch_size=batch_size) @@ -108,8 +111,9 @@ def predict(self, X, batch_size=None): y[y < (0.5 * self.y_min_)] = 0.5 * self.y_min_ return y - def _get_hyperparameter_search_space(self, include=None, exclude=None, - dataset_properties=None): + def _get_hyperparameter_search_space( + self, include=None, exclude=None, dataset_properties=None + ): 
"""Return the configuration space for the CASH problem. Parameters @@ -134,43 +138,57 @@ def _get_hyperparameter_search_space(self, include=None, exclude=None, if dataset_properties is None or not isinstance(dataset_properties, dict): dataset_properties = dict() - if 'target_type' not in dataset_properties: - dataset_properties['target_type'] = 'regression' - if dataset_properties['target_type'] != 'regression': - dataset_properties['target_type'] = 'regression' + if "target_type" not in dataset_properties: + dataset_properties["target_type"] = "regression" + if dataset_properties["target_type"] != "regression": + dataset_properties["target_type"] = "regression" - if 'sparse' not in dataset_properties: + if "sparse" not in dataset_properties: # This dataset is probably dense - dataset_properties['sparse'] = False + dataset_properties["sparse"] = False cs = self._get_base_search_space( - cs=cs, dataset_properties=dataset_properties, - exclude=exclude, include=include, pipeline=self.steps) + cs=cs, + dataset_properties=dataset_properties, + exclude=exclude, + include=include, + pipeline=self.steps, + ) - regressors = cs.get_hyperparameter('regressor:__choice__').choices - preprocessors = cs.get_hyperparameter('feature_preprocessor:__choice__').choices + regressors = cs.get_hyperparameter("regressor:__choice__").choices + preprocessors = cs.get_hyperparameter("feature_preprocessor:__choice__").choices available_regressors = self._final_estimator.get_available_components( - dataset_properties) + dataset_properties + ) - possible_default_regressor = copy.copy(list( - available_regressors.keys())) - default = cs.get_hyperparameter('regressor:__choice__').default_value - del possible_default_regressor[ - possible_default_regressor.index(default)] + possible_default_regressor = copy.copy(list(available_regressors.keys())) + default = cs.get_hyperparameter("regressor:__choice__").default_value + del possible_default_regressor[possible_default_regressor.index(default)] # A regressor which can handle sparse data after the densifier is # forbidden for memory issues for key in regressors: - if SPARSE in available_regressors[key].get_properties(dataset_properties=None)['input']: - if 'densifier' in preprocessors: + if ( + SPARSE + in available_regressors[key].get_properties(dataset_properties=None)[ + "input" + ] + ): + if "densifier" in preprocessors: while True: try: forb_reg = ForbiddenEqualsClause( - cs.get_hyperparameter('regressor:__choice__'), key) - forb_fpp = ForbiddenEqualsClause(cs.get_hyperparameter( - 'feature_preprocessor:__choice__'), 'densifier') + cs.get_hyperparameter("regressor:__choice__"), key + ) + forb_fpp = ForbiddenEqualsClause( + cs.get_hyperparameter( + "feature_preprocessor:__choice__" + ), + "densifier", + ) cs.add_forbidden_clause( - ForbiddenAndConjunction(forb_reg, forb_fpp)) + ForbiddenAndConjunction(forb_reg, forb_fpp) + ) # Success break except ValueError: @@ -179,16 +197,25 @@ def _get_hyperparameter_search_space(self, include=None, exclude=None, default = possible_default_regressor.pop() except IndexError: raise ValueError( - "Cannot find a legal default configuration.") + "Cannot find a legal default configuration." 
+ ) cs.get_hyperparameter( - 'regressor:__choice__').default_value = default + "regressor:__choice__" + ).default_value = default # which would take too long # Combinations of tree-based models with feature learning: regressors_ = [ - "adaboost", "ard_regression", "decision_tree", - "extra_trees", "gaussian_process", "gradient_boosting", - "k_nearest_neighbors", "libsvm_svr", "mlp", "random_forest" + "adaboost", + "ard_regression", + "decision_tree", + "extra_trees", + "gaussian_process", + "gradient_boosting", + "k_nearest_neighbors", + "libsvm_svr", + "mlp", + "random_forest", ] feature_learning_ = ["kitchen_sinks", "kernel_pca", "nystroem_sampler"] @@ -199,11 +226,19 @@ def _get_hyperparameter_search_space(self, include=None, exclude=None, continue while True: try: - cs.add_forbidden_clause(ForbiddenAndConjunction( - ForbiddenEqualsClause(cs.get_hyperparameter( - "regressor:__choice__"), r), - ForbiddenEqualsClause(cs.get_hyperparameter( - "feature_preprocessor:__choice__"), f))) + cs.add_forbidden_clause( + ForbiddenAndConjunction( + ForbiddenEqualsClause( + cs.get_hyperparameter("regressor:__choice__"), r + ), + ForbiddenEqualsClause( + cs.get_hyperparameter( + "feature_preprocessor:__choice__" + ), + f, + ), + ) + ) break except KeyError: break @@ -212,10 +247,10 @@ def _get_hyperparameter_search_space(self, include=None, exclude=None, try: default = possible_default_regressor.pop() except IndexError: - raise ValueError( - "Cannot find a legal default configuration.") + raise ValueError("Cannot find a legal default configuration.") cs.get_hyperparameter( - 'regressor:__choice__').default_value = default + "regressor:__choice__" + ).default_value = default self.configuration_space = cs self.dataset_properties = dataset_properties @@ -227,33 +262,34 @@ def _get_estimator_components(self): def _get_pipeline_steps(self, dataset_properties, init_params=None): steps = [] - default_dataset_properties = {'target_type': 'regression'} + default_dataset_properties = {"target_type": "regression"} if dataset_properties is not None and isinstance(dataset_properties, dict): default_dataset_properties.update(dataset_properties) - steps.extend([ - [ - 'data_preprocessor', - DataPreprocessorChoice( - dataset_properties=default_dataset_properties, - random_state=self.random_state - ) - ], - [ - 'feature_preprocessor', - feature_preprocessing_components.FeaturePreprocessorChoice( - dataset_properties=default_dataset_properties, - random_state=self.random_state - ) - ], + steps.extend( [ - 'regressor', - regression_components.RegressorChoice( - default_dataset_properties, - random_state=self.random_state - ) + [ + "data_preprocessor", + DataPreprocessorChoice( + dataset_properties=default_dataset_properties, + random_state=self.random_state, + ), + ], + [ + "feature_preprocessor", + feature_preprocessing_components.FeaturePreprocessorChoice( + dataset_properties=default_dataset_properties, + random_state=self.random_state, + ), + ], + [ + "regressor", + regression_components.RegressorChoice( + default_dataset_properties, random_state=self.random_state + ), + ], ] - ]) + ) return steps diff --git a/autosklearn/pipeline/util.py b/autosklearn/pipeline/util.py index 1a70deb30b..228c31357d 100644 --- a/autosklearn/pipeline/util.py +++ b/autosklearn/pipeline/util.py @@ -18,8 +18,7 @@ def find_sklearn_classes(class_): for root, dirs, files in os.walk(sklearn_path): all_subdirectories.append(root) - for module_loader, module_name, ispkg in \ - pkgutil.iter_modules(all_subdirectories): + for module_loader, 
module_name, ispkg in pkgutil.iter_modules(all_subdirectories): # Work around some issues... if module_name in ["hmm", "mixture"]: @@ -33,8 +32,7 @@ def find_sklearn_classes(class_): pkg = importlib.import_module(full_module_name) for member_name, obj in inspect.getmembers(pkg): - if inspect.isclass(obj) and \ - issubclass(obj, class_): + if inspect.isclass(obj) and issubclass(obj, class_): classifier = obj # print member_name, obj classifiers.add(classifier) @@ -44,15 +42,20 @@ def find_sklearn_classes(class_): print(classifier) -def get_dataset(dataset='iris', make_sparse=False, add_NaNs=False, - train_size_maximum=150, make_multilabel=False, - make_binary=False): +def get_dataset( + dataset="iris", + make_sparse=False, + add_NaNs=False, + train_size_maximum=150, + make_multilabel=False, + make_binary=False, +): iris = getattr(sklearn.datasets, "load_%s" % dataset)() X = iris.data.astype(np.float32) Y = iris.target rs = np.random.RandomState(42) indices = np.arange(X.shape[0]) - train_size = min(int(len(indices) / 3. * 2.), train_size_maximum) + train_size = min(int(len(indices) / 3.0 * 2.0), train_size_maximum) rs.shuffle(indices) X = X[indices] Y = Y[indices] @@ -76,8 +79,10 @@ def get_dataset(dataset='iris', make_sparse=False, add_NaNs=False, X_test.eliminate_zeros() if make_binary and make_multilabel: - raise ValueError('Can convert dataset only to one of the two ' - 'options binary or multilabel!') + raise ValueError( + "Can convert dataset only to one of the two " + "options binary or multilabel!" + ) if make_binary: Y_train[Y_train > 1] = 1 @@ -97,23 +102,31 @@ def get_dataset(dataset='iris', make_sparse=False, add_NaNs=False, return X_train, Y_train, X_test, Y_test -def _test_classifier(classifier, dataset='iris', sparse=False, - train_size_maximum=150, make_multilabel=False, - make_binary=False): - X_train, Y_train, X_test, Y_test = get_dataset(dataset=dataset, - make_sparse=sparse, - train_size_maximum=train_size_maximum, - make_multilabel=make_multilabel, - make_binary=make_binary) +def _test_classifier( + classifier, + dataset="iris", + sparse=False, + train_size_maximum=150, + make_multilabel=False, + make_binary=False, +): + X_train, Y_train, X_test, Y_test = get_dataset( + dataset=dataset, + make_sparse=sparse, + train_size_maximum=train_size_maximum, + make_multilabel=make_multilabel, + make_binary=make_binary, + ) configuration_space = classifier.get_hyperparameter_search_space( - dataset_properties={'sparse': sparse} + dataset_properties={"sparse": sparse} ) default_config = configuration_space.get_default_configuration() classifier = classifier(random_state=0, **default_config) - if hasattr(classifier, 'iterative_fit'): + if hasattr(classifier, "iterative_fit"): + class counter(object): def __init__(self, func): self.n_calls = 0 @@ -122,11 +135,12 @@ def __init__(self, func): def __call__(self, *args, **kwargs): self.n_calls += 1 return self.func(*args, **kwargs) + classifier.iterative_fit = counter(classifier.iterative_fit) predictor = classifier.fit(X_train, Y_train) - if hasattr(classifier, 'iterative_fit'): + if hasattr(classifier, "iterative_fit"): n_calls = classifier.iterative_fit.n_calls else: n_calls = None @@ -135,11 +149,10 @@ def __call__(self, *args, **kwargs): return predictions, Y_test, n_calls -def _test_classifier_iterative_fit(classifier, dataset='iris', sparse=False): - X_train, Y_train, X_test, Y_test = get_dataset(dataset=dataset, - make_sparse=sparse) +def _test_classifier_iterative_fit(classifier, dataset="iris", sparse=False): + X_train, 
Y_train, X_test, Y_test = get_dataset(dataset=dataset, make_sparse=sparse) configuration_space = classifier.get_hyperparameter_search_space( - dataset_properties={'sparse': sparse} + dataset_properties={"sparse": sparse} ) default_config = configuration_space.get_default_configuration() @@ -148,7 +161,7 @@ def _test_classifier_iterative_fit(classifier, dataset='iris', sparse=False): iteration = 2 while not classifier.configuration_fully_fitted(): - n_iter = int(2 ** iteration / 2) + n_iter = int(2**iteration / 2) classifier.iterative_fit(X_train, Y_train, n_iter=n_iter) iteration += 1 @@ -156,15 +169,21 @@ def _test_classifier_iterative_fit(classifier, dataset='iris', sparse=False): return predictions, Y_test, classifier -def _test_classifier_predict_proba(classifier, dataset='iris', sparse=False, - train_size_maximum=150, - make_multilabel=False, - make_binary=False): - X_train, Y_train, X_test, Y_test = get_dataset(dataset=dataset, - make_sparse=sparse, - train_size_maximum=train_size_maximum, - make_multilabel=make_multilabel, - make_binary=make_binary) +def _test_classifier_predict_proba( + classifier, + dataset="iris", + sparse=False, + train_size_maximum=150, + make_multilabel=False, + make_binary=False, +): + X_train, Y_train, X_test, Y_test = get_dataset( + dataset=dataset, + make_sparse=sparse, + train_size_maximum=train_size_maximum, + make_multilabel=make_multilabel, + make_binary=make_binary, + ) configuration_space = classifier.get_hyperparameter_search_space() default_config = configuration_space.get_default_configuration() @@ -175,11 +194,12 @@ def _test_classifier_predict_proba(classifier, dataset='iris', sparse=False, return predictions, Y_test -def _test_preprocessing(Preprocessor, dataset='iris', make_sparse=False, - train_size_maximum=150): - X_train, Y_train, X_test, Y_test = get_dataset(dataset=dataset, - make_sparse=make_sparse, - train_size_maximum=train_size_maximum) +def _test_preprocessing( + Preprocessor, dataset="iris", make_sparse=False, train_size_maximum=150 +): + X_train, Y_train, X_test, Y_test = get_dataset( + dataset=dataset, make_sparse=make_sparse, train_size_maximum=train_size_maximum + ) original_X_train = X_train.copy() configuration_space = Preprocessor.get_hyperparameter_search_space() default_config = configuration_space.get_default_configuration() @@ -191,8 +211,9 @@ def _test_preprocessing(Preprocessor, dataset='iris', make_sparse=False, class PreprocessingTestCase(unittest.TestCase): - def _test_preprocessing_dtype(self, Preprocessor, add_NaNs=False, - test_sparse=True, dataset='iris'): + def _test_preprocessing_dtype( + self, Preprocessor, add_NaNs=False, test_sparse=True, dataset="iris" + ): # Dense # np.float32 X_train, Y_train, X_test, Y_test = get_dataset(dataset, add_NaNs=add_NaNs) @@ -220,8 +241,9 @@ def _test_preprocessing_dtype(self, Preprocessor, add_NaNs=False, if test_sparse is True: # Sparse # np.float32 - X_train, Y_train, X_test, Y_test = get_dataset(dataset, make_sparse=True, - add_NaNs=add_NaNs) + X_train, Y_train, X_test, Y_test = get_dataset( + dataset, make_sparse=True, add_NaNs=add_NaNs + ) self.assertEqual(X_train.dtype, np.float32) configuration_space = Preprocessor.get_hyperparameter_search_space() default_config = configuration_space.get_default_configuration() @@ -232,9 +254,9 @@ def _test_preprocessing_dtype(self, Preprocessor, add_NaNs=False, # self.assertEqual(Xt.dtype, np.float32) # np.float64 - X_train, Y_train, X_test, Y_test = get_dataset(dataset, - make_sparse=True, - add_NaNs=add_NaNs) + X_train, Y_train, 
X_test, Y_test = get_dataset( + dataset, make_sparse=True, add_NaNs=add_NaNs + ) X_train = X_train.astype(np.float64) configuration_space = Preprocessor.get_hyperparameter_search_space() default_config = configuration_space.get_default_configuration() @@ -245,9 +267,8 @@ def _test_preprocessing_dtype(self, Preprocessor, add_NaNs=False, # self.assertEqual(Xt.dtype, np.float64) -def _test_regressor(Regressor, dataset='diabetes', sparse=False): - X_train, Y_train, X_test, Y_test = get_dataset(dataset=dataset, - make_sparse=sparse) +def _test_regressor(Regressor, dataset="diabetes", sparse=False): + X_train, Y_train, X_test, Y_test = get_dataset(dataset=dataset, make_sparse=sparse) configuration_space = Regressor.get_hyperparameter_search_space() default_config = configuration_space.get_default_configuration() @@ -258,7 +279,8 @@ def _test_regressor(Regressor, dataset='diabetes', sparse=False): X_test_hash = hash(str(X_test)) Y_train_hash = hash(str(Y_train)) - if hasattr(regressor, 'iterative_fit'): + if hasattr(regressor, "iterative_fit"): + class counter(object): def __init__(self, func): self.n_calls = 0 @@ -272,24 +294,25 @@ def __call__(self, *args, **kwargs): predictor = regressor.fit(X_train, Y_train) - if hasattr(regressor, 'iterative_fit'): + if hasattr(regressor, "iterative_fit"): n_calls = regressor.iterative_fit.n_calls else: n_calls = None predictions = predictor.predict(X_test) - if X_train_hash != hash(str(X_train)) or \ - X_test_hash != hash(str(X_test)) or \ - Y_train_hash != hash(str(Y_train)): + if ( + X_train_hash != hash(str(X_train)) + or X_test_hash != hash(str(X_test)) + or Y_train_hash != hash(str(Y_train)) + ): raise ValueError("Model modified data") return predictions, Y_test, n_calls -def _test_regressor_iterative_fit(Regressor, dataset='diabetes', sparse=False): - X_train, Y_train, X_test, Y_test = get_dataset(dataset=dataset, - make_sparse=sparse) +def _test_regressor_iterative_fit(Regressor, dataset="diabetes", sparse=False): + X_train, Y_train, X_test, Y_test = get_dataset(dataset=dataset, make_sparse=sparse) configuration_space = Regressor.get_hyperparameter_search_space( - dataset_properties={'sparse': sparse} + dataset_properties={"sparse": sparse} ) default_config = configuration_space.get_default_configuration() regressor = Regressor(random_state=0, **default_config) @@ -297,7 +320,7 @@ def _test_regressor_iterative_fit(Regressor, dataset='diabetes', sparse=False): regressor.iterative_fit(X_train, Y_train, n_iter=2, refit=True) iteration = 2 while not regressor.configuration_fully_fitted(): - n_iter = int(2 ** iteration / 2) + n_iter = int(2**iteration / 2) regressor.iterative_fit(X_train, Y_train, n_iter=n_iter) iteration += 1 diff --git a/autosklearn/smbo.py b/autosklearn/smbo.py index 3cb823f2ff..b179efa8d0 100644 --- a/autosklearn/smbo.py +++ b/autosklearn/smbo.py @@ -1,4 +1,6 @@ +import typing from typing import Dict, List, Optional + import copy import json import logging @@ -6,151 +8,191 @@ import os import time import traceback -import typing import warnings import dask.distributed import pynisher - +from smac.callbacks import IncorporateRunResultCallback from smac.facade.smac_ac_facade import SMAC4AC -from smac.intensification.simple_intensifier import SimpleIntensifier from smac.intensification.intensification import Intensifier +from smac.intensification.simple_intensifier import SimpleIntensifier from smac.runhistory.runhistory2epm import RunHistory2EPM4LogCost from smac.scenario.scenario import Scenario -from smac.tae.serial_runner import 
SerialRunner from smac.tae.dask_runner import DaskParallelRunner -from smac.callbacks import IncorporateRunResultCallback - +from smac.tae.serial_runner import SerialRunner import autosklearn.metalearning -from autosklearn.constants import MULTILABEL_CLASSIFICATION, \ - BINARY_CLASSIFICATION, TASK_TYPES_TO_STRING, CLASSIFICATION_TASKS, \ - MULTICLASS_CLASSIFICATION, REGRESSION, MULTIOUTPUT_REGRESSION -from autosklearn.ensemble_builder import EnsembleBuilderManager -from autosklearn.metalearning.mismbo import suggest_via_metalearning +from autosklearn.constants import ( + BINARY_CLASSIFICATION, + CLASSIFICATION_TASKS, + MULTICLASS_CLASSIFICATION, + MULTILABEL_CLASSIFICATION, + MULTIOUTPUT_REGRESSION, + REGRESSION, + TASK_TYPES_TO_STRING, +) from autosklearn.data.abstract_data_manager import AbstractDataManager +from autosklearn.ensemble_builder import EnsembleBuilderManager from autosklearn.evaluation import ExecuteTaFuncWithQueue, get_cost_of_crash +from autosklearn.metalearning.metafeatures.metafeatures import ( + calculate_all_metafeatures_encoded_labels, + calculate_all_metafeatures_with_labels, +) +from autosklearn.metalearning.metalearning.meta_base import MetaBase +from autosklearn.metalearning.mismbo import suggest_via_metalearning from autosklearn.util.logging_ import get_named_client_logger from autosklearn.util.parallel import preload_modules -from autosklearn.metalearning.metalearning.meta_base import MetaBase -from autosklearn.metalearning.metafeatures.metafeatures import \ - calculate_all_metafeatures_with_labels, calculate_all_metafeatures_encoded_labels EXCLUDE_META_FEATURES_CLASSIFICATION = { - 'Landmark1NN', - 'LandmarkDecisionNodeLearner', - 'LandmarkDecisionTree', - 'LandmarkLDA', - 'LandmarkNaiveBayes', - 'LandmarkRandomNodeLearner', - 'PCAFractionOfComponentsFor95PercentVariance', - 'PCAKurtosisFirstPC', - 'PCASkewnessFirstPC', - 'PCA', + "Landmark1NN", + "LandmarkDecisionNodeLearner", + "LandmarkDecisionTree", + "LandmarkLDA", + "LandmarkNaiveBayes", + "LandmarkRandomNodeLearner", + "PCAFractionOfComponentsFor95PercentVariance", + "PCAKurtosisFirstPC", + "PCASkewnessFirstPC", + "PCA", } EXCLUDE_META_FEATURES_REGRESSION = { - 'Landmark1NN', - 'LandmarkDecisionNodeLearner', - 'LandmarkDecisionTree', - 'LandmarkLDA', - 'LandmarkNaiveBayes', - 'PCAFractionOfComponentsFor95PercentVariance', - 'PCAKurtosisFirstPC', - 'PCASkewnessFirstPC', - 'NumberOfClasses', - 'ClassOccurences', - 'ClassProbabilityMin', - 'ClassProbabilityMax', - 'ClassProbabilityMean', - 'ClassProbabilitySTD', - 'ClassEntropy', - 'LandmarkRandomNodeLearner', - 'PCA', + "Landmark1NN", + "LandmarkDecisionNodeLearner", + "LandmarkDecisionTree", + "LandmarkLDA", + "LandmarkNaiveBayes", + "PCAFractionOfComponentsFor95PercentVariance", + "PCAKurtosisFirstPC", + "PCASkewnessFirstPC", + "NumberOfClasses", + "ClassOccurences", + "ClassProbabilityMin", + "ClassProbabilityMax", + "ClassProbabilityMean", + "ClassProbabilitySTD", + "ClassEntropy", + "LandmarkRandomNodeLearner", + "PCA", } def get_send_warnings_to_logger(logger): def _send_warnings_to_log(message, category, filename, lineno, file, line): - logger.debug('%s:%s: %s:%s', filename, lineno, category.__name__, message) + logger.debug("%s:%s: %s:%s", filename, lineno, category.__name__, message) + return _send_warnings_to_log # metalearning helpers -def _calculate_metafeatures(data_feat_type, data_info_task, basename, - x_train, y_train, watcher, logger_): +def _calculate_metafeatures( + data_feat_type, data_info_task, basename, x_train, y_train, watcher, 
logger_ +): with warnings.catch_warnings(): warnings.showwarning = get_send_warnings_to_logger(logger_) # == Calculate metafeatures - task_name = 'CalculateMetafeatures' + task_name = "CalculateMetafeatures" watcher.start_task(task_name) - categorical = {col: True if feat_type.lower() == 'categorical' else False - for col, feat_type in data_feat_type.items()} + categorical = { + col: True if feat_type.lower() == "categorical" else False + for col, feat_type in data_feat_type.items() + } - EXCLUDE_META_FEATURES = EXCLUDE_META_FEATURES_CLASSIFICATION \ - if data_info_task in CLASSIFICATION_TASKS else EXCLUDE_META_FEATURES_REGRESSION + EXCLUDE_META_FEATURES = ( + EXCLUDE_META_FEATURES_CLASSIFICATION + if data_info_task in CLASSIFICATION_TASKS + else EXCLUDE_META_FEATURES_REGRESSION + ) - if data_info_task in [MULTICLASS_CLASSIFICATION, BINARY_CLASSIFICATION, - MULTILABEL_CLASSIFICATION, REGRESSION, - MULTIOUTPUT_REGRESSION]: - logger_.info('Start calculating metafeatures for %s', basename) + if data_info_task in [ + MULTICLASS_CLASSIFICATION, + BINARY_CLASSIFICATION, + MULTILABEL_CLASSIFICATION, + REGRESSION, + MULTIOUTPUT_REGRESSION, + ]: + logger_.info("Start calculating metafeatures for %s", basename) result = calculate_all_metafeatures_with_labels( - x_train, y_train, categorical=categorical, + x_train, + y_train, + categorical=categorical, dataset_name=basename, - dont_calculate=EXCLUDE_META_FEATURES, logger=logger_) + dont_calculate=EXCLUDE_META_FEATURES, + logger=logger_, + ) for key in list(result.metafeature_values.keys()): - if result.metafeature_values[key].type_ != 'METAFEATURE': + if result.metafeature_values[key].type_ != "METAFEATURE": del result.metafeature_values[key] else: result = None - logger_.info('Metafeatures not calculated') + logger_.info("Metafeatures not calculated") watcher.stop_task(task_name) logger_.info( - 'Calculating Metafeatures (categorical attributes) took %5.2f', - watcher.wall_elapsed(task_name)) + "Calculating Metafeatures (categorical attributes) took %5.2f", + watcher.wall_elapsed(task_name), + ) return result -def _calculate_metafeatures_encoded(data_feat_type, basename, x_train, y_train, watcher, - task, logger_): +def _calculate_metafeatures_encoded( + data_feat_type, basename, x_train, y_train, watcher, task, logger_ +): with warnings.catch_warnings(): warnings.showwarning = get_send_warnings_to_logger(logger_) - EXCLUDE_META_FEATURES = EXCLUDE_META_FEATURES_CLASSIFICATION \ - if task in CLASSIFICATION_TASKS else EXCLUDE_META_FEATURES_REGRESSION + EXCLUDE_META_FEATURES = ( + EXCLUDE_META_FEATURES_CLASSIFICATION + if task in CLASSIFICATION_TASKS + else EXCLUDE_META_FEATURES_REGRESSION + ) - task_name = 'CalculateMetafeaturesEncoded' + task_name = "CalculateMetafeaturesEncoded" watcher.start_task(task_name) - categorical = {col: True if feat_type.lower() == 'categorical' else False - for col, feat_type in data_feat_type.items()} + categorical = { + col: True if feat_type.lower() == "categorical" else False + for col, feat_type in data_feat_type.items() + } result = calculate_all_metafeatures_encoded_labels( - x_train, y_train, categorical=categorical, - dataset_name=basename, dont_calculate=EXCLUDE_META_FEATURES, logger=logger_) + x_train, + y_train, + categorical=categorical, + dataset_name=basename, + dont_calculate=EXCLUDE_META_FEATURES, + logger=logger_, + ) for key in list(result.metafeature_values.keys()): - if result.metafeature_values[key].type_ != 'METAFEATURE': + if result.metafeature_values[key].type_ != "METAFEATURE": del 
result.metafeature_values[key] watcher.stop_task(task_name) logger_.info( - 'Calculating Metafeatures (encoded attributes) took %5.2fsec', - watcher.wall_elapsed(task_name)) + "Calculating Metafeatures (encoded attributes) took %5.2fsec", + watcher.wall_elapsed(task_name), + ) return result -def _get_metalearning_configurations(meta_base, basename, metric, - configuration_space, - task, - initial_configurations_via_metalearning, - is_sparse, - watcher, logger): - task_name = 'InitialConfigurations' +def _get_metalearning_configurations( + meta_base, + basename, + metric, + configuration_space, + task, + initial_configurations_via_metalearning, + is_sparse, + watcher, + logger, +): + task_name = "InitialConfigurations" watcher.start_task(task_name) try: metalearning_configurations = suggest_via_metalearning( - meta_base, basename, metric, + meta_base, + basename, + metric, task, is_sparse == 1, initial_configurations_via_metalearning, @@ -165,16 +207,21 @@ def _get_metalearning_configurations(meta_base, basename, metric, return metalearning_configurations -def _print_debug_info_of_init_configuration(initial_configurations, basename, - time_for_task, logger, watcher): - logger.debug('Initial Configurations: (%d)' % len(initial_configurations)) +def _print_debug_info_of_init_configuration( + initial_configurations, basename, time_for_task, logger, watcher +): + logger.debug("Initial Configurations: (%d)" % len(initial_configurations)) for initial_configuration in initial_configurations: logger.debug(initial_configuration) - logger.debug('Looking for initial configurations took %5.2fsec', - watcher.wall_elapsed('InitialConfigurations')) + logger.debug( + "Looking for initial configurations took %5.2fsec", + watcher.wall_elapsed("InitialConfigurations"), + ) logger.info( - 'Time left for %s after finding initial configurations: %5.2fsec', - basename, time_for_task - watcher.wall_elapsed(basename)) + "Time left for %s after finding initial configurations: %5.2fsec", + basename, + time_for_task - watcher.wall_elapsed(basename), + ) def get_smac_object( @@ -186,7 +233,7 @@ def get_smac_object( n_jobs, dask_client, ): - if len(scenario_dict['instances']) > 1: + if len(scenario_dict["instances"]) > 1: intensifier = Intensifier else: intensifier = SimpleIntensifier @@ -213,35 +260,37 @@ def get_smac_object( class AutoMLSMBO(object): - - def __init__(self, config_space, dataset_name, - backend, - total_walltime_limit, - func_eval_time_limit, - memory_limit, - metric, - watcher, - n_jobs, - dask_client: dask.distributed.Client, - port: int, - start_num_run=1, - data_memory_limit=None, - num_metalearning_cfgs=25, - config_file=None, - seed=1, - metadata_directory=None, - resampling_strategy='holdout', - resampling_strategy_args=None, - include: Optional[Dict[str, List[str]]] = None, - exclude: Optional[Dict[str, List[str]]] = None, - disable_file_output=False, - smac_scenario_args=None, - get_smac_object_callback=None, - scoring_functions=None, - pynisher_context='spawn', - ensemble_callback: typing.Optional[EnsembleBuilderManager] = None, - trials_callback: typing.Optional[IncorporateRunResultCallback] = None - ): + def __init__( + self, + config_space, + dataset_name, + backend, + total_walltime_limit, + func_eval_time_limit, + memory_limit, + metric, + watcher, + n_jobs, + dask_client: dask.distributed.Client, + port: int, + start_num_run=1, + data_memory_limit=None, + num_metalearning_cfgs=25, + config_file=None, + seed=1, + metadata_directory=None, + resampling_strategy="holdout", + 
resampling_strategy_args=None, + include: Optional[Dict[str, List[str]]] = None, + exclude: Optional[Dict[str, List[str]]] = None, + disable_file_output=False, + smac_scenario_args=None, + get_smac_object_callback=None, + scoring_functions=None, + pynisher_context="spawn", + ensemble_callback: typing.Optional[EnsembleBuilderManager] = None, + trials_callback: typing.Optional[IncorporateRunResultCallback] = None, + ): super(AutoMLSMBO, self).__init__() # data related self.dataset_name = dataset_name @@ -289,7 +338,11 @@ def __init__(self, config_space, dataset_name, self.trials_callback = trials_callback dataset_name_ = "" if dataset_name is None else dataset_name - logger_name = '%s(%d):%s' % (self.__class__.__name__, self.seed, ":" + dataset_name_) + logger_name = "%s(%d):%s" % ( + self.__class__.__name__, + self.seed, + ":" + dataset_name_, + ) if port is None: self.logger = logging.getLogger(__name__) else: @@ -308,7 +361,7 @@ def reset_data_manager(self, max_mem=None): else: self.datamanager = self.backend.load_datamanager() - self.task = self.datamanager.info['task'] + self.task = self.datamanager.info["task"] def collect_metalearning_suggestions(self, meta_base): metalearning_configurations = _get_metalearning_configurations( @@ -317,16 +370,18 @@ def collect_metalearning_suggestions(self, meta_base): metric=self.metric, configuration_space=self.config_space, task=self.task, - is_sparse=self.datamanager.info['is_sparse'], + is_sparse=self.datamanager.info["is_sparse"], initial_configurations_via_metalearning=self.num_metalearning_cfgs, watcher=self.watcher, - logger=self.logger) + logger=self.logger, + ) _print_debug_info_of_init_configuration( metalearning_configurations, self.dataset_name, self.total_walltime_limit, self.logger, - self.watcher) + self.watcher, + ) return metalearning_configurations @@ -336,23 +391,24 @@ def _calculate_metafeatures_with_limits(self, time_limit): try: context = multiprocessing.get_context(self.pynisher_context) preload_modules(context) - safe_mf = pynisher.enforce_limits(mem_in_mb=self.memory_limit, - wall_time_in_s=int(time_limit), - grace_period_in_s=30, - context=context, - logger=self.logger)( - _calculate_metafeatures) + safe_mf = pynisher.enforce_limits( + mem_in_mb=self.memory_limit, + wall_time_in_s=int(time_limit), + grace_period_in_s=30, + context=context, + logger=self.logger, + )(_calculate_metafeatures) res = safe_mf( data_feat_type=self.datamanager.feat_type, - data_info_task=self.datamanager.info['task'], - x_train=self.datamanager.data['X_train'], - y_train=self.datamanager.data['Y_train'], + data_info_task=self.datamanager.info["task"], + x_train=self.datamanager.data["X_train"], + y_train=self.datamanager.data["Y_train"], basename=self.dataset_name, watcher=self.watcher, - logger_=self.logger + logger_=self.logger, ) except Exception as e: - self.logger.error('Error getting metafeatures: %s', str(e)) + self.logger.error("Error getting metafeatures: %s", str(e)) return res @@ -362,30 +418,30 @@ def _calculate_metafeatures_encoded_with_limits(self, time_limit): try: context = multiprocessing.get_context(self.pynisher_context) preload_modules(context) - safe_mf = pynisher.enforce_limits(mem_in_mb=self.memory_limit, - wall_time_in_s=int(time_limit), - grace_period_in_s=30, - context=context, - logger=self.logger)( - _calculate_metafeatures_encoded) + safe_mf = pynisher.enforce_limits( + mem_in_mb=self.memory_limit, + wall_time_in_s=int(time_limit), + grace_period_in_s=30, + context=context, + logger=self.logger, + 
)(_calculate_metafeatures_encoded) res = safe_mf( data_feat_type=self.datamanager.feat_type, - task=self.datamanager.info['task'], - x_train=self.datamanager.data['X_train'], - y_train=self.datamanager.data['Y_train'], + task=self.datamanager.info["task"], + x_train=self.datamanager.data["X_train"], + y_train=self.datamanager.data["Y_train"], basename=self.dataset_name, watcher=self.watcher, - logger_=self.logger + logger_=self.logger, ) except Exception as e: - self.logger.error('Error getting metafeatures (encoded) : %s', - str(e)) + self.logger.error("Error getting metafeatures (encoded) : %s", str(e)) return res def run_smbo(self): - self.watcher.start_task('SMBO') + self.watcher.start_task("SMBO") # == first things first: load the datamanager self.reset_data_manager() @@ -401,14 +457,14 @@ def run_smbo(self): metalearning_configurations = self.get_metalearning_suggestions() - if self.resampling_strategy in ['partial-cv', - 'partial-cv-iterative-fit']: - num_folds = self.resampling_strategy_args['folds'] - instances = [[json.dumps({'task_id': self.dataset_name, - 'fold': fold_number})] - for fold_number in range(num_folds)] + if self.resampling_strategy in ["partial-cv", "partial-cv-iterative-fit"]: + num_folds = self.resampling_strategy_args["folds"] + instances = [ + [json.dumps({"task_id": self.dataset_name, "fold": fold_number})] + for fold_number in range(num_folds) + ] else: - instances = [[json.dumps({'task_id': self.dataset_name})]] + instances = [[json.dumps({"task_id": self.dataset_name})]] # TODO rebuild target algorithm to be it's own target algorithm # evaluator, which takes into account that a run can be killed prior @@ -428,62 +484,64 @@ def run_smbo(self): scoring_functions=self.scoring_functions, port=self.port, pynisher_context=self.pynisher_context, - **self.resampling_strategy_args + **self.resampling_strategy_args, ) ta = ExecuteTaFuncWithQueue startup_time = self.watcher.wall_elapsed(self.dataset_name) total_walltime_limit = self.total_walltime_limit - startup_time - 5 scenario_dict = { - 'abort_on_first_run_crash': False, - 'save-results-instantly': True, - 'cs': self.config_space, - 'cutoff_time': self.func_eval_time_limit, - 'deterministic': 'true', - 'instances': instances, - 'memory_limit': self.memory_limit, - 'output-dir': self.backend.get_smac_output_directory(), - 'run_obj': 'quality', - 'wallclock_limit': total_walltime_limit, - 'cost_for_crash': self.worst_possible_result, + "abort_on_first_run_crash": False, + "save-results-instantly": True, + "cs": self.config_space, + "cutoff_time": self.func_eval_time_limit, + "deterministic": "true", + "instances": instances, + "memory_limit": self.memory_limit, + "output-dir": self.backend.get_smac_output_directory(), + "run_obj": "quality", + "wallclock_limit": total_walltime_limit, + "cost_for_crash": self.worst_possible_result, } if self.smac_scenario_args is not None: for arg in [ - 'abort_on_first_run_crash', - 'cs', - 'deterministic', - 'instances', - 'output-dir', - 'run_obj', - 'shared-model', - 'cost_for_crash', + "abort_on_first_run_crash", + "cs", + "deterministic", + "instances", + "output-dir", + "run_obj", + "shared-model", + "cost_for_crash", ]: if arg in self.smac_scenario_args: - self.logger.warning('Cannot override scenario argument %s, ' - 'will ignore this.', arg) + self.logger.warning( + "Cannot override scenario argument %s, " "will ignore this.", + arg, + ) del self.smac_scenario_args[arg] for arg in [ - 'cutoff_time', - 'memory_limit', - 'wallclock_limit', + "cutoff_time", + "memory_limit", 
+ "wallclock_limit", ]: if arg in self.smac_scenario_args: self.logger.warning( - 'Overriding scenario argument %s: %s with value %s', + "Overriding scenario argument %s: %s with value %s", arg, scenario_dict[arg], - self.smac_scenario_args[arg] + self.smac_scenario_args[arg], ) scenario_dict.update(self.smac_scenario_args) smac_args = { - 'scenario_dict': scenario_dict, - 'seed': seed, - 'ta': ta, - 'ta_kwargs': ta_kwargs, - 'metalearning_configurations': metalearning_configurations, - 'n_jobs': self.n_jobs, - 'dask_client': self.dask_client, + "scenario_dict": scenario_dict, + "seed": seed, + "ta": ta, + "ta_kwargs": ta_kwargs, + "metalearning_configurations": metalearning_configurations, + "n_jobs": self.n_jobs, + "dask_client": self.dask_client, } if self.get_smac_object_callback is not None: smac = self.get_smac_object_callback(**smac_args) @@ -516,25 +574,33 @@ def get_metalearning_suggestions(self): # If metadata directory is None, use default if self.metadata_directory is None: metalearning_directory = os.path.dirname( - autosklearn.metalearning.__file__) + autosklearn.metalearning.__file__ + ) # There is no multilabel data in OpenML if self.task == MULTILABEL_CLASSIFICATION: meta_task = BINARY_CLASSIFICATION else: meta_task = self.task metadata_directory = os.path.join( - metalearning_directory, 'files', - '%s_%s_%s' % (self.metric, TASK_TYPES_TO_STRING[meta_task], - 'sparse' if self.datamanager.info['is_sparse'] - else 'dense')) + metalearning_directory, + "files", + "%s_%s_%s" + % ( + self.metric, + TASK_TYPES_TO_STRING[meta_task], + "sparse" if self.datamanager.info["is_sparse"] else "dense", + ), + ) self.metadata_directory = metadata_directory # If metadata directory is specified by user, # then verify that it exists. else: if not os.path.exists(self.metadata_directory): - raise ValueError('The specified metadata directory \'%s\' ' - 'does not exist!' % self.metadata_directory) + raise ValueError( + "The specified metadata directory '%s' " + "does not exist!" % self.metadata_directory + ) else: # There is no multilabel data in OpenML @@ -545,51 +611,66 @@ def get_metalearning_suggestions(self): metadata_directory = os.path.join( self.metadata_directory, - '%s_%s_%s' % (self.metric, TASK_TYPES_TO_STRING[meta_task], - 'sparse' if self.datamanager.info['is_sparse'] - else 'dense')) + "%s_%s_%s" + % ( + self.metric, + TASK_TYPES_TO_STRING[meta_task], + "sparse" if self.datamanager.info["is_sparse"] else "dense", + ), + ) # Check that the metadata directory has the correct # subdirectory needed for this dataset. 
- if os.path.basename(metadata_directory) not in \ - os.listdir(self.metadata_directory): - raise ValueError('The specified metadata directory ' - '\'%s\' does not have the correct ' - 'subdirectory \'%s\'' % - (self.metadata_directory, - os.path.basename(metadata_directory)) - ) + if os.path.basename(metadata_directory) not in os.listdir( + self.metadata_directory + ): + raise ValueError( + "The specified metadata directory " + "'%s' does not have the correct " + "subdirectory '%s'" + % ( + self.metadata_directory, + os.path.basename(metadata_directory), + ) + ) self.metadata_directory = metadata_directory if os.path.exists(self.metadata_directory): - self.logger.info('Metadata directory: %s', - self.metadata_directory) - meta_base = MetaBase(self.config_space, self.metadata_directory, self.logger) + self.logger.info("Metadata directory: %s", self.metadata_directory) + meta_base = MetaBase( + self.config_space, self.metadata_directory, self.logger + ) - metafeature_calculation_time_limit = int( - self.total_walltime_limit / 4) + metafeature_calculation_time_limit = int(self.total_walltime_limit / 4) metafeature_calculation_start_time = time.time() meta_features = self._calculate_metafeatures_with_limits( - metafeature_calculation_time_limit) + metafeature_calculation_time_limit + ) metafeature_calculation_end_time = time.time() - metafeature_calculation_time_limit = \ - metafeature_calculation_time_limit - ( - metafeature_calculation_end_time - - metafeature_calculation_start_time) + metafeature_calculation_time_limit = ( + metafeature_calculation_time_limit + - ( + metafeature_calculation_end_time + - metafeature_calculation_start_time + ) + ) if metafeature_calculation_time_limit < 1: self.logger.warning( - 'Time limit for metafeature calculation less ' - 'than 1 seconds (%f). Skipping calculation ' - 'of metafeatures for encoded dataset.', - metafeature_calculation_time_limit) + "Time limit for metafeature calculation less " + "than 1 seconds (%f). Skipping calculation " + "of metafeatures for encoded dataset.", + metafeature_calculation_time_limit, + ) meta_features_encoded = None else: with warnings.catch_warnings(): warnings.showwarning = get_send_warnings_to_logger(self.logger) - meta_features_encoded = \ + meta_features_encoded = ( self._calculate_metafeatures_encoded_with_limits( - metafeature_calculation_time_limit) + metafeature_calculation_time_limit + ) + ) # In case there is a problem calculating the encoded meta-features if meta_features is None: @@ -598,26 +679,28 @@ def get_metalearning_suggestions(self): else: if meta_features_encoded is not None: meta_features.metafeature_values.update( - meta_features_encoded.metafeature_values) + meta_features_encoded.metafeature_values + ) if meta_features is not None: meta_base.add_dataset(self.dataset_name, meta_features) # Do mean imputation of the meta-features - should be done specific # for each prediction model! 
                 all_metafeatures = meta_base.get_metafeatures(
-                    features=list(meta_features.keys()))
-                all_metafeatures.fillna(all_metafeatures.mean(),
-                                        inplace=True)
+                    features=list(meta_features.keys())
+                )
+                all_metafeatures.fillna(all_metafeatures.mean(), inplace=True)
                 with warnings.catch_warnings():
                     warnings.showwarning = get_send_warnings_to_logger(self.logger)
-                    metalearning_configurations = self.collect_metalearning_suggestions(
-                        meta_base)
+                    metalearning_configurations = (
+                        self.collect_metalearning_suggestions(meta_base)
+                    )
                 if metalearning_configurations is None:
                     metalearning_configurations = []
                 self.reset_data_manager()
-                self.logger.info('%s', meta_features)
+                self.logger.info("%s", meta_features)
                 # Convert meta-features into a dictionary because the scenario
                 # expects a dictionary
@@ -627,13 +710,15 @@ def get_metalearning_suggestions(self):
                 meta_features_list = []
                 for meta_feature_name in all_metafeatures.columns:
                     meta_features_list.append(
-                        meta_features[meta_feature_name].value)
+                        meta_features[meta_feature_name].value
+                    )
                 self.logger.info(list(meta_features_dict.keys()))
             else:
                 meta_features = None
-                self.logger.warning('Could not find meta-data directory %s' %
-                                    metadata_directory)
+                self.logger.warning(
+                    "Could not find meta-data directory %s" % metadata_directory
+                )
         else:
             meta_features = None
diff --git a/autosklearn/util/__init__.py b/autosklearn/util/__init__.py
index 6eeff57a7d..9f2d05ccd5 100644
--- a/autosklearn/util/__init__.py
+++ b/autosklearn/util/__init__.py
@@ -1,7 +1,7 @@
 # -*- encoding: utf-8 -*-
 import re
-
-SUBPATTERN = r'((?P<operation%d>==|>=|>|<)(?P<version%d>(\d+)?(\.[a-zA-Z0-9]+)?(\.[a-zA-Z0-9]+)?))'
+SUBPATTERN = r"((?P<operation%d>==|>=|>|<)(?P<version%d>(\d+)?(\.[a-zA-Z0-9]+)?(\.[a-zA-Z0-9]+)?))"  # noqa: E501
 RE_PATTERN = re.compile(
-    r'^(?P<name>[\w\-]+)%s?(,%s)?$' % (SUBPATTERN % (1, 1), SUBPATTERN % (2, 2)))
+    r"^(?P<name>[\w\-]+)%s?(,%s)?$" % (SUBPATTERN % (1, 1), SUBPATTERN % (2, 2))
+)
diff --git a/autosklearn/util/common.py b/autosklearn/util/common.py
index 4905d0eaa8..ddee4dc1ab 100644
--- a/autosklearn/util/common.py
+++ b/autosklearn/util/common.py
@@ -5,22 +5,21 @@
 import numpy as np
-__all__ = [
-    'check_pid',
-    'warn_if_not_float'
-]
+__all__ = ["check_pid", "warn_if_not_float"]
-def warn_if_not_float(X: np.ndarray, estimator: str = 'This algorithm') -> bool:
+def warn_if_not_float(X: np.ndarray, estimator: str = "This algorithm") -> bool:
     """Warning utility function to check that data type is floating point.
     Returns True if a warning was raised (i.e. the input is not float)
     and False otherwise, for easier input validation.
""" if not isinstance(estimator, str): estimator = estimator.__class__.__name__ - if X.dtype.kind != 'f': - warnings.warn("%s assumes floating point values as input, " - "got %s" % (estimator, X.dtype)) + if X.dtype.kind != "f": + warnings.warn( + "%s assumes floating point values as input, " + "got %s" % (estimator, X.dtype) + ) return True return False diff --git a/autosklearn/util/data.py b/autosklearn/util/data.py index ff6eb3c337..bdd4cf31b2 100644 --- a/autosklearn/util/data.py +++ b/autosklearn/util/data.py @@ -1,4 +1,3 @@ -import warnings from typing import ( Any, Dict, @@ -10,21 +9,18 @@ Tuple, Type, Union, - cast + cast, ) -import numpy as np +import warnings +import numpy as np import pandas as pd - from scipy.sparse import spmatrix - from sklearn.model_selection import train_test_split -from autosklearn.data.validation import SUPPORTED_FEAT_TYPES from autosklearn.evaluation.splitter import CustomStratifiedShuffleSplit - # TODO: TypedDict with python 3.8 # # When upgrading to python 3.8 as minimum version, this should be a TypedDict @@ -32,15 +28,14 @@ DatasetCompressionSpec = Dict[str, Union[float, List[str]]] # Default specification for arg `dataset_compression` -default_dataset_compression_arg: DatasetCompressionSpec = { +default_dataset_compression_arg: DatasetCompressionSpec = { "memory_allocation": 0.1, - "methods": ["precision", "subsample"] + "methods": ["precision", "subsample"], } def validate_dataset_compression_arg( - dataset_compression: Mapping[str, Any], - memory_limit: int + dataset_compression: Mapping[str, Any], memory_limit: int ) -> DatasetCompressionSpec: """Validates and return a correct dataset_compression argument @@ -58,22 +53,24 @@ def validate_dataset_compression_arg( """ if isinstance(dataset_compression, Mapping): # Fill with defaults if they don't exist - dataset_compression = { - **default_dataset_compression_arg, - **dataset_compression - } + dataset_compression = {**default_dataset_compression_arg, **dataset_compression} + + parsed_keys = set(dataset_compression.keys()) + default_keys = set(default_dataset_compression_arg.keys()) # Must contain known keys - if set(dataset_compression.keys()) != set(default_dataset_compression_arg.keys()): + if parsed_keys != default_keys: raise ValueError( - f"Unknown key in dataset_compression, {list(dataset_compression.keys())}." - f"\nPossible keys are {list(default_dataset_compression_arg.keys())}" + f"Unknown key(s) in ``dataset_compression``, {parsed_keys}." 
+                f"\nPossible keys are {default_keys}"
             )
         memory_allocation = dataset_compression["memory_allocation"]
         # "memory_allocation" must be float or int
-        if not (isinstance(memory_allocation, float) or isinstance(memory_allocation, int)):
+        if not (
+            isinstance(memory_allocation, float) or isinstance(memory_allocation, int)
+        ):
             raise ValueError(
                 "key 'memory_allocation' must be an `int` or `float`"
                 f"\ntype = {memory_allocation}"
@@ -89,9 +86,11 @@ def validate_dataset_compression_arg(
             )
         # "memory_allocation" if absolute, should be > 0 and < memory_limit
-        if isinstance(memory_allocation, int) and not (0 < memory_allocation < memory_limit):
+        if isinstance(memory_allocation, int) and not (
+            0 < memory_allocation < memory_limit
+        ):
             raise ValueError(
-                f"key 'memory_allocation' if int must be in (0, memory_limit={memory_limit})"
+                f"key 'memory_allocation' if int must be in (0, {memory_limit})"
                 f"\nmemory_allocation = {memory_allocation}"
                 f"\ndataset_compression = {dataset_compression}"
             )
@@ -109,11 +108,13 @@ def validate_dataset_compression_arg(
         # "methods" must contain known methods
         if any(
-            method not in cast(Sequence, default_dataset_compression_arg["methods"])  # mypy
+            method
+            not in cast(Sequence, default_dataset_compression_arg["methods"])  # mypy
             for method in dataset_compression["methods"]
         ):
+            valid_methods = default_dataset_compression_arg["methods"]
             raise ValueError(
-                f"key 'methods' can only contain {default_dataset_compression_arg['methods']}"
+                f"key 'methods' can only contain {valid_methods}"
                 f"\nmethods = {dataset_compression['methods']}"
                 f"\ndataset_compression = {dataset_compression}"
             )
@@ -160,10 +161,10 @@ class _DtypeReductionMapping(Mapping):
     # provide only as much precision as np.longdouble,
     # that is, 80 bits on most x86 machines and 64 bits
     # in standard Windows builds.
-    if hasattr(np, 'float96'):
+    if hasattr(np, "float96"):
         _mapping[np.float96] = np.float64
-    if hasattr(np, 'float128'):
+    if hasattr(np, "float128"):
         _mapping[np.float128] = np.float64
     @classmethod
@@ -191,8 +192,10 @@ def binarization(array: Union[List, np.ndarray]) -> np.ndarray:
     # into 1 and the min into 0
     array = np.array(array, dtype=float)  # conversion needed to use np.inf
     if len(np.unique(array)) > 2:
-        raise ValueError('The argument must be a binary-class datafile. '
-                         '{} classes detected'.format(len(np.unique(array))))
+        raise ValueError(
+            "The argument must be a binary-class datafile. "
+            "{} classes detected".format(len(np.unique(array)))
+        )
     # manipulation which aims at avoid error in data
     # with for example classes '1' and '2'.
@@ -252,8 +255,8 @@ def subsample(
     is_classification: bool,
     sample_size: Union[float, int],
     random_state: Optional[Union[int, np.random.RandomState]] = None,
-) -> Tuple[SUPPORTED_FEAT_TYPES, Union[List, np.ndarray, pd.DataFrame, pd.Series]]:
-    """ Subsamples data returning the same type as it recieved.
+) -> Tuple[Union[np.ndarray, spmatrix], np.ndarray]:
+    """Subsamples data returning the same type as it received.
     If `is_classification`, we split using a stratified shuffle split which
     preserves unique labels in the training set.
@@ -298,8 +301,7 @@ def subsample( if is_classification: splitter = CustomStratifiedShuffleSplit( - train_size=sample_size, - random_state=random_state + train_size=sample_size, random_state=random_state ) left_idxs, _ = next(splitter.split(X=X, y=y)) @@ -319,7 +321,8 @@ def subsample( else: X, _, y, _ = train_test_split( # type: ignore - X, y, + X, + y, train_size=sample_size, random_state=random_state, ) @@ -330,7 +333,7 @@ def subsample( def reduce_precision( X: Union[np.ndarray, spmatrix] ) -> Tuple[Union[np.ndarray, spmatrix], Type]: - """ Reduces the precision of a np.ndarray or spmatrix containing floats + """Reduces the precision of a np.ndarray or spmatrix containing floats Parameters ---------- @@ -343,8 +346,10 @@ def reduce_precision( Returns the reduced data X along with the dtype it was reduced to. """ if X.dtype not in supported_precision_reductions: - raise ValueError(f"X.dtype = {X.dtype} not equal to any supported" - f" {supported_precision_reductions}") + raise ValueError( + f"X.dtype = {X.dtype} not equal to any supported" + f" {supported_precision_reductions}" + ) precision = reduction_mapping[X.dtype] return X.astype(precision), precision @@ -356,10 +361,10 @@ def reduce_dataset_size_if_too_large( memory_limit: int, is_classification: bool, random_state: Union[int, np.random.RandomState] = None, - operations: List[str] = ['precision', 'subsample'], + operations: List[str] = ["precision", "subsample"], memory_allocation: Union[int, float] = 0.1, ) -> Tuple[Union[np.ndarray, spmatrix], np.ndarray]: - f""" Reduces the size of the dataset if it's too close to the memory limit. + f"""Reduces the size of the dataset if it's too close to the memory limit. Follows the order of the operations passed in and retains the type of its input. @@ -408,8 +413,8 @@ def reduce_dataset_size_if_too_large( **subsample** - Reduce the amount of samples of the dataset such that it fits into the allocated memory. - Ensures stratification and that unique labels are present + Reduce the amount of samples of the dataset such that it fits into the allocated + memory. Ensures stratification and that unique labels are present memory_allocation: Union[int, float] = 0.1 The amount of memory to allocate to the dataset. 
A float specifys that the @@ -437,27 +442,31 @@ def reduce_dataset_size_if_too_large( allocated_memory = memory_allocation else: - raise ValueError(f"Unknown type for `memory_allocation` {type(memory_allocation)}") + raise ValueError( + f"Unknown type for `memory_allocation` {type(memory_allocation)}" + ) - if 'precision' in operations and X.dtype not in supported_precision_reductions: + if "precision" in operations and X.dtype not in supported_precision_reductions: raise ValueError(f"Unsupported type `{X.dtype}` for precision reduction") def megabytes(arr: Union[np.ndarray, spmatrix]) -> float: - return (arr.nbytes if isinstance(X, np.ndarray) else arr.data.nbytes) / (2**20) + return (arr.nbytes if isinstance(X, np.ndarray) else arr.data.nbytes) / ( + 2**20 + ) for operation in operations: - if operation == 'precision': + if operation == "precision": # If the dataset is too big for the allocated memory, # we then try to reduce the precision if it's a high precision dataset if megabytes(X) > allocated_memory: X, precision = reduce_precision(X) warnings.warn( - f'Dataset too large for allocated memory {allocated_memory}MB, ' - f'reduced the precision from {X.dtype} to {precision}', + f"Dataset too large for allocated memory {allocated_memory}MB, " + f"reduced the precision from {X.dtype} to {precision}", ) - elif operation == 'subsample': + elif operation == "subsample": # If the dataset is still too big such that we couldn't fit # into the allocated memory, we subsample it so that it does if megabytes(X) > allocated_memory: @@ -470,16 +479,18 @@ def megabytes(arr: Union[np.ndarray, spmatrix]) -> float: # Tried the generic `def subsample(X: T) -> T` approach but it was # failing elsewhere, keeping it simple for now X, y = subsample( # type: ignore - X, y, + X, + y, sample_size=sample_percentage, is_classification=is_classification, - random_state=random_state + random_state=random_state, ) n_samples_after = X.shape[0] warnings.warn( - f"Dataset too large for allocated memory {allocated_memory}MB, reduced" - f" number of samples from {n_samples_before} to {n_samples_after}." + f"Dataset too large for allocated memory {allocated_memory}MB," + f" reduced number of samples from {n_samples_before} to" + f" {n_samples_after}." 
) else: diff --git a/autosklearn/util/dependencies.py b/autosklearn/util/dependencies.py index d213000871..fb9037450b 100644 --- a/autosklearn/util/dependencies.py +++ b/autosklearn/util/dependencies.py @@ -1,6 +1,7 @@ +from typing import List, Optional, Union, no_type_check + import importlib from distutils.version import LooseVersion -from typing import List, Optional, Union, no_type_check import pkg_resources @@ -19,12 +20,12 @@ def verify_packages(packages: Optional[Union[str, List[str]]]) -> None: match = RE_PATTERN.match(package) if match: - name = match.group('name') - operation = match.group('operation1') - version = match.group('version1') + name = match.group("name") + operation = match.group("operation1") + version = match.group("version1") _verify_package(name, operation, version) else: - raise ValueError('Unable to read requirement: %s' % package) + raise ValueError("Unable to read requirement: %s" % package) # Module has no attribute __version__ wa @@ -45,48 +46,56 @@ def _verify_package(name: str, operation: Optional[str], version: str) -> None: required_version = LooseVersion(version) - if operation == '==': + if operation == "==": check = required_version == installed_version - elif operation == '>': + elif operation == ">": check = installed_version > required_version - elif operation == '<': + elif operation == "<": check = installed_version < required_version - elif operation == '>=': - check = installed_version > required_version or \ - installed_version == required_version + elif operation == ">=": + check = ( + installed_version > required_version + or installed_version == required_version + ) else: - raise NotImplementedError( - 'operation \'%s\' is not supported' % operation) + raise NotImplementedError("operation '%s' is not supported" % operation) if not check: - raise IncorrectPackageVersionError(name, installed_version, operation, - required_version) + raise IncorrectPackageVersionError( + name, installed_version, operation, required_version + ) class MissingPackageError(Exception): - error_message = 'Mandatory package \'{name}\' not found!' + error_message = "Mandatory package '{name}' not found!" 
def __init__(self, package_name: str): self.package_name = package_name super(MissingPackageError, self).__init__( - self.error_message.format(name=package_name)) + self.error_message.format(name=package_name) + ) class IncorrectPackageVersionError(Exception): - error_message = "found '{name}' version {installed_version} but requires {name} version " \ - "{operation}{required_version}" - - def __init__(self, - package_name: str, - installed_version: Union[str, LooseVersion], - operation: Optional[str], - required_version: Union[str, LooseVersion] - ): + error_message = ( + "found '{name}' version {installed_version} but requires {name} version " + "{operation}{required_version}" + ) + + def __init__( + self, + package_name: str, + installed_version: Union[str, LooseVersion], + operation: Optional[str], + required_version: Union[str, LooseVersion], + ): self.package_name = package_name self.installed_version = installed_version self.operation = operation self.required_version = required_version - message = self.error_message.format(name=package_name, - installed_version=installed_version, - operation=operation, - required_version=required_version) + message = self.error_message.format( + name=package_name, + installed_version=installed_version, + operation=operation, + required_version=required_version, + ) super(IncorrectPackageVersionError, self).__init__(message) diff --git a/autosklearn/util/logging_.py b/autosklearn/util/logging_.py index fc298cd053..a85e4a80d6 100644 --- a/autosklearn/util/logging_.py +++ b/autosklearn/util/logging_.py @@ -1,4 +1,6 @@ # -*- encoding: utf-8 -*- +from typing import Any, Dict, Iterator, Optional, TextIO, Type, cast + import logging import logging.config import logging.handlers @@ -12,7 +14,6 @@ import threading import warnings from contextlib import contextmanager -from typing import Any, Dict, Iterator, Optional, TextIO, Type, cast import yaml @@ -26,41 +27,43 @@ def setup_logger( # logging_config must be a dictionary object specifying the configuration # for the loggers to be used in auto-sklearn. 
if logging_config is None: - with open(os.path.join(os.path.dirname(__file__), 'logging.yaml'), 'r') as fh: + with open(os.path.join(os.path.dirname(__file__), "logging.yaml"), "r") as fh: logging_config = yaml.safe_load(fh) # Make sure we have a filename handler - if 'handlers' not in logging_config: - logging_config['handlers'] = {} - if 'file_handler' not in logging_config['handlers']: - logging_config['handlers']['file_handler'] = { - 'class': 'logging.FileHandler', - 'level': 'DEBUG', - 'filename': 'autosklearn.log' + if "handlers" not in logging_config: + logging_config["handlers"] = {} + if "file_handler" not in logging_config["handlers"]: + logging_config["handlers"]["file_handler"] = { + "class": "logging.FileHandler", + "level": "DEBUG", + "filename": "autosklearn.log", } - if 'distributed_logfile' not in logging_config['handlers']: + if "distributed_logfile" not in logging_config["handlers"]: # We have to create a file handler - logging_config['handlers']['distributed_logfile'] = { - 'class': 'logging.FileHandler', - 'level': 'DEBUG', - 'filename': 'distributed.log' + logging_config["handlers"]["distributed_logfile"] = { + "class": "logging.FileHandler", + "level": "DEBUG", + "filename": "distributed.log", } if filename is None: - filename = logging_config['handlers']['file_handler']['filename'] + filename = logging_config["handlers"]["file_handler"]["filename"] if distributedlog_filename is None: - distributedlog_filename = logging_config['handlers']['distributed_logfile']['filename'] + distributedlog_filename = logging_config["handlers"]["distributed_logfile"][ + "filename" + ] # Make path absolute only if required # This is needed because this function might be called multiple times with the same # dict, and we don't want /path/path/.log but rather just /path/.log - if os.path.sep not in logging_config['handlers']['file_handler']['filename']: - logging_config['handlers']['file_handler']['filename'] = os.path.join( + if os.path.sep not in logging_config["handlers"]["file_handler"]["filename"]: + logging_config["handlers"]["file_handler"]["filename"] = os.path.join( output_dir, filename ) - if os.path.sep not in logging_config['handlers']['distributed_logfile']['filename']: - logging_config['handlers']['distributed_logfile']['filename'] = os.path.join( + if os.path.sep not in logging_config["handlers"]["distributed_logfile"]["filename"]: + logging_config["handlers"]["distributed_logfile"]["filename"] = os.path.join( output_dir, distributedlog_filename ) logging.config.dictConfig(logging_config) @@ -70,13 +73,12 @@ def _create_logger(name: str) -> logging.Logger: return logging.getLogger(name) -def get_logger(name: str) -> 'PickableLoggerAdapter': +def get_logger(name: str) -> "PickableLoggerAdapter": logger = PickableLoggerAdapter(name) return logger class PickableLoggerAdapter(object): - def __init__(self, name: str): self.name = name self.logger = _create_logger(name) @@ -90,7 +92,7 @@ def __getstate__(self) -> Dict[str, Any]: Dictionary, representing the object state to be pickled. Ignores the self.logger field and only returns the logger name. """ - return {'name': self.name} + return {"name": self.name} def __setstate__(self, state: Dict[str, Any]) -> None: """ @@ -102,7 +104,7 @@ def __setstate__(self, state: Dict[str, Any]) -> None: state - dictionary, containing the logger name. 
""" - self.name = state['name'] + self.name = state["name"] self.logger = _create_logger(self.name) def debug(self, msg: str, *args: Any, **kwargs: Any) -> None: @@ -132,20 +134,16 @@ def isEnabledFor(self, level: int) -> bool: def get_named_client_logger( name: str, - host: str = 'localhost', + host: str = "localhost", port: int = logging.handlers.DEFAULT_TCP_LOGGING_PORT, -) -> 'PicklableClientLogger': - logger = PicklableClientLogger( - name=name, - host=host, - port=port - ) +) -> "PicklableClientLogger": + logger = PicklableClientLogger(name=name, host=host, port=port) return logger def _get_named_client_logger( name: str, - host: str = 'localhost', + host: str = "localhost", port: int = logging.handlers.DEFAULT_TCP_LOGGING_PORT, ) -> logging.Logger: """ @@ -176,7 +174,7 @@ def _get_named_client_logger( # We add client not only to identify that this is the client # communication part of the logger, but to make sure we have # a new singleton with the desired socket handlers - local_logger = _create_logger('Client-' + name) + local_logger = _create_logger("Client-" + name) local_logger.propagate = False local_logger.setLevel(logging.DEBUG) @@ -184,8 +182,9 @@ def _get_named_client_logger( # Ignore mypy logging.handlers.SocketHandler has no attribute port # This is not the case clearly, yet MyPy assumes this is not the case # Even when using direct casting or getattr - ports = [getattr(handler, 'port', None - ) for handler in local_logger.handlers] # type: ignore[attr-defined] + ports = [ + getattr(handler, "port", None) for handler in local_logger.handlers + ] # type: ignore[attr-defined] except AttributeError: # We do not want to log twice but adding multiple times the same # handler. So we check to what ports we communicate to @@ -201,16 +200,11 @@ def _get_named_client_logger( class PicklableClientLogger(PickableLoggerAdapter): - def __init__(self, name: str, host: str, port: int): self.name = name self.host = host self.port = port - self.logger = _get_named_client_logger( - name=name, - host=host, - port=port - ) + self.logger = _get_named_client_logger(name=name, host=host, port=port) def __getstate__(self) -> Dict[str, Any]: """ @@ -222,9 +216,9 @@ def __getstate__(self) -> Dict[str, Any]: the self.logger field and only returns the logger name. """ return { - 'name': self.name, - 'host': self.host, - 'port': self.port, + "name": self.name, + "host": self.host, + "port": self.port, } def __setstate__(self, state: Dict[str, Any]) -> None: @@ -237,9 +231,9 @@ def __setstate__(self, state: Dict[str, Any]) -> None: state - dictionary, containing the logger name. 
""" - self.name = state['name'] - self.host = state['host'] - self.port = state['port'] + self.name = state["name"] + self.host = state["host"] + self.port = state["port"] self.logger = _get_named_client_logger( name=self.name, host=self.host, @@ -264,7 +258,7 @@ def handle(self) -> None: chunk = self.connection.recv(4) # type: ignore[attr-defined] if len(chunk) < 4: break - slen = struct.unpack('>L', chunk)[0] + slen = struct.unpack(">L", chunk)[0] chunk = self.connection.recv(slen) # type: ignore[attr-defined] while len(chunk) < slen: chunk = chunk + self.connection.recv(slen - len(chunk)) # type: ignore[attr-defined] # noqa: E501 @@ -301,9 +295,9 @@ def start_log_server( logging_config: Dict, output_dir: str, ) -> None: - setup_logger(filename=filename, - logging_config=logging_config, - output_dir=output_dir) + setup_logger( + filename=filename, logging_config=logging_config, output_dir=output_dir + ) while True: # Loop until we find a valid port @@ -334,7 +328,7 @@ class LogRecordSocketReceiver(socketserver.ThreadingTCPServer): def __init__( self, - host: str = 'localhost', + host: str = "localhost", port: int = logging.handlers.DEFAULT_TCP_LOGGING_PORT, handler: Type[LogRecordStreamHandler] = LogRecordStreamHandler, logname: Optional[str] = None, @@ -347,9 +341,7 @@ def __init__( def serve_until_stopped(self) -> None: while True: - rd, wr, ex = select.select([self.socket.fileno()], - [], [], - self.timeout) + rd, wr, ex = select.select([self.socket.fileno()], [], [], self.timeout) if rd: self.handle_request() if self.event is not None and self.event.is_set(): @@ -358,7 +350,7 @@ def serve_until_stopped(self) -> None: @contextmanager def warnings_to(logger: Optional[PicklableClientLogger] = None) -> Iterator[None]: - """ A context manager to catch warnings and send them to the logger + """A context manager to catch warnings and send them to the logger If no logger is passed, warnings propogate as they normally would. 
@@ -371,6 +363,7 @@ def warnings_to(logger: Optional[PicklableClientLogger] = None) -> Iterator[None if logger: with warnings.catch_warnings(): + def to_log( logger: PicklableClientLogger, message: str, @@ -378,15 +371,16 @@ def to_log( filename: str, lineno: int, file: Optional[TextIO] = None, - line: Optional[str] = None + line: Optional[str] = None, ) -> None: logger.warning(f"{filename}:{lineno} {category.__name__}:{message}") # Mypy was complaining that logger didn't exist in `to_log` see here: # https://mypy.readthedocs.io/en/stable/common_issues.html#narrowing-and-inner-functions # we explicitly pass it in and have to force it's type with `cast` - warnings.showwarning = lambda *args: \ - to_log(cast(PicklableClientLogger, logger), *args) + warnings.showwarning = lambda *args: to_log( + cast(PicklableClientLogger, logger), *args + ) yield # Else do nothing, warnings go to wherever they would without this context diff --git a/autosklearn/util/parallel.py b/autosklearn/util/parallel.py index 2f0ea6b016..0804588a61 100644 --- a/autosklearn/util/parallel.py +++ b/autosklearn/util/parallel.py @@ -5,16 +5,19 @@ def preload_modules(context: multiprocessing.context.BaseContext) -> None: all_loaded_modules = sys.modules.keys() preload = [ - loaded_module for loaded_module in all_loaded_modules - if loaded_module.split('.')[0] in ( - 'smac', - 'autosklearn', - 'numpy', - 'scipy', - 'pandas', - 'pynisher', - 'sklearn', - 'ConfigSpace', - ) and 'logging' not in loaded_module + loaded_module + for loaded_module in all_loaded_modules + if loaded_module.split(".")[0] + in ( + "smac", + "autosklearn", + "numpy", + "scipy", + "pandas", + "pynisher", + "sklearn", + "ConfigSpace", + ) + and "logging" not in loaded_module ] context.set_forkserver_preload(preload) diff --git a/autosklearn/util/pipeline.py b/autosklearn/util/pipeline.py index c1f5a2ca23..d3291069f5 100755 --- a/autosklearn/util/pipeline.py +++ b/autosklearn/util/pipeline.py @@ -1,29 +1,27 @@ # -*- encoding: utf-8 -*- from typing import Any, Dict, List, Optional, Union -from ConfigSpace.configuration_space import ConfigurationSpace - import numpy as np +from ConfigSpace.configuration_space import ConfigurationSpace from autosklearn.constants import ( BINARY_CLASSIFICATION, MULTICLASS_CLASSIFICATION, MULTILABEL_CLASSIFICATION, MULTIOUTPUT_REGRESSION, - REGRESSION_TASKS + REGRESSION_TASKS, ) from autosklearn.pipeline.classification import SimpleClassificationPipeline from autosklearn.pipeline.regression import SimpleRegressionPipeline - -__all__ = ['get_configuration_space'] +__all__ = ["get_configuration_space"] def get_configuration_space( info: Dict[str, Any], include: Optional[Dict[str, List[str]]] = None, exclude: Optional[Dict[str, List[str]]] = None, - random_state: Optional[Union[int, np.random.RandomState]] = None + random_state: Optional[Union[int, np.random.RandomState]] = None, ) -> ConfigurationSpace: """Get the configuration of a pipeline given some dataset info @@ -46,17 +44,19 @@ def get_configuration_space( ConfigurationSpace The configuration space for the pipeline """ - if info['task'] in REGRESSION_TASKS: + if info["task"] in REGRESSION_TASKS: return _get_regression_configuration_space(info, include, exclude, random_state) else: - return _get_classification_configuration_space(info, include, exclude, random_state) + return _get_classification_configuration_space( + info, include, exclude, random_state + ) def _get_regression_configuration_space( info: Dict[str, Any], include: Optional[Dict[str, List[str]]], exclude: 
Optional[Dict[str, List[str]]], - random_state: Optional[Union[int, np.random.RandomState]] = None + random_state: Optional[Union[int, np.random.RandomState]] = None, ) -> ConfigurationSpace: """Get the configuration of a regression pipeline given some dataset info @@ -79,25 +79,22 @@ def _get_regression_configuration_space( ConfigurationSpace The configuration space for the regression pipeline """ - task_type = info['task'] + task_type = info["task"] sparse = False multioutput = False if task_type == MULTIOUTPUT_REGRESSION: multioutput = True - if info['is_sparse'] == 1: + if info["is_sparse"] == 1: sparse = True - dataset_properties = { - 'multioutput': multioutput, - 'sparse': sparse - } + dataset_properties = {"multioutput": multioutput, "sparse": sparse} configuration_space = SimpleRegressionPipeline( dataset_properties=dataset_properties, include=include, exclude=exclude, - random_state=random_state + random_state=random_state, ).get_hyperparameter_search_space() return configuration_space @@ -106,7 +103,7 @@ def _get_classification_configuration_space( info: Dict[str, Any], include: Optional[Dict[str, List[str]]], exclude: Optional[Dict[str, List[str]]], - random_state: Optional[Union[int, np.random.RandomState]] = None + random_state: Optional[Union[int, np.random.RandomState]] = None, ) -> ConfigurationSpace: """Get the configuration of a classification pipeline given some dataset info @@ -129,7 +126,7 @@ def _get_classification_configuration_space( ConfigurationSpace The configuration space for the classification pipeline """ - task_type = info['task'] + task_type = info["task"] multilabel = False multiclass = False @@ -142,18 +139,18 @@ def _get_classification_configuration_space( if task_type == BINARY_CLASSIFICATION: pass - if info['is_sparse'] == 1: + if info["is_sparse"] == 1: sparse = True dataset_properties = { - 'multilabel': multilabel, - 'multiclass': multiclass, - 'sparse': sparse + "multilabel": multilabel, + "multiclass": multiclass, + "sparse": sparse, } return SimpleClassificationPipeline( dataset_properties=dataset_properties, include=include, exclude=exclude, - random_state=random_state + random_state=random_state, ).get_hyperparameter_search_space() diff --git a/autosklearn/util/single_thread_client.py b/autosklearn/util/single_thread_client.py index 5cd7c653f4..057e29a25b 100644 --- a/autosklearn/util/single_thread_client.py +++ b/autosklearn/util/single_thread_client.py @@ -1,4 +1,5 @@ import typing + from pathlib import Path import dask.distributed @@ -9,6 +10,7 @@ class DummyFuture(dask.distributed.Future): A class that mimics a distributed Future, the outcome of performing submit on a distributed client. """ + def __init__(self, result: typing.Any) -> None: self._result = result # type: typing.Any @@ -33,13 +35,24 @@ class SingleThreadedClient(dask.distributed.Client): A class to Mock the Distributed Client class, in case Auto-Sklearn is meant to run in the current Thread. 
""" + def __init__(self) -> None: # Raise a not implemented error if using a method from Client - implemented_methods = ['submit', 'close', 'shutdown', 'write_scheduler_file', - '_get_scheduler_info', 'nthreads'] - method_list = [func for func in dir(dask.distributed.Client) if callable( - getattr(dask.distributed.Client, func)) and not func.startswith('__')] + implemented_methods = [ + "submit", + "close", + "shutdown", + "write_scheduler_file", + "_get_scheduler_info", + "nthreads", + ] + method_list = [ + func + for func in dir(dask.distributed.Client) + if callable(getattr(dask.distributed.Client, func)) + and not func.startswith("__") + ] for method in method_list: if method in implemented_methods: continue @@ -70,17 +83,17 @@ def write_scheduler_file(self, scheduler_file: str) -> None: def _get_scheduler_info(self) -> typing.Dict: return { - 'workers': ['127.0.0.1'], - 'type': 'Scheduler', + "workers": ["127.0.0.1"], + "type": "Scheduler", } def nthreads(self) -> typing.Dict: return { - '127.0.0.1': 1, + "127.0.0.1": 1, } def __repr__(self) -> str: - return 'SingleThreadedClient()' + return "SingleThreadedClient()" def __del__(self) -> None: pass diff --git a/autosklearn/util/stopwatch.py b/autosklearn/util/stopwatch.py index aff17a1acf..9ced028cd0 100644 --- a/autosklearn/util/stopwatch.py +++ b/autosklearn/util/stopwatch.py @@ -5,10 +5,11 @@ @project: AutoML2015 """ +from typing import Tuple + import sys import time from collections import OrderedDict -from typing import Tuple class TimingTask(object): @@ -31,7 +32,7 @@ def stop(self) -> None: self._cpu_dur = self._cpu_tac - self._cpu_tic self._wall_dur = self._wall_tac - self._wall_tic else: - sys.stdout.write('Task has already stopped\n') + sys.stdout.write("Task has already stopped\n") @property def name(self) -> str: @@ -72,7 +73,7 @@ class StopWatch: def __init__(self) -> None: self._tasks = OrderedDict() - self._tasks['stopwatch_time'] = TimingTask('stopwatch_time') + self._tasks["stopwatch_time"] = TimingTask("stopwatch_time") def insert_task(self, name: str, cpu_dur: float, wall_dur: float) -> None: if name not in self._tasks: @@ -109,20 +110,20 @@ def stop_task(self, name: str) -> None: try: self._tasks[name].stop() except KeyError: - sys.stderr.write('There is no such task: %s\n' % name) + sys.stderr.write("There is no such task: %s\n" % name) def get_cpu_dur(self, name: str) -> float: try: return self._tasks[name].cpu_dur except KeyError: - sys.stderr.write('There is no such task: %s\n' % name) + sys.stderr.write("There is no such task: %s\n" % name) return 0.0 def get_wall_dur(self, name: str) -> float: try: return self._tasks[name].wall_dur except KeyError: - sys.stderr.write('There is no such task: %s\n' % name) + sys.stderr.write("There is no such task: %s\n" % name) return 0.0 def cpu_sum(self) -> float: @@ -134,19 +135,27 @@ def wall_sum(self) -> float: return sum([max(0, self._tasks[tsk].wall_dur) for tsk in self._tasks]) def __repr__(self) -> str: - ret_str = '| %10s | %10s | %10s | %10s | %10s | %10s | %10s |\n' % \ - ('Name', 'CPUStart', 'CPUEnd', 'CPUDur', 'WallStart', - 'WallEnd', - 'WallDur') - ret_str += '+' + '------------+' * 7 + '\n' - offset = self._tasks['stopwatch_time'].wall_tic + ret_str = "| %10s | %10s | %10s | %10s | %10s | %10s | %10s |\n" % ( + "Name", + "CPUStart", + "CPUEnd", + "CPUDur", + "WallStart", + "WallEnd", + "WallDur", + ) + ret_str += "+" + "------------+" * 7 + "\n" + offset = self._tasks["stopwatch_time"].wall_tic for tsk in self._tasks: if self._tasks[tsk].wall_tac: wall_tac = 
self._tasks[tsk].wall_tac - offset - ret_str += '| %10s | %10.5f | %10.5f | %10.5f | %10s | %10s | %10s |\n' % \ - (tsk, self._tasks[tsk].cpu_tic, self._tasks[tsk].cpu_tac, - self.cpu_elapsed(tsk), - self._tasks[tsk].wall_tic - offset, - wall_tac if self._tasks[tsk].wall_tac else False, - self.wall_elapsed(tsk)) + ret_str += "| %10s | %10.5f | %10.5f | %10.5f | %10s | %10s | %10s |\n" % ( + tsk, + self._tasks[tsk].cpu_tic, + self._tasks[tsk].cpu_tac, + self.cpu_elapsed(tsk), + self._tasks[tsk].wall_tic - offset, + wall_tac if self._tasks[tsk].wall_tac else False, + self.wall_elapsed(tsk), + ) return ret_str diff --git a/doc/conf.py b/doc/conf.py index 5d114b3550..6efcd5c736 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -21,11 +21,12 @@ import sys import sphinx_bootstrap_theme import autosklearn + # Add the parent directory of this file to the PYTHONPATH import os current_directory = os.path.dirname(__file__) -parent_directory = os.path.join(current_directory, '..') +parent_directory = os.path.join(current_directory, "..") parent_directory = os.path.abspath(parent_directory) sys.path.append(parent_directory) @@ -39,15 +40,21 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.autosummary', - 'sphinx.ext.doctest', 'sphinx.ext.coverage', - 'sphinx.ext.mathjax', 'sphinx.ext.viewcode', - 'sphinx_gallery.gen_gallery', 'sphinx.ext.autosectionlabel', - 'sphinx_toolbox.collapse', - # sphinx.ext.autosexctionlabel raises duplicate label warnings - # because same section headers are used multiple times throughout - # the documentation. - 'numpydoc'] +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.doctest", + "sphinx.ext.coverage", + "sphinx.ext.mathjax", + "sphinx.ext.viewcode", + "sphinx_gallery.gen_gallery", + "sphinx.ext.autosectionlabel", + "sphinx_toolbox.collapse", + # sphinx.ext.autosexctionlabel raises duplicate label warnings + # because same section headers are used multiple times throughout + # the documentation. + "numpydoc", +] from sphinx_gallery.sorting import ExplicitOrder, FileNameSortKey @@ -63,8 +70,9 @@ # Sphinx-gallery configuration. # get current branch -binder_branch = 'master' +binder_branch = "master" import autosklearn + if "dev" in autosklearn.__version__: binder_branch = "development" @@ -72,7 +80,7 @@ # We do this by setting an evironment variable we check and modifying the python config # object. # We have this extra processing as it enters as a raw string and we need a boolean value -gallery_env_var ="SPHINX_GALLERY_PLOT" +gallery_env_var = "SPHINX_GALLERY_PLOT" sphinx_plot_gallery_flag = True if gallery_env_var in os.environ: @@ -82,52 +90,55 @@ elif value in ["True", "true", "1"]: sphinx_plot_gallery_flag = True else: - raise ValueError(f'Env variable {gallery_env_var} must be set to "false" or "true"') + raise ValueError( + f'Env variable {gallery_env_var} must be set to "false" or "true"' + ) sphinx_gallery_conf = { # path to the examples - 'examples_dirs': '../examples', + "examples_dirs": "../examples", # path where to save gallery generated examples - 'gallery_dirs': 'examples', - #TODO: fix back/forward references for the examples. + "gallery_dirs": "examples", + # TODO: fix back/forward references for the examples. 
#'doc_module': ('autosklearn'), #'reference_url': { # 'autosklearn': None - #}, - 'plot_gallery': sphinx_plot_gallery_flag, - 'backreferences_dir': None, - 'filename_pattern': 'example.*.py$', - 'ignore_pattern': r'custom_metrics\.py|__init__\.py|example_parallel_manual_spawning_python.py', - 'binder': { - # Required keys - 'org': 'automl', - 'repo': 'auto-sklearn', - 'branch': binder_branch, - 'binderhub_url': 'https://mybinder.org', - 'dependencies': ['../.binder/apt.txt', '../.binder/requirements.txt'], - #'filepath_prefix': '' # A prefix to prepend to any filepaths in Binder links. - # Jupyter notebooks for Binder will be copied to this directory (relative to built documentation root). - 'notebooks_dir': 'notebooks/', - 'use_jupyter_lab': True, # Whether Binder links should start Jupyter Lab instead of the Jupyter Notebook interface. - }, + # }, + "plot_gallery": sphinx_plot_gallery_flag, + "backreferences_dir": None, + "filename_pattern": "example.*.py$", + "ignore_pattern": r"custom_metrics\.py|__init__\.py|example_parallel_manual_spawning_python.py", + "binder": { + # Required keys + "org": "automl", + "repo": "auto-sklearn", + "branch": binder_branch, + "binderhub_url": "https://mybinder.org", + "dependencies": ["../.binder/apt.txt", "../.binder/requirements.txt"], + #'filepath_prefix': '' # A prefix to prepend to any filepaths in Binder links. + # Jupyter notebooks for Binder will be copied to this directory (relative to built documentation root). + "notebooks_dir": "notebooks/", + "use_jupyter_lab": True, # Whether Binder links should start Jupyter Lab instead of the Jupyter Notebook interface. + }, } # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix of source filenames. -source_suffix = '.rst' +source_suffix = ".rst" # The encoding of source files. # source_encoding = 'utf-8-sig' # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = u'AutoSklearn' -copyright = u"2014-{}, Machine Learning Professorship Freiburg".format( - datetime.datetime.now().year) +project = "AutoSklearn" +copyright = "2014-{}, Machine Learning Professorship Freiburg".format( + datetime.datetime.now().year +) # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -150,7 +161,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ['_build', '_templates', '_static'] +exclude_patterns = ["_build", "_templates", "_static"] # The reST default role (used for this markup: `text`) to use for all # documents. @@ -168,7 +179,7 @@ # show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. # modindex_common_prefix = [] @@ -180,44 +191,37 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = 'bootstrap' +html_theme = "bootstrap" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. html_theme_options = { # Navigation bar title. 
(Default: ``project`` value) - 'navbar_title': "auto-sklearn", - + "navbar_title": "auto-sklearn", # Tab name for entire site. (Default: "Site") # 'navbar_site_name': "Site", - # A list of tuples containting pages to link to. The value should # be in the form [(name, page), ..] - 'navbar_links': [ - ('Start', 'index'), - ('Releases', 'releases'), - ('Installation', 'installation'), - ('Manual', 'manual'), - ('Examples', 'examples/index'), - ('API', 'api'), - ('Extending', 'extending'), - ('FAQ', 'faq'), + "navbar_links": [ + ("Start", "index"), + ("Releases", "releases"), + ("Installation", "installation"), + ("Manual", "manual"), + ("Examples", "examples/index"), + ("API", "api"), + ("Extending", "extending"), + ("FAQ", "faq"), ], - # Render the next and previous page links in navbar. (Default: true) - 'navbar_sidebarrel': False, - + "navbar_sidebarrel": False, # Render the current pages TOC in the navbar. (Default: true) - 'navbar_pagenav': False, - + "navbar_pagenav": False, # Tab name for the current pages TOC. (Default: "Page") - 'navbar_pagenav_name': "On this page", - + "navbar_pagenav_name": "On this page", # Global TOC depth for "site" navbar tab. (Default: 1) # Switching to -1 shows all levels. - 'globaltoc_depth': 1, - + "globaltoc_depth": 1, # Include hidden TOCs in Site navbar? # # Note: If this is "false", you cannot have mixed ``:hidden:`` and @@ -225,29 +229,24 @@ # will break. # # Values: "true" (default) or "false" - 'globaltoc_includehidden': "false", - + "globaltoc_includehidden": "false", # HTML navbar class (Default: "navbar") to attach to
element. # For black navbar, do "navbar navbar-inverse" - 'navbar_class': "navbar", - + "navbar_class": "navbar", # Fix navigation bar to top of page? # Values: "true" (default) or "false" - 'navbar_fixed_top': "true", - + "navbar_fixed_top": "true", # Location of link to source. # Options are "nav" (default), "footer" or anything else to exclude. - 'source_link_position': "footer", - + "source_link_position": "footer", # Bootswatch (http://bootswatch.com/) theme. # # Options are nothing with "" (default) or the name of a valid theme # such as "amelia" or "cosmo". - 'bootswatch_theme': "cosmo", - + "bootswatch_theme": "cosmo", # Choose Bootstrap version. # Values: "3" (default) or "2" (in quotes) - 'bootstrap_version': "3", + "bootstrap_version": "3", } # Add any paths that contain custom themes here, relative to this directory. @@ -288,7 +287,7 @@ # html_use_smartypants = True # Custom sidebar templates, maps document names to template names. -html_sidebars = {'**': ['localtoc.html']} +html_sidebars = {"**": ["localtoc.html"]} # Additional templates that should be rendered to pages, maps page names to # template names. @@ -321,7 +320,7 @@ # html_file_suffix = None # Output file base name for HTML help builder. -htmlhelp_basename = 'AutoSklearndoc' +htmlhelp_basename = "AutoSklearndoc" # -- Options for LaTeX output --------------------------------------------- @@ -337,9 +336,15 @@ # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). -latex_documents = [('index', 'AutoSklearn.tex', u'AutoSklearn Documentation', - u'Matthias Feurer, Aaron Klein, Katharina Eggensperger', - 'manual'), ] +latex_documents = [ + ( + "index", + "AutoSklearn.tex", + "AutoSklearn Documentation", + "Matthias Feurer, Aaron Klein, Katharina Eggensperger", + "manual", + ), +] # The name of an image file (relative to this directory) to place at the top of # the title page. @@ -365,8 +370,15 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [('index', 'autosklearn', u'AutoSklearn Documentation', - [u'Matthias Feurer, Aaron Klein, Katharina Eggensperger'], 1)] +man_pages = [ + ( + "index", + "autosklearn", + "AutoSklearn Documentation", + ["Matthias Feurer, Aaron Klein, Katharina Eggensperger"], + 1, + ) +] # If true, show URL addresses after external links. # man_show_urls = False @@ -376,10 +388,17 @@ # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) -texinfo_documents = [('index', 'AutoSklearn', u'AutoSklearn Documentation', - u'Matthias Feurer, Aaron Klein, Katharina Eggensperger', - 'AutoSklearn', 'One line description of project.', - 'Miscellaneous'), ] +texinfo_documents = [ + ( + "index", + "AutoSklearn", + "AutoSklearn Documentation", + "Matthias Feurer, Aaron Klein, Katharina Eggensperger", + "AutoSklearn", + "One line description of project.", + "Miscellaneous", + ), +] # Documents to append as an appendix to all manuals. # texinfo_appendices = [] @@ -396,12 +415,12 @@ # Only the class’ docstring is inserted. This is the default. # You can still document __init__ as a separate method using automethod or # the members option to autoclass. -#"both" +# "both" # Both the class’ and the __init__ method’s docstring are concatenated and # inserted. # "init" # Only the __init__ method’s docstring is inserted. 
-autoclass_content = 'both' +autoclass_content = "both" def setup(app): diff --git a/examples/20_basic/example_classification.py b/examples/20_basic/example_classification.py index fcb99b65ef..621dcf4f86 100644 --- a/examples/20_basic/example_classification.py +++ b/examples/20_basic/example_classification.py @@ -20,8 +20,9 @@ # ============ X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) -X_train, X_test, y_train, y_test = \ - sklearn.model_selection.train_test_split(X, y, random_state=1) +X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, y, random_state=1 +) ############################################################################ # Build and fit a classifier @@ -30,9 +31,9 @@ automl = autosklearn.classification.AutoSklearnClassifier( time_left_for_this_task=120, per_run_time_limit=30, - tmp_folder='/tmp/autosklearn_classification_example_tmp', + tmp_folder="/tmp/autosklearn_classification_example_tmp", ) -automl.fit(X_train, y_train, dataset_name='breast_cancer') +automl.fit(X_train, y_train, dataset_name="breast_cancer") ############################################################################ # View the models found by auto-sklearn @@ -52,4 +53,3 @@ predictions = automl.predict(X_test) print("Accuracy score:", sklearn.metrics.accuracy_score(y_test, predictions)) - diff --git a/examples/20_basic/example_multilabel_classification.py b/examples/20_basic/example_multilabel_classification.py index 835b110ea6..bedf974868 100644 --- a/examples/20_basic/example_multilabel_classification.py +++ b/examples/20_basic/example_multilabel_classification.py @@ -29,8 +29,8 @@ # This is to comply with Scikit-learn requirement: # "Positive classes are indicated with 1 and negative classes with 0 or -1." # More information on: https://scikit-learn.org/stable/modules/multiclass.html -y[y == 'TRUE'] = 1 -y[y == 'FALSE'] = 0 +y[y == "TRUE"] = 1 +y[y == "FALSE"] = 0 y = y.astype(int) # Using type of target is a good way to make sure your data @@ -51,9 +51,9 @@ # Bellow two flags are provided to speed up calculations # Not recommended for a real implementation initial_configurations_via_metalearning=0, - smac_scenario_args={'runcount_limit': 1}, + smac_scenario_args={"runcount_limit": 1}, ) -automl.fit(X_train, y_train, dataset_name='reuters') +automl.fit(X_train, y_train, dataset_name="reuters") ############################################################################ # View the models found by auto-sklearn diff --git a/examples/20_basic/example_multioutput_regression.py b/examples/20_basic/example_multioutput_regression.py index a2e345fcac..cb12643adb 100644 --- a/examples/20_basic/example_multioutput_regression.py +++ b/examples/20_basic/example_multioutput_regression.py @@ -32,9 +32,9 @@ automl = AutoSklearnRegressor( time_left_for_this_task=120, per_run_time_limit=30, - tmp_folder='/tmp/autosklearn_multioutput_regression_example_tmp', + tmp_folder="/tmp/autosklearn_multioutput_regression_example_tmp", ) -automl.fit(X_train, y_train, dataset_name='synthetic') +automl.fit(X_train, y_train, dataset_name="synthetic") ############################################################################ # View the models found by auto-sklearn diff --git a/examples/20_basic/example_regression.py b/examples/20_basic/example_regression.py index 6b47607db0..5ade1c2866 100644 --- a/examples/20_basic/example_regression.py +++ b/examples/20_basic/example_regression.py @@ -21,8 +21,9 @@ X, y = sklearn.datasets.load_diabetes(return_X_y=True) -X_train, X_test, y_train, y_test 
= \ - sklearn.model_selection.train_test_split(X, y, random_state=1) +X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, y, random_state=1 +) ########################### # Build and fit a regressor @@ -31,9 +32,9 @@ automl = autosklearn.regression.AutoSklearnRegressor( time_left_for_this_task=120, per_run_time_limit=30, - tmp_folder='/tmp/autosklearn_regression_example_tmp', + tmp_folder="/tmp/autosklearn_regression_example_tmp", ) -automl.fit(X_train, y_train, dataset_name='diabetes') +automl.fit(X_train, y_train, dataset_name="diabetes") ############################################################################ # View the models found by auto-sklearn @@ -69,12 +70,12 @@ # than the true value), points above the diagonal were underestimated (predicted value is lower than # the true value). -plt.scatter(train_predictions, y_train, label="Train samples", c='#d95f02') -plt.scatter(test_predictions, y_test, label="Test samples", c='#7570b3') +plt.scatter(train_predictions, y_train, label="Train samples", c="#d95f02") +plt.scatter(test_predictions, y_test, label="Test samples", c="#7570b3") plt.xlabel("Predicted value") plt.ylabel("True value") plt.legend() -plt.plot([30, 400], [30, 400], c='k', zorder=0) +plt.plot([30, 400], [30, 400], c="k", zorder=0) plt.xlim([30, 400]) plt.ylim([30, 400]) plt.tight_layout() diff --git a/examples/40_advanced/custom_metrics.py b/examples/40_advanced/custom_metrics.py index 6b548e5718..c6ad14efdd 100644 --- a/examples/40_advanced/custom_metrics.py +++ b/examples/40_advanced/custom_metrics.py @@ -9,6 +9,7 @@ # Custom metrics definition # ========================= + def accuracy(solution, prediction): # custom function defining accuracy return np.mean(solution == prediction) diff --git a/examples/40_advanced/example_calc_multiple_metrics.py b/examples/40_advanced/example_calc_multiple_metrics.py index c7a4e78503..fa4d17cc1e 100644 --- a/examples/40_advanced/example_calc_multiple_metrics.py +++ b/examples/40_advanced/example_calc_multiple_metrics.py @@ -25,9 +25,9 @@ def error(solution, prediction): def get_metric_result(cv_results): results = pd.DataFrame.from_dict(cv_results) - results = results[results['status'] == "Success"] - cols = ['rank_test_scores', 'param_classifier:__choice__', 'mean_test_score'] - cols.extend([key for key in cv_results.keys() if key.startswith('metric_')]) + results = results[results["status"] == "Success"] + cols = ["rank_test_scores", "param_classifier:__choice__", "mean_test_score"] + cols.extend([key for key in cv_results.keys() if key.startswith("metric_")]) return results[cols] @@ -36,25 +36,26 @@ def get_metric_result(cv_results): # ============ X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) -X_train, X_test, y_train, y_test = \ - sklearn.model_selection.train_test_split(X, y, random_state=1) +X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, y, random_state=1 +) ############################################################################ # Build and fit a classifier # ========================== error_rate = autosklearn.metrics.make_scorer( - name='custom_error', + name="custom_error", score_func=error, optimum=0, greater_is_better=False, needs_proba=False, - needs_threshold=False + needs_threshold=False, ) cls = autosklearn.classification.AutoSklearnClassifier( time_left_for_this_task=120, per_run_time_limit=30, - scoring_functions=[balanced_accuracy, precision, recall, f1, error_rate] + scoring_functions=[balanced_accuracy, precision, recall, f1, 
error_rate], ) cls.fit(X_train, y_train, X_test, y_test) diff --git a/examples/40_advanced/example_debug_logging.py b/examples/40_advanced/example_debug_logging.py index 07e2e3ed99..664ce0b461 100644 --- a/examples/40_advanced/example_debug_logging.py +++ b/examples/40_advanced/example_debug_logging.py @@ -28,8 +28,9 @@ # Load kr-vs-kp dataset from https://www.openml.org/d/3 X, y = data = sklearn.datasets.fetch_openml(data_id=3, return_X_y=True, as_frame=True) -X_train, X_test, y_train, y_test = \ - sklearn.model_selection.train_test_split(X, y, random_state=1) +X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, y, random_state=1 +) ############################################################################ @@ -40,33 +41,31 @@ # We will instead create a custom one as follows: logging_config = { - 'version': 1, - 'disable_existing_loggers': True, - 'formatters': { - 'custom': { + "version": 1, + "disable_existing_loggers": True, + "formatters": { + "custom": { # More format options are available in the official # `documentation `_ - 'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s' + "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s" } }, - # Any INFO level msg will be printed to the console - 'handlers': { - 'console': { - 'level': 'INFO', - 'formatter': 'custom', - 'class': 'logging.StreamHandler', - 'stream': 'ext://sys.stdout', + "handlers": { + "console": { + "level": "INFO", + "formatter": "custom", + "class": "logging.StreamHandler", + "stream": "ext://sys.stdout", }, }, - - 'loggers': { - '': { # root logger - 'level': 'DEBUG', + "loggers": { + "": { # root logger + "level": "DEBUG", }, - 'Client-EnsembleBuilder': { - 'level': 'DEBUG', - 'handlers': ['console'], + "Client-EnsembleBuilder": { + "level": "DEBUG", + "handlers": ["console"], }, }, } @@ -80,11 +79,11 @@ # Bellow two flags are provided to speed up calculations # Not recommended for a real implementation initial_configurations_via_metalearning=0, - smac_scenario_args={'runcount_limit': 2}, + smac_scenario_args={"runcount_limit": 2}, # Pass the config file we created logging_config=logging_config, # *auto-sklearn* generates temporal files under tmp_folder - tmp_folder='./tmp_folder', + tmp_folder="./tmp_folder", # By default tmp_folder is deleted. We will preserve it # for debug purposes delete_tmp_folder_after_terminate=False, @@ -101,5 +100,5 @@ # * tmp_folder/smac3-output # Auto-sklearn always outputs to this log file # tmp_folder/AutoML*.log -for filename in pathlib.Path('./tmp_folder').glob('*'): +for filename in pathlib.Path("./tmp_folder").glob("*"): print(filename) diff --git a/examples/40_advanced/example_feature_types.py b/examples/40_advanced/example_feature_types.py index 6317eb5a46..7d22edd715 100644 --- a/examples/40_advanced/example_feature_types.py +++ b/examples/40_advanced/example_feature_types.py @@ -4,9 +4,10 @@ Feature Types ============= -In *auto-sklearn* it is possible to specify the feature types of a dataset when calling the method -:meth:`fit() ` by specifying the argument -``feat_type``. The following example demonstrates a way it can be done. +In *auto-sklearn* it is possible to specify the feature types of a dataset when calling +the method :meth:`fit() ` by +specifying the argument ``feat_type``. +The following example demonstrates a way it can be done. 
Additionally, you can provide a properly formatted pandas DataFrame, and the feature types will be automatically inferred, as demonstrated in @@ -26,11 +27,12 @@ # ============ # Load Australian dataset from https://www.openml.org/d/40981 bunch = data = sklearn.datasets.fetch_openml(data_id=40981, as_frame=True) -y = bunch['target'].to_numpy() -X = bunch['data'].to_numpy(np.float) +y = bunch["target"].to_numpy() +X = bunch["data"].to_numpy(np.float) -X_train, X_test, y_train, y_test = \ - sklearn.model_selection.train_test_split(X, y, random_state=1) +X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, y, random_state=1 +) # Auto-sklearn can automatically recognize categorical/numerical data from a pandas # DataFrame. This example highlights how the user can provide the feature types, @@ -38,8 +40,7 @@ # feat_type is a list that tags each column from a DataFrame/ numpy array / list # with the case-insensitive string categorical or numerical, accordingly. feat_type = [ - 'Categorical' if x.name == 'category' else 'Numerical' - for x in bunch['data'].dtypes + "Categorical" if x.name == "category" else "Numerical" for x in bunch["data"].dtypes ] ############################################################################ @@ -51,7 +52,7 @@ # Bellow two flags are provided to speed up calculations # Not recommended for a real implementation initial_configurations_via_metalearning=0, - smac_scenario_args={'runcount_limit': 1}, + smac_scenario_args={"runcount_limit": 1}, ) cls.fit(X_train, y_train, X_test, y_test, feat_type=feat_type) diff --git a/examples/40_advanced/example_get_pipeline_components.py b/examples/40_advanced/example_get_pipeline_components.py index f7a97ead27..80686889ac 100644 --- a/examples/40_advanced/example_get_pipeline_components.py +++ b/examples/40_advanced/example_get_pipeline_components.py @@ -27,8 +27,9 @@ # ============ X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) -X_train, X_test, y_train, y_test = \ - sklearn.model_selection.train_test_split(X, y, random_state=1) +X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, y, random_state=1 +) ############################################################################ # Build and fit the classifier @@ -40,20 +41,16 @@ disable_evaluator_output=False, # To simplify querying the models in the final ensemble, we # restrict auto-sklearn to use only pca as a preprocessor - include={ - 'feature_preprocessor': ['pca'] - }, + include={"feature_preprocessor": ["pca"]}, ) -automl.fit(X_train, y_train, dataset_name='breast_cancer') +automl.fit(X_train, y_train, dataset_name="breast_cancer") ############################################################################ # Predict using the model # ======================= predictions = automl.predict(X_test) -print("Accuracy score:{}".format( - sklearn.metrics.accuracy_score(y_test, predictions)) -) +print("Accuracy score:{}".format(sklearn.metrics.accuracy_score(y_test, predictions))) ############################################################################ @@ -104,7 +101,7 @@ # Let's iterative over all entries for run_key in automl.automl_.runhistory_.data: - print('#########') + print("#########") print(run_key) print(automl.automl_.runhistory_.data[run_key]) @@ -166,7 +163,7 @@ print("Lowest loss:", losses_and_configurations[0][0]) print( "Best configuration:", - automl.automl_.runhistory_.ids_config[losses_and_configurations[0][1]] + automl.automl_.runhistory_.ids_config[losses_and_configurations[0][1]], ) 
############################################################################ @@ -188,7 +185,7 @@ # The explained variance ratio per stage for i, (weight, pipeline) in enumerate(automl.get_models_with_weights()): for stage_name, component in pipeline.named_steps.items(): - if 'feature_preprocessor' in stage_name: + if "feature_preprocessor" in stage_name: print( "The {}th pipeline has a explained variance of {}".format( i, @@ -196,6 +193,6 @@ # Access the sklearn object via the choice attribute # We want the explained variance attributed of # each principal component - component.choice.preprocessor.explained_variance_ratio_ + component.choice.preprocessor.explained_variance_ratio_, ) ) diff --git a/examples/40_advanced/example_inspect_predictions.py b/examples/40_advanced/example_inspect_predictions.py index 24e149a37b..cf6de2476f 100644 --- a/examples/40_advanced/example_inspect_predictions.py +++ b/examples/40_advanced/example_inspect_predictions.py @@ -36,9 +36,9 @@ automl = autosklearn.classification.AutoSklearnClassifier( time_left_for_this_task=120, per_run_time_limit=30, - tmp_folder='/tmp/autosklearn_inspect_predictions_example_tmp', + tmp_folder="/tmp/autosklearn_inspect_predictions_example_tmp", ) -automl.fit(X_train, y_train, dataset_name='Run_or_walk_information') +automl.fit(X_train, y_train, dataset_name="Run_or_walk_information") s = automl.score(X_train, y_train) print(f"Train score {s}") @@ -61,16 +61,19 @@ r = permutation_importance(automl, X_test, y_test, n_repeats=10, random_state=0) sort_idx = r.importances_mean.argsort()[::-1] -plt.boxplot(r.importances[sort_idx].T, - labels=[dataset.feature_names[i] for i in sort_idx]) +plt.boxplot( + r.importances[sort_idx].T, labels=[dataset.feature_names[i] for i in sort_idx] +) plt.xticks(rotation=90) plt.tight_layout() plt.show() for i in sort_idx[::-1]: - print(f"{dataset.feature_names[i]:10s}: {r.importances_mean[i]:.3f} +/- " - f"{r.importances_std[i]:.3f}") + print( + f"{dataset.feature_names[i]:10s}: {r.importances_mean[i]:.3f} +/- " + f"{r.importances_std[i]:.3f}" + ) ############################################################################################ # Create partial dependence (PD) and individual conditional expectation (ICE) plots - part 2 @@ -90,11 +93,14 @@ # combining ICE (thin lines) and PD (thick line) features = [1, 2] -plot_partial_dependence(automl, dataset.data, - features=features, - grid_resolution=5, - kind="both", - feature_names=dataset.feature_names) +plot_partial_dependence( + automl, + dataset.data, + features=features, + grid_resolution=5, + kind="both", + feature_names=dataset.feature_names, +) plt.tight_layout() plt.show() @@ -106,9 +112,12 @@ # these features. Again, we'll look at acceleration_y and acceleration_z. 
features = [[1, 2]] -plot_partial_dependence(automl, dataset.data, - features=features, - grid_resolution=5, - feature_names=dataset.feature_names) +plot_partial_dependence( + automl, + dataset.data, + features=features, + grid_resolution=5, + feature_names=dataset.feature_names, +) plt.tight_layout() plt.show() diff --git a/examples/40_advanced/example_interpretable_models.py b/examples/40_advanced/example_interpretable_models.py index a78695082c..7b551de7b8 100644 --- a/examples/40_advanced/example_interpretable_models.py +++ b/examples/40_advanced/example_interpretable_models.py @@ -29,7 +29,9 @@ # Show available preprocessors # ============================ -from autosklearn.pipeline.components.feature_preprocessing import FeaturePreprocessorChoice +from autosklearn.pipeline.components.feature_preprocessing import ( + FeaturePreprocessorChoice, +) for name in FeaturePreprocessorChoice.get_components(): print(name) @@ -39,8 +41,9 @@ # ============ X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) -X_train, X_test, y_train, y_test = \ - sklearn.model_selection.train_test_split(X, y, random_state=1) +X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, y, random_state=1 +) ############################################################################ # Build and fit a classifier @@ -55,18 +58,18 @@ automl = autosklearn.classification.AutoSklearnClassifier( time_left_for_this_task=120, per_run_time_limit=30, - tmp_folder='/tmp/autosklearn_interpretable_models_example_tmp', + tmp_folder="/tmp/autosklearn_interpretable_models_example_tmp", include={ - 'classifier': [ - 'decision_tree', 'lda', 'sgd' + "classifier": ["decision_tree", "lda", "sgd"], + "feature_preprocessor": [ + "no_preprocessing", + "polynomial", + "select_percentile_classification", ], - 'feature_preprocessor': [ - 'no_preprocessing', 'polynomial', 'select_percentile_classification' - ] }, ensemble_size=1, ) -automl.fit(X_train, y_train, dataset_name='breast_cancer') +automl.fit(X_train, y_train, dataset_name="breast_cancer") ############################################################################ # Print the final ensemble constructed by auto-sklearn diff --git a/examples/40_advanced/example_metrics.py b/examples/40_advanced/example_metrics.py index 2cf39f1553..33d0f678fd 100644 --- a/examples/40_advanced/example_metrics.py +++ b/examples/40_advanced/example_metrics.py @@ -51,8 +51,9 @@ def error_wk(solution, prediction, extra_argument): # ============ X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) -X_train, X_test, y_train, y_test = \ - sklearn.model_selection.train_test_split(X, y, random_state=1) +X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, y, random_state=1 +) ############################################################################ # Print a list of available metrics @@ -68,7 +69,7 @@ def error_wk(solution, prediction, extra_argument): # First example: Use predefined accuracy metric # ============================================= -print("#"*80) +print("#" * 80) print("Use predefined accuracy metric") cls = autosklearn.classification.AutoSklearnClassifier( time_left_for_this_task=60, @@ -87,7 +88,7 @@ def error_wk(solution, prediction, extra_argument): # Second example: Use own accuracy metric # ======================================= -print("#"*80) +print("#" * 80) print("Use self defined accuracy metric") accuracy_scorer = autosklearn.metrics.make_scorer( name="accu", @@ -114,15 +115,15 @@ def error_wk(solution, prediction, 
extra_argument): # Third example: Use own error metric # =================================== -print("#"*80) +print("#" * 80) print("Use self defined error metric") error_rate = autosklearn.metrics.make_scorer( - name='error', + name="error", score_func=error, optimum=0, greater_is_better=False, needs_proba=False, - needs_threshold=False + needs_threshold=False, ) cls = autosklearn.classification.AutoSklearnClassifier( time_left_for_this_task=60, @@ -141,7 +142,7 @@ def error_wk(solution, prediction, extra_argument): # Fourth example: Use own accuracy metric with additional argument # ================================================================ -print("#"*80) +print("#" * 80) print("Use self defined accuracy with additional argument") accuracy_scorer = autosklearn.metrics.make_scorer( name="accu_add", @@ -153,10 +154,7 @@ def error_wk(solution, prediction, extra_argument): extra_argument=None, ) cls = autosklearn.classification.AutoSklearnClassifier( - time_left_for_this_task=60, - per_run_time_limit=30, - seed=1, - metric=accuracy_scorer + time_left_for_this_task=60, per_run_time_limit=30, seed=1, metric=accuracy_scorer ) cls.fit(X_train, y_train) @@ -169,7 +167,7 @@ def error_wk(solution, prediction, extra_argument): # Fifth example: Use own accuracy metric with additional argument # =============================================================== -print("#"*80) +print("#" * 80) print("Use self defined error with additional argument") error_rate = autosklearn.metrics.make_scorer( name="error_add", diff --git a/examples/40_advanced/example_pandas_train_test.py b/examples/40_advanced/example_pandas_train_test.py index 910cac4c31..7e584fd8aa 100644 --- a/examples/40_advanced/example_pandas_train_test.py +++ b/examples/40_advanced/example_pandas_train_test.py @@ -58,22 +58,19 @@ # Targets for classification are also automatically encoded # If using fetch_openml, data is already properly encoded, below # is an example for user reference -X = pd.DataFrame( - data=X, - columns=['A' + str(i) for i in range(1, 15)] -) -desired_boolean_columns = ['A1'] -desired_categorical_columns = ['A4', 'A5', 'A6', 'A8', 'A9', 'A11', 'A12'] -desired_numerical_columns = ['A2', 'A3', 'A7', 'A10', 'A13', 'A14'] +X = pd.DataFrame(data=X, columns=["A" + str(i) for i in range(1, 15)]) +desired_boolean_columns = ["A1"] +desired_categorical_columns = ["A4", "A5", "A6", "A8", "A9", "A11", "A12"] +desired_numerical_columns = ["A2", "A3", "A7", "A10", "A13", "A14"] for column in X.columns: if column in desired_boolean_columns: - X[column] = X[column].astype('bool') + X[column] = X[column].astype("bool") elif column in desired_categorical_columns: - X[column] = X[column].astype('category') + X[column] = X[column].astype("category") else: X[column] = pd.to_numeric(X[column]) -y = pd.DataFrame(y, dtype='category') +y = pd.DataFrame(y, dtype="category") X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( X, y, test_size=0.5, random_state=3 @@ -100,15 +97,15 @@ ############################################################################ # Plot the ensemble performance # =================================== -# The *performance_over_time_* attribute returns a pandas dataframe, which can +# The *performance_over_time_* attribute returns a pandas dataframe, which can # be directly used for plotting poT = cls.performance_over_time_ poT.plot( - x='Timestamp', - kind='line', + x="Timestamp", + kind="line", legend=True, - title='Auto-sklearn accuracy over time', + title="Auto-sklearn accuracy over time", 
grid=True, ) plt.show() diff --git a/examples/40_advanced/example_resampling.py b/examples/40_advanced/example_resampling.py index 124316a60a..aa6a272373 100644 --- a/examples/40_advanced/example_resampling.py +++ b/examples/40_advanced/example_resampling.py @@ -22,8 +22,9 @@ # ============ X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) -X_train, X_test, y_train, y_test = \ - sklearn.model_selection.train_test_split(X, y, random_state=1) +X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, y, random_state=1 +) ############################################################################ # Holdout @@ -32,15 +33,15 @@ automl = autosklearn.classification.AutoSklearnClassifier( time_left_for_this_task=120, per_run_time_limit=30, - tmp_folder='/tmp/autosklearn_resampling_example_tmp', + tmp_folder="/tmp/autosklearn_resampling_example_tmp", disable_evaluator_output=False, # 'holdout' with 'train_size'=0.67 is the default argument setting # for AutoSklearnClassifier. It is explicitly specified in this example # for demonstrational purpose. - resampling_strategy='holdout', - resampling_strategy_arguments={'train_size': 0.67}, + resampling_strategy="holdout", + resampling_strategy_arguments={"train_size": 0.67}, ) -automl.fit(X_train, y_train, dataset_name='breast_cancer') +automl.fit(X_train, y_train, dataset_name="breast_cancer") ############################################################################ # Get the Score of the final ensemble @@ -57,18 +58,18 @@ automl = autosklearn.classification.AutoSklearnClassifier( time_left_for_this_task=120, per_run_time_limit=30, - tmp_folder='/tmp/autosklearn_resampling_example_tmp', + tmp_folder="/tmp/autosklearn_resampling_example_tmp", disable_evaluator_output=False, - resampling_strategy='cv', - resampling_strategy_arguments={'folds': 5}, + resampling_strategy="cv", + resampling_strategy_arguments={"folds": 5}, ) -automl.fit(X_train, y_train, dataset_name='breast_cancer') +automl.fit(X_train, y_train, dataset_name="breast_cancer") # One can use models trained during cross-validation directly to predict # for unseen data. For this, all k models trained during k-fold # cross-validation are considered as a single soft-voting ensemble inside # the ensemble constructed with ensemble selection. -print('Before re-fit') +print("Before re-fit") predictions = automl.predict(X_test) print("Accuracy score CV", sklearn.metrics.accuracy_score(y_test, predictions)) @@ -78,7 +79,7 @@ # During fit(), models are fit on individual cross-validation folds. To use # all available data, we call refit() which trains all models in the # final ensemble on the whole dataset. -print('After re-fit') +print("After re-fit") automl.refit(X_train.copy(), y_train.copy()) predictions = automl.predict(X_test) print("Accuracy score CV", sklearn.metrics.accuracy_score(y_test, predictions)) @@ -106,11 +107,11 @@ automl = autosklearn.classification.AutoSklearnClassifier( time_left_for_this_task=120, per_run_time_limit=30, - tmp_folder='/tmp/autosklearn_resampling_example_tmp', + tmp_folder="/tmp/autosklearn_resampling_example_tmp", disable_evaluator_output=False, resampling_strategy=resampling_strategy, ) -automl.fit(X_train, y_train, dataset_name='breast_cancer') +automl.fit(X_train, y_train, dataset_name="breast_cancer") print(automl.sprint_statistics()) @@ -126,4 +127,6 @@ # Obviously, this score is pretty bad as we "destroyed" the dataset by # splitting it on the first feature. 
predictions = automl.predict(X_test) -print("Accuracy score custom split", sklearn.metrics.accuracy_score(y_test, predictions)) +print( + "Accuracy score custom split", sklearn.metrics.accuracy_score(y_test, predictions) +) diff --git a/examples/40_advanced/example_single_configuration.py b/examples/40_advanced/example_single_configuration.py index 3d230f4ab0..d216caef7c 100644 --- a/examples/40_advanced/example_single_configuration.py +++ b/examples/40_advanced/example_single_configuration.py @@ -46,9 +46,7 @@ # We will limit the configuration space only to # have RandomForest as a valid model. We recommend enabling all # possible models to get a better performance. - include={ - 'classifier': ['random_forest'] - }, + include={"classifier": ["random_forest"]}, delete_tmp_folder_after_terminate=False, ) @@ -60,17 +58,21 @@ # min_samples_split in the Random Forest. We recommend you to look into # how the ConfigSpace package works here: # https://automl.github.io/ConfigSpace/master/ -cs = cls.get_configuration_space(X, y, dataset_name='kr-vs-kp') +cs = cls.get_configuration_space(X, y, dataset_name="kr-vs-kp") config = cs.sample_configuration() -config._values['classifier:random_forest:min_samples_split'] = 11 +config._values["classifier:random_forest:min_samples_split"] = 11 # Make sure that your changed configuration complies with the configuration space config.is_valid_configuration() -pipeline, run_info, run_value = cls.fit_pipeline(X=X_train, y=y_train, - dataset_name='kr-vs-kp', - config=config, - X_test=X_test, y_test=y_test) +pipeline, run_info, run_value = cls.fit_pipeline( + X=X_train, + y=y_train, + dataset_name="kr-vs-kp", + config=config, + X_test=X_test, + y_test=y_test, +) # This object complies with Scikit-Learn Pipeline API. # https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html @@ -84,9 +86,9 @@ # We can make sure that our pipeline configuration was honored as follows print("Passed Configuration:", pipeline.config) -print("Random Forest:", pipeline.named_steps['classifier'].choice.estimator) +print("Random Forest:", pipeline.named_steps["classifier"].choice.estimator) # We can also search for new configurations using the fit() method # Any configurations found by Auto-Sklearn -- even the ones created using # fit_pipeline() are stored to disk and can be used for Ensemble Selection -cs = cls.fit(X, y, dataset_name='kr-vs-kp') +cs = cls.fit(X, y, dataset_name="kr-vs-kp") diff --git a/examples/40_advanced/example_text_preprocessing.py b/examples/40_advanced/example_text_preprocessing.py index f60188781b..76c2d91cfc 100644 --- a/examples/40_advanced/example_text_preprocessing.py +++ b/examples/40_advanced/example_text_preprocessing.py @@ -25,20 +25,28 @@ print(f"{X.info()}\n") # manually convert these to string columns -X = X.astype({'name': 'string', 'ticket': 'string', 'cabin': 'string', 'boat': 'string', - 'home.dest': 'string'}) +X = X.astype( + { + "name": "string", + "ticket": "string", + "cabin": "string", + "boat": "string", + "home.dest": "string", + } +) # now *auto-sklearn* handles the string columns with its text feature preprocessing pipeline -X_train, X_test, y_train, y_test = \ - sklearn.model_selection.train_test_split(X, y, random_state=1) +X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, y, random_state=1 +) cls = autosklearn.classification.AutoSklearnClassifier( time_left_for_this_task=30, # Bellow two flags are provided to speed up calculations # Not recommended for a real implementation 
initial_configurations_via_metalearning=0, - smac_scenario_args={'runcount_limit': 1}, + smac_scenario_args={"runcount_limit": 1}, ) cls.fit(X_train, y_train, X_test, y_test) @@ -48,20 +56,24 @@ X, y = sklearn.datasets.fetch_openml(data_id=40945, return_X_y=True, as_frame=True) -X = X.select_dtypes(exclude=['object']) +X = X.select_dtypes(exclude=["object"]) -X_train, X_test, y_train, y_test = \ - sklearn.model_selection.train_test_split(X, y, random_state=1) +X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, y, random_state=1 +) cls = autosklearn.classification.AutoSklearnClassifier( time_left_for_this_task=30, # Bellow two flags are provided to speed up calculations # Not recommended for a real implementation initial_configurations_via_metalearning=0, - smac_scenario_args={'runcount_limit': 1}, + smac_scenario_args={"runcount_limit": 1}, ) cls.fit(X_train, y_train, X_test, y_test) predictions = cls.predict(X_test) -print("Accuracy score without text preprocessing", sklearn.metrics.accuracy_score(y_test, predictions)) +print( + "Accuracy score without text preprocessing", + sklearn.metrics.accuracy_score(y_test, predictions), +) diff --git a/examples/60_search/example_parallel_manual_spawning_cli.py b/examples/60_search/example_parallel_manual_spawning_cli.py index 41200cd78c..fa2bff375b 100644 --- a/examples/60_search/example_parallel_manual_spawning_cli.py +++ b/examples/60_search/example_parallel_manual_spawning_cli.py @@ -68,7 +68,7 @@ from autosklearn.classification import AutoSklearnClassifier from autosklearn.constants import MULTICLASS_CLASSIFICATION -tmp_folder = '/tmp/autosklearn_parallel_3_example_tmp' +tmp_folder = "/tmp/autosklearn_parallel_3_example_tmp" worker_processes = [] @@ -83,7 +83,7 @@ # location. This filename is also given to the worker so they can find all # relevant information to connect to the scheduler. -scheduler_file_name = 'scheduler-file.json' +scheduler_file_name = "scheduler-file.json" ############################################################################ @@ -99,12 +99,16 @@ # We will now execute this bash command from within Python to have a # self-contained example: + def cli_start_scheduler(scheduler_file_name): - command = ( - f"dask-scheduler --scheduler-file {scheduler_file_name} --idle-timeout 10" + command = f"dask-scheduler --scheduler-file {scheduler_file_name} --idle-timeout 10" + proc = subprocess.run( + command, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + shell=True, + check=True, ) - proc = subprocess.run(command, stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, shell=True, check=True) while proc.returncode is None: time.sleep(1) @@ -112,7 +116,7 @@ def cli_start_scheduler(scheduler_file_name): if __name__ == "__main__": process_python_worker = multiprocessing.Process( target=cli_start_scheduler, - args=(scheduler_file_name, ), + args=(scheduler_file_name,), ) process_python_worker.start() worker_processes.append(process_python_worker) @@ -141,22 +145,25 @@ def cli_start_scheduler(scheduler_file_name): # We disable dask's memory management by passing ``--memory-limit`` as # Auto-sklearn does the memory management itself. 
+ def cli_start_worker(scheduler_file_name): command = ( "DASK_DISTRIBUTED__WORKER__DAEMON=False " "dask-worker --nthreads 1 --lifetime 35 --memory-limit 0 " f"--scheduler-file {scheduler_file_name}" ) - proc = subprocess.run(command, stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, shell=True) + proc = subprocess.run( + command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True + ) while proc.returncode is None: time.sleep(1) -if __name__ == '__main__': + +if __name__ == "__main__": for _ in range(2): process_cli_worker = multiprocessing.Process( target=cli_start_worker, - args=(scheduler_file_name, ), + args=(scheduler_file_name,), ) process_cli_worker.start() worker_processes.append(process_cli_worker) @@ -178,8 +185,9 @@ def cli_start_worker(scheduler_file_name): # ~~~~~~~~~~~~~~~~~~ if __name__ == "__main__": X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) - X_train, X_test, y_train, y_test = \ - sklearn.model_selection.train_test_split(X, y, random_state=1) + X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, y, random_state=1 + ) automl = AutoSklearnClassifier( delete_tmp_folder_after_terminate=False, @@ -198,7 +206,7 @@ def cli_start_worker(scheduler_file_name): automl.fit_ensemble( y_train, task=MULTICLASS_CLASSIFICATION, - dataset_name='digits', + dataset_name="digits", ensemble_size=20, ensemble_nbest=50, ) @@ -215,7 +223,7 @@ def cli_start_worker(scheduler_file_name): # This is only necessary if the workers are started from within this python # script. In a real application one would start them directly from the command # line. -if __name__ == '__main__': +if __name__ == "__main__": process_python_worker.join() for process in worker_processes: process.join() diff --git a/examples/60_search/example_parallel_manual_spawning_python.py b/examples/60_search/example_parallel_manual_spawning_python.py index ed723598a9..75c5bcee30 100644 --- a/examples/60_search/example_parallel_manual_spawning_python.py +++ b/examples/60_search/example_parallel_manual_spawning_python.py @@ -58,7 +58,7 @@ from autosklearn.classification import AutoSklearnClassifier from autosklearn.constants import MULTICLASS_CLASSIFICATION -tmp_folder = '/tmp/autosklearn_parallel_2_example_tmp' +tmp_folder = "/tmp/autosklearn_parallel_2_example_tmp" ############################################################################ @@ -73,8 +73,9 @@ # https://docs.dask.org/en/latest/setup/python-advanced.html for further # information. + def start_python_worker(scheduler_address): - dask.config.set({'distributed.worker.daemon': False}) + dask.config.set({"distributed.worker.daemon": False}) async def do_work(): async with dask.distributed.Nanny( @@ -97,14 +98,17 @@ async def do_work(): # To use auto-sklearn in parallel we must guard the code with # ``if __name__ == '__main__'``. We then start a dask cluster as a context, # which means that it is automatically stopped once all computation is done. -if __name__ == '__main__': +if __name__ == "__main__": X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) - X_train, X_test, y_train, y_test = \ - sklearn.model_selection.train_test_split(X, y, random_state=1) + X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, y, random_state=1 + ) # 1. Create a dask scheduler (LocalCluster) with dask.distributed.LocalCluster( - n_workers=0, processes=True, threads_per_worker=1, + n_workers=0, + processes=True, + threads_per_worker=1, ) as cluster: # 2. 
Start the workers @@ -114,7 +118,7 @@ async def do_work(): for _ in range(2): process_python_worker = multiprocessing.Process( target=start_python_worker, - args=(cluster.scheduler_address, ), + args=(cluster.scheduler_address,), ) process_python_worker.start() worker_processes.append(process_python_worker) @@ -141,7 +145,7 @@ async def do_work(): automl.fit_ensemble( y_train, task=MULTICLASS_CLASSIFICATION, - dataset_name='digits', + dataset_name="digits", ensemble_size=20, ensemble_nbest=50, ) diff --git a/examples/60_search/example_parallel_n_jobs.py b/examples/60_search/example_parallel_n_jobs.py index b7265ce3fa..1cb5014ca8 100644 --- a/examples/60_search/example_parallel_n_jobs.py +++ b/examples/60_search/example_parallel_n_jobs.py @@ -27,26 +27,27 @@ # Data Loading # ============ X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) -X_train, X_test, y_train, y_test = \ - sklearn.model_selection.train_test_split(X, y, random_state=1) +X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, y, random_state=1 +) ############################################################################ # Build and fit a classifier # ========================== # # To use ``n_jobs_`` we must guard the code -if __name__ == '__main__': +if __name__ == "__main__": automl = autosklearn.classification.AutoSklearnClassifier( time_left_for_this_task=120, per_run_time_limit=30, - tmp_folder='/tmp/autosklearn_parallel_1_example_tmp', + tmp_folder="/tmp/autosklearn_parallel_1_example_tmp", n_jobs=4, # Each one of the 4 jobs is allocated 3GB memory_limit=3072, seed=5, ) - automl.fit(X_train, y_train, dataset_name='breast_cancer') + automl.fit(X_train, y_train, dataset_name="breast_cancer") # Print statistics about the auto-sklearn run such as number of # iterations, number of models failed with a time out. diff --git a/examples/60_search/example_random_search.py b/examples/60_search/example_random_search.py index 2c9cc76695..520c8c18b0 100644 --- a/examples/60_search/example_random_search.py +++ b/examples/60_search/example_random_search.py @@ -29,8 +29,9 @@ # ============ X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) -X_train, X_test, y_train, y_test = \ - sklearn.model_selection.train_test_split(X, y, random_state=1) +X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, y, random_state=1 +) ############################################################################ @@ -48,8 +49,10 @@ def get_roar_object_callback( """Random online adaptive racing.""" if n_jobs > 1 or (dask_client and len(dask_client.nthreads()) > 1): - raise ValueError("Please make sure to guard the code invoking Auto-sklearn by " - "`if __name__ == '__main__'` and remove this exception.") + raise ValueError( + "Please make sure to guard the code invoking Auto-sklearn by " + "`if __name__ == '__main__'` and remove this exception." 
+ ) scenario = Scenario(scenario_dict) return ROAR( @@ -66,15 +69,15 @@ def get_roar_object_callback( automl = autosklearn.classification.AutoSklearnClassifier( time_left_for_this_task=60, per_run_time_limit=15, - tmp_folder='/tmp/autosklearn_random_search_example_tmp', + tmp_folder="/tmp/autosklearn_random_search_example_tmp", initial_configurations_via_metalearning=0, # The callback to get the SMAC object get_smac_object_callback=get_roar_object_callback, ) -automl.fit(X_train, y_train, dataset_name='breast_cancer') +automl.fit(X_train, y_train, dataset_name="breast_cancer") -print('#' * 80) -print('Results for ROAR.') +print("#" * 80) +print("Results for ROAR.") # Print the final ensemble constructed by auto-sklearn via ROAR. pprint(automl.show_models(), indent=4) predictions = automl.predict(X_test) @@ -88,22 +91,18 @@ def get_roar_object_callback( # Fit a classifier using Random Search # ==================================== def get_random_search_object_callback( - scenario_dict, - seed, - ta, - ta_kwargs, - metalearning_configurations, - n_jobs, - dask_client + scenario_dict, seed, ta, ta_kwargs, metalearning_configurations, n_jobs, dask_client ): - """ Random search """ + """Random search""" if n_jobs > 1 or (dask_client and len(dask_client.nthreads()) > 1): - raise ValueError("Please make sure to guard the code invoking Auto-sklearn by " - "`if __name__ == '__main__'` and remove this exception.") + raise ValueError( + "Please make sure to guard the code invoking Auto-sklearn by " + "`if __name__ == '__main__'` and remove this exception." + ) - scenario_dict['minR'] = len(scenario_dict['instances']) - scenario_dict['initial_incumbent'] = 'RANDOM' + scenario_dict["minR"] = len(scenario_dict["instances"]) + scenario_dict["initial_incumbent"] = "RANDOM" scenario = Scenario(scenario_dict) return ROAR( scenario=scenario, @@ -119,15 +118,15 @@ def get_random_search_object_callback( automl = autosklearn.classification.AutoSklearnClassifier( time_left_for_this_task=60, per_run_time_limit=15, - tmp_folder='/tmp/autosklearn_random_search_example_tmp', + tmp_folder="/tmp/autosklearn_random_search_example_tmp", initial_configurations_via_metalearning=0, # Passing the callback to get the SMAC object get_smac_object_callback=get_random_search_object_callback, ) -automl.fit(X_train, y_train, dataset_name='breast_cancer') +automl.fit(X_train, y_train, dataset_name="breast_cancer") -print('#' * 80) -print('Results for random search.') +print("#" * 80) +print("Results for random search.") # Print the final ensemble constructed by auto-sklearn via random search. 
pprint(automl.show_models(), indent=4) diff --git a/examples/60_search/example_sequential.py b/examples/60_search/example_sequential.py index fad088396d..1ff63649da 100644 --- a/examples/60_search/example_sequential.py +++ b/examples/60_search/example_sequential.py @@ -22,8 +22,9 @@ # ====================================== X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) -X_train, X_test, y_train, y_test = \ - sklearn.model_selection.train_test_split(X, y, random_state=1) +X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, y, random_state=1 +) ############################################################################ # Build and fit the classifier @@ -32,14 +33,14 @@ automl = autosklearn.classification.AutoSklearnClassifier( time_left_for_this_task=120, per_run_time_limit=30, - tmp_folder='/tmp/autosklearn_sequential_example_tmp', + tmp_folder="/tmp/autosklearn_sequential_example_tmp", # Do not construct ensembles in parallel to avoid using more than one # core at a time. The ensemble will be constructed after auto-sklearn # finished fitting all machine learning models. ensemble_size=0, delete_tmp_folder_after_terminate=False, ) -automl.fit(X_train, y_train, dataset_name='breast_cancer') +automl.fit(X_train, y_train, dataset_name="breast_cancer") # This call to fit_ensemble uses all models trained in the previous call # to fit to build an ensemble which can be used with automl.predict() diff --git a/examples/60_search/example_successive_halving.py b/examples/60_search/example_successive_halving.py index fdb29da6e0..e57be7f157 100644 --- a/examples/60_search/example_successive_halving.py +++ b/examples/60_search/example_successive_halving.py @@ -27,6 +27,7 @@ # Define a callback that instantiates SuccessiveHalving # ===================================================== + def get_smac_object_callback(budget_type): def get_smac_object( scenario_dict, @@ -43,8 +44,10 @@ def get_smac_object( from smac.scenario.scenario import Scenario if n_jobs > 1 or (dask_client and len(dask_client.nthreads()) > 1): - raise ValueError("Please make sure to guard the code invoking Auto-sklearn by " - "`if __name__ == '__main__'` and remove this exception.") + raise ValueError( + "Please make sure to guard the code invoking Auto-sklearn by " + "`if __name__ == '__main__'` and remove this exception." 
+ ) scenario = Scenario(scenario_dict) if len(metalearning_configurations) > 0: @@ -54,7 +57,7 @@ def get_smac_object( initial_configurations = None rh2EPM = RunHistory2EPM4LogCost - ta_kwargs['budget_type'] = budget_type + ta_kwargs["budget_type"] = budget_type return SMAC4AC( scenario=scenario, @@ -66,14 +69,15 @@ def get_smac_object( run_id=seed, intensifier=SuccessiveHalving, intensifier_kwargs={ - 'initial_budget': 10.0, - 'max_budget': 100, - 'eta': 2, - 'min_chall': 1 + "initial_budget": 10.0, + "max_budget": 100, + "eta": 2, + "min_chall": 1, }, n_jobs=n_jobs, dask_client=dask_client, ) + return get_smac_object @@ -82,8 +86,9 @@ def get_smac_object( # ============ X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) -X_train, X_test, y_train, y_test = \ - sklearn.model_selection.train_test_split(X, y, random_state=1, shuffle=True) +X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, y, random_state=1, shuffle=True +) ############################################################################ # Build and fit a classifier @@ -92,23 +97,26 @@ def get_smac_object( automl = autosklearn.classification.AutoSklearnClassifier( time_left_for_this_task=40, per_run_time_limit=10, - tmp_folder='/tmp/autosklearn_sh_example_tmp', + tmp_folder="/tmp/autosklearn_sh_example_tmp", disable_evaluator_output=False, # 'holdout' with 'train_size'=0.67 is the default argument setting # for AutoSklearnClassifier. It is explicitly specified in this example # for demonstrational purpose. - resampling_strategy='holdout', - resampling_strategy_arguments={'train_size': 0.67}, + resampling_strategy="holdout", + resampling_strategy_arguments={"train_size": 0.67}, include={ - 'classifier': [ - 'extra_trees', 'gradient_boosting', 'random_forest', - 'sgd', 'passive_aggressive' + "classifier": [ + "extra_trees", + "gradient_boosting", + "random_forest", + "sgd", + "passive_aggressive", ], - 'feature_preprocessor': ['no_preprocessing'] + "feature_preprocessor": ["no_preprocessing"], }, - get_smac_object_callback=get_smac_object_callback('iterations'), + get_smac_object_callback=get_smac_object_callback("iterations"), ) -automl.fit(X_train, y_train, dataset_name='breast_cancer') +automl.fit(X_train, y_train, dataset_name="breast_cancer") pprint(automl.show_models(), indent=4) predictions = automl.predict(X_test) @@ -122,25 +130,29 @@ def get_smac_object( # ======================================================== X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) -X_train, X_test, y_train, y_test = \ - sklearn.model_selection.train_test_split(X, y, random_state=1, shuffle=True) +X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, y, random_state=1, shuffle=True +) automl = autosklearn.classification.AutoSklearnClassifier( time_left_for_this_task=40, per_run_time_limit=10, - tmp_folder='/tmp/autosklearn_sh_example_tmp_01', + tmp_folder="/tmp/autosklearn_sh_example_tmp_01", disable_evaluator_output=False, - resampling_strategy='cv', + resampling_strategy="cv", include={ - 'classifier': [ - 'extra_trees', 'gradient_boosting', 'random_forest', - 'sgd', 'passive_aggressive' + "classifier": [ + "extra_trees", + "gradient_boosting", + "random_forest", + "sgd", + "passive_aggressive", ], - 'feature_preprocessor': ['no_preprocessing'] + "feature_preprocessor": ["no_preprocessing"], }, - get_smac_object_callback=get_smac_object_callback('iterations'), + get_smac_object_callback=get_smac_object_callback("iterations"), ) -automl.fit(X_train, y_train, 
dataset_name='breast_cancer') +automl.fit(X_train, y_train, dataset_name="breast_cancer") # Print the final ensemble constructed by auto-sklearn. pprint(automl.show_models(), indent=4) @@ -156,25 +168,29 @@ def get_smac_object( # ============================================================= X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) -X_train, X_test, y_train, y_test = \ - sklearn.model_selection.train_test_split(X, y, random_state=1, shuffle=True) +X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, y, random_state=1, shuffle=True +) automl = autosklearn.classification.AutoSklearnClassifier( time_left_for_this_task=40, per_run_time_limit=10, - tmp_folder='/tmp/autosklearn_sh_example_tmp_cv_02', + tmp_folder="/tmp/autosklearn_sh_example_tmp_cv_02", disable_evaluator_output=False, - resampling_strategy='cv-iterative-fit', + resampling_strategy="cv-iterative-fit", include={ - 'classifier': [ - 'extra_trees', 'gradient_boosting', 'random_forest', - 'sgd', 'passive_aggressive' + "classifier": [ + "extra_trees", + "gradient_boosting", + "random_forest", + "sgd", + "passive_aggressive", ], - 'feature_preprocessor': ['no_preprocessing'] + "feature_preprocessor": ["no_preprocessing"], }, - get_smac_object_callback=get_smac_object_callback('iterations'), + get_smac_object_callback=get_smac_object_callback("iterations"), ) -automl.fit(X_train, y_train, dataset_name='breast_cancer') +automl.fit(X_train, y_train, dataset_name="breast_cancer") # Print the final ensemble constructed by auto-sklearn. pprint(automl.show_models(), indent=4) @@ -190,22 +206,23 @@ def get_smac_object( # =============================================================== X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) -X_train, X_test, y_train, y_test = \ - sklearn.model_selection.train_test_split(X, y, random_state=1, shuffle=True) +X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, y, random_state=1, shuffle=True +) automl = autosklearn.classification.AutoSklearnClassifier( time_left_for_this_task=40, per_run_time_limit=10, - tmp_folder='/tmp/autosklearn_sh_example_tmp_03', + tmp_folder="/tmp/autosklearn_sh_example_tmp_03", disable_evaluator_output=False, # 'holdout' with 'train_size'=0.67 is the default argument setting # for AutoSklearnClassifier. It is explicitly specified in this example # for demonstrational purpose. - resampling_strategy='holdout', - resampling_strategy_arguments={'train_size': 0.67}, - get_smac_object_callback=get_smac_object_callback('subsample'), + resampling_strategy="holdout", + resampling_strategy_arguments={"train_size": 0.67}, + get_smac_object_callback=get_smac_object_callback("subsample"), ) -automl.fit(X_train, y_train, dataset_name='breast_cancer') +automl.fit(X_train, y_train, dataset_name="breast_cancer") # Print the final ensemble constructed by auto-sklearn. 
pprint(automl.show_models(), indent=4) @@ -222,27 +239,26 @@ def get_smac_object( # subsamples otherwise X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) -X_train, X_test, y_train, y_test = \ - sklearn.model_selection.train_test_split(X, y, random_state=1, shuffle=True) +X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, y, random_state=1, shuffle=True +) automl = autosklearn.classification.AutoSklearnClassifier( time_left_for_this_task=40, per_run_time_limit=10, - tmp_folder='/tmp/autosklearn_sh_example_tmp_04', + tmp_folder="/tmp/autosklearn_sh_example_tmp_04", disable_evaluator_output=False, # 'holdout' with 'train_size'=0.67 is the default argument setting # for AutoSklearnClassifier. It is explicitly specified in this example # for demonstrational purpose. - resampling_strategy='holdout', - resampling_strategy_arguments={'train_size': 0.67}, + resampling_strategy="holdout", + resampling_strategy_arguments={"train_size": 0.67}, include={ - 'classifier': [ - 'extra_trees', 'gradient_boosting', 'random_forest', 'sgd' - ] + "classifier": ["extra_trees", "gradient_boosting", "random_forest", "sgd"] }, - get_smac_object_callback=get_smac_object_callback('mixed'), + get_smac_object_callback=get_smac_object_callback("mixed"), ) -automl.fit(X_train, y_train, dataset_name='breast_cancer') +automl.fit(X_train, y_train, dataset_name="breast_cancer") # Print the final ensemble constructed by auto-sklearn. pprint(automl.show_models(), indent=4) diff --git a/examples/80_extending/example_extending_classification.py b/examples/80_extending/example_extending_classification.py index b6132f4c18..b5112c022b 100644 --- a/examples/80_extending/example_extending_classification.py +++ b/examples/80_extending/example_extending_classification.py @@ -9,16 +9,22 @@ from pprint import pprint from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import CategoricalHyperparameter, \ - UniformIntegerHyperparameter, UniformFloatHyperparameter +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformIntegerHyperparameter, + UniformFloatHyperparameter, +) import sklearn.metrics import autosklearn.classification import autosklearn.pipeline.components.classification -from autosklearn.pipeline.components.base \ - import AutoSklearnClassificationAlgorithm -from autosklearn.pipeline.constants import DENSE, SIGNED_DATA, UNSIGNED_DATA, \ - PREDICTIONS +from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm +from autosklearn.pipeline.constants import ( + DENSE, + SIGNED_DATA, + UNSIGNED_DATA, + PREDICTIONS, +) from sklearn.datasets import load_breast_cancer from sklearn.model_selection import train_test_split @@ -28,8 +34,8 @@ # Create MLP classifier component for auto-sklearn # ================================================ -class MLPClassifier(AutoSklearnClassificationAlgorithm): +class MLPClassifier(AutoSklearnClassificationAlgorithm): def __init__( self, hidden_layer_depth, @@ -52,15 +58,18 @@ def fit(self, X, y): self.alpha = float(self.alpha) from sklearn.neural_network import MLPClassifier + hidden_layer_sizes = tuple( self.num_nodes_per_layer for i in range(self.hidden_layer_depth) ) - self.estimator = MLPClassifier(hidden_layer_sizes=hidden_layer_sizes, - activation=self.activation, - alpha=self.alpha, - solver=self.solver, - random_state=self.random_state) + self.estimator = MLPClassifier( + hidden_layer_sizes=hidden_layer_sizes, + activation=self.activation, + alpha=self.alpha, + 
solver=self.solver, + random_state=self.random_state, + ) self.estimator.fit(X, y) return self @@ -77,17 +86,17 @@ def predict_proba(self, X): @staticmethod def get_properties(dataset_properties=None): return { - 'shortname': 'MLP Classifier', - 'name': 'MLP Classifier', - 'handles_regression': False, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': False, - 'handles_multioutput': False, - 'is_deterministic': False, + "shortname": "MLP Classifier", + "name": "MLP Classifier", + "handles_regression": False, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": False, + "handles_multioutput": False, + "is_deterministic": False, # Both input and output must be tuple(iterable) - 'input': [DENSE, SIGNED_DATA, UNSIGNED_DATA], - 'output': [PREDICTIONS] + "input": [DENSE, SIGNED_DATA, UNSIGNED_DATA], + "output": [PREDICTIONS], } @staticmethod @@ -100,18 +109,25 @@ def get_hyperparameter_search_space(dataset_properties=None): name="num_nodes_per_layer", lower=16, upper=216, default_value=32 ) activation = CategoricalHyperparameter( - name="activation", choices=['identity', 'logistic', 'tanh', 'relu'], - default_value='relu' + name="activation", + choices=["identity", "logistic", "tanh", "relu"], + default_value="relu", ) alpha = UniformFloatHyperparameter( name="alpha", lower=0.0001, upper=1.0, default_value=0.0001 ) solver = CategoricalHyperparameter( - name="solver", choices=['lbfgs', 'sgd', 'adam'], default_value='adam' + name="solver", choices=["lbfgs", "sgd", "adam"], default_value="adam" + ) + cs.add_hyperparameters( + [ + hidden_layer_depth, + num_nodes_per_layer, + activation, + alpha, + solver, + ] ) - cs.add_hyperparameters([ - hidden_layer_depth, num_nodes_per_layer, activation, alpha, solver, - ]) return cs @@ -134,13 +150,11 @@ def get_hyperparameter_search_space(dataset_properties=None): clf = autosklearn.classification.AutoSklearnClassifier( time_left_for_this_task=30, per_run_time_limit=10, - include={ - 'classifier': ['MLPClassifier'] - }, + include={"classifier": ["MLPClassifier"]}, # The two flags below are provided to speed up calculations # Not recommended for a real implementation initial_configurations_via_metalearning=0, - smac_scenario_args={'runcount_limit': 5}, + smac_scenario_args={"runcount_limit": 5}, ) clf.fit(X_train, y_train) diff --git a/examples/80_extending/example_extending_data_preprocessor.py b/examples/80_extending/example_extending_data_preprocessor.py index 7fdd72e971..aa5c443255 100644 --- a/examples/80_extending/example_extending_data_preprocessor.py +++ b/examples/80_extending/example_extending_data_preprocessor.py @@ -21,9 +21,8 @@ # Create NoPreprocessing component for auto-sklearn # ================================================= class NoPreprocessing(AutoSklearnPreprocessingAlgorithm): - def __init__(self, **kwargs): - """ This preprocessor does not change the data """ + """This preprocessor does not change the data""" # Some internal checks make sure parameters are set for key, val in kwargs.items(): setattr(self, key, val) @@ -37,16 +36,16 @@ def transform(self, X): @staticmethod def get_properties(dataset_properties=None): return { - 'shortname': 'NoPreprocessing', -
"name": "NoPreprocessing", + "handles_regression": True, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": True, + "is_deterministic": True, + "input": (SPARSE, DENSE, UNSIGNED_DATA), + "output": (INPUT,), } @staticmethod @@ -70,13 +69,11 @@ def get_hyperparameter_search_space(dataset_properties=None): clf = autosklearn.classification.AutoSklearnClassifier( time_left_for_this_task=120, - include={ - 'data_preprocessor': ['NoPreprocessing'] - }, + include={"data_preprocessor": ["NoPreprocessing"]}, # Bellow two flags are provided to speed up calculations # Not recommended for a real implementation initial_configurations_via_metalearning=0, - smac_scenario_args={'runcount_limit': 5}, + smac_scenario_args={"runcount_limit": 5}, ) clf.fit(X_train, y_train) diff --git a/examples/80_extending/example_extending_preprocessor.py b/examples/80_extending/example_extending_preprocessor.py index 9ac93a45b3..1eb3fc1daf 100644 --- a/examples/80_extending/example_extending_preprocessor.py +++ b/examples/80_extending/example_extending_preprocessor.py @@ -10,16 +10,17 @@ from pprint import pprint from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import UniformFloatHyperparameter, CategoricalHyperparameter +from ConfigSpace.hyperparameters import ( + UniformFloatHyperparameter, + CategoricalHyperparameter, +) from ConfigSpace.conditions import InCondition import sklearn.metrics import autosklearn.classification import autosklearn.pipeline.components.feature_preprocessing -from autosklearn.pipeline.components.base \ - import AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import DENSE, SIGNED_DATA, \ - UNSIGNED_DATA +from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.constants import DENSE, SIGNED_DATA, UNSIGNED_DATA from autosklearn.util.common import check_none from sklearn.datasets import load_breast_cancer @@ -30,7 +31,6 @@ # Create LDA component for auto-sklearn # ===================================== class LDA(AutoSklearnPreprocessingAlgorithm): - def __init__(self, solver, tol, shrinkage=None, random_state=None): self.solver = solver self.shrinkage = shrinkage @@ -46,6 +46,7 @@ def fit(self, X, y=None): self.tol = float(self.tol) import sklearn.discriminant_analysis + self.preprocessor = sklearn.discriminant_analysis.LinearDiscriminantAnalysis( shrinkage=self.shrinkage, solver=self.solver, @@ -62,23 +63,23 @@ def transform(self, X): @staticmethod def get_properties(dataset_properties=None): return { - 'shortname': 'LDA', - 'name': 'Linear Discriminant Analysis', - 'handles_regression': False, - 'handles_classification': True, - 'handles_multiclass': False, - 'handles_multilabel': False, - 'handles_multioutput': False, - 'is_deterministic': True, - 'input': (DENSE, UNSIGNED_DATA, SIGNED_DATA), - 'output': (DENSE, UNSIGNED_DATA, SIGNED_DATA) + "shortname": "LDA", + "name": "Linear Discriminant Analysis", + "handles_regression": False, + "handles_classification": True, + "handles_multiclass": False, + "handles_multilabel": False, + "handles_multioutput": False, + "is_deterministic": True, + "input": (DENSE, UNSIGNED_DATA, SIGNED_DATA), + "output": (DENSE, UNSIGNED_DATA, SIGNED_DATA), } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): cs = ConfigurationSpace() solver = CategoricalHyperparameter( - name="solver", choices=['svd', 'lsqr', 'eigen'], default_value='svd' + name="solver", 
choices=["svd", "lsqr", "eigen"], default_value="svd" ) shrinkage = UniformFloatHyperparameter( name="shrinkage", lower=0.0, upper=1.0, default_value=0.5 @@ -87,7 +88,7 @@ def get_hyperparameter_search_space(dataset_properties=None): name="tol", lower=0.0001, upper=1, default_value=0.0001 ) cs.add_hyperparameters([solver, shrinkage, tol]) - shrinkage_condition = InCondition(shrinkage, solver, ['lsqr', 'eigen']) + shrinkage_condition = InCondition(shrinkage, solver, ["lsqr", "eigen"]) cs.add_condition(shrinkage_condition) return cs @@ -115,13 +116,11 @@ def get_hyperparameter_search_space(dataset_properties=None): clf = autosklearn.classification.AutoSklearnClassifier( time_left_for_this_task=30, - include={ - 'feature_preprocessor': ['LDA'] - }, + include={"feature_preprocessor": ["LDA"]}, # Bellow two flags are provided to speed up calculations # Not recommended for a real implementation initial_configurations_via_metalearning=0, - smac_scenario_args={'runcount_limit': 5}, + smac_scenario_args={"runcount_limit": 5}, ) clf.fit(X_train, y_train) diff --git a/examples/80_extending/example_extending_regression.py b/examples/80_extending/example_extending_regression.py index 3bdc008d4e..4d6987a9db 100644 --- a/examples/80_extending/example_extending_regression.py +++ b/examples/80_extending/example_extending_regression.py @@ -9,16 +9,24 @@ from pprint import pprint from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ - UniformIntegerHyperparameter, CategoricalHyperparameter +from ConfigSpace.hyperparameters import ( + UniformFloatHyperparameter, + UniformIntegerHyperparameter, + CategoricalHyperparameter, +) from ConfigSpace.conditions import EqualsCondition import sklearn.metrics import autosklearn.regression import autosklearn.pipeline.components.regression from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm -from autosklearn.pipeline.constants import SPARSE, DENSE, \ - SIGNED_DATA, UNSIGNED_DATA, PREDICTIONS +from autosklearn.pipeline.constants import ( + SPARSE, + DENSE, + SIGNED_DATA, + UNSIGNED_DATA, + PREDICTIONS, +) from sklearn.datasets import load_diabetes from sklearn.model_selection import train_test_split @@ -28,8 +36,8 @@ # Implement kernel ridge regression component for auto-sklearn # ============================================================ -class KernelRidgeRegression(AutoSklearnRegressionAlgorithm): +class KernelRidgeRegression(AutoSklearnRegressionAlgorithm): def __init__(self, alpha, kernel, gamma, degree, coef0, random_state=None): self.alpha = alpha self.kernel = kernel @@ -46,12 +54,13 @@ def fit(self, X, y): self.coef0 = float(self.coef0) import sklearn.kernel_ridge + self.estimator = sklearn.kernel_ridge.KernelRidge( alpha=self.alpha, kernel=self.kernel, gamma=self.gamma, degree=self.degree, - coef0=self.coef0 + coef0=self.coef0, ) self.estimator.fit(X, y) return self @@ -64,42 +73,46 @@ def predict(self, X): @staticmethod def get_properties(dataset_properties=None): return { - 'shortname': 'KRR', - 'name': 'Kernel Ridge Regression', - 'handles_regression': True, - 'handles_classification': False, - 'handles_multiclass': False, - 'handles_multilabel': False, - 'handles_multioutput': True, - 'is_deterministic': True, - 'input': (SPARSE, DENSE, UNSIGNED_DATA, SIGNED_DATA), - 'output': (PREDICTIONS,) + "shortname": "KRR", + "name": "Kernel Ridge Regression", + "handles_regression": True, + "handles_classification": False, + "handles_multiclass": False, + 
"handles_multilabel": False, + "handles_multioutput": True, + "is_deterministic": True, + "input": (SPARSE, DENSE, UNSIGNED_DATA, SIGNED_DATA), + "output": (PREDICTIONS,), } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): cs = ConfigurationSpace() alpha = UniformFloatHyperparameter( - name='alpha', lower=10 ** -5, upper=1, log=True, default_value=1.0 + name="alpha", lower=10**-5, upper=1, log=True, default_value=1.0 ) kernel = CategoricalHyperparameter( - name='kernel', + name="kernel", # We restrict ourselves to two possible kernels for this example - choices=['polynomial', 'rbf'], - default_value='polynomial' + choices=["polynomial", "rbf"], + default_value="polynomial", ) gamma = UniformFloatHyperparameter( - name='gamma', lower=0.00001, upper=1, default_value=0.1, log=True + name="gamma", lower=0.00001, upper=1, default_value=0.1, log=True ) degree = UniformIntegerHyperparameter( - name='degree', lower=2, upper=5, default_value=3 + name="degree", lower=2, upper=5, default_value=3 ) coef0 = UniformFloatHyperparameter( - name='coef0', lower=1e-2, upper=1e2, log=True, default_value=1, + name="coef0", + lower=1e-2, + upper=1e2, + log=True, + default_value=1, ) cs.add_hyperparameters([alpha, kernel, gamma, degree, coef0]) - degree_condition = EqualsCondition(degree, kernel, 'polynomial') - coef0_condition = EqualsCondition(coef0, kernel, 'polynomial') + degree_condition = EqualsCondition(degree, kernel, "polynomial") + coef0_condition = EqualsCondition(coef0, kernel, "polynomial") cs.add_conditions([degree_condition, coef0_condition]) return cs @@ -123,13 +136,11 @@ def get_hyperparameter_search_space(dataset_properties=None): reg = autosklearn.regression.AutoSklearnRegressor( time_left_for_this_task=30, per_run_time_limit=10, - include={ - 'regressor': ['KernelRidgeRegression'] - }, + include={"regressor": ["KernelRidgeRegression"]}, # Bellow two flags are provided to speed up calculations # Not recommended for a real implementation initial_configurations_via_metalearning=0, - smac_scenario_args={'runcount_limit': 5}, + smac_scenario_args={"runcount_limit": 5}, ) reg.fit(X_train, y_train) diff --git a/examples/80_extending/example_restrict_number_of_hyperparameters.py b/examples/80_extending/example_restrict_number_of_hyperparameters.py index 9c6ec2501f..d8bd2f4a98 100644 --- a/examples/80_extending/example_restrict_number_of_hyperparameters.py +++ b/examples/80_extending/example_restrict_number_of_hyperparameters.py @@ -9,15 +9,19 @@ """ from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import UniformIntegerHyperparameter, UniformFloatHyperparameter +from ConfigSpace.hyperparameters import ( + UniformIntegerHyperparameter, + UniformFloatHyperparameter, +) from sklearn.datasets import load_breast_cancer from sklearn.model_selection import train_test_split import autosklearn.classification import autosklearn.pipeline.components.classification -from autosklearn.pipeline.components.classification \ - import AutoSklearnClassificationAlgorithm +from autosklearn.pipeline.components.classification import ( + AutoSklearnClassificationAlgorithm, +) from autosklearn.pipeline.constants import DENSE, UNSIGNED_DATA, PREDICTIONS, SPARSE @@ -29,8 +33,8 @@ # default parametrization (``max_features``). Instead, it also # tunes the number of estimators (``n_estimators``). 
-class CustomRandomForest(AutoSklearnClassificationAlgorithm): +class CustomRandomForest(AutoSklearnClassificationAlgorithm): def __init__(self, n_estimators, max_features, random_state=None): self.n_estimators = n_estimators self.max_features = max_features @@ -67,16 +71,16 @@ def predict_proba(self, X): @staticmethod def get_properties(dataset_properties=None): return { - 'shortname': 'RF', - 'name': 'Random Forest Classifier', - 'handles_regression': False, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': False, - 'is_deterministic': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (PREDICTIONS,) + "shortname": "RF", + "name": "Random Forest Classifier", + "handles_regression": False, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": False, + "is_deterministic": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (PREDICTIONS,), } @staticmethod @@ -87,8 +91,12 @@ def get_hyperparameter_search_space(dataset_properties=None): # m is the total number of features, and max_features is the hyperparameter specified below. # The default is 0.5, which yields sqrt(m) features as max_features in the estimator. This # corresponds with Geurts' heuristic. - max_features = UniformFloatHyperparameter("max_features", 0., 1., default_value=0.5) - n_estimators = UniformIntegerHyperparameter("n_estimators", 10, 1000, default_value=100) + max_features = UniformFloatHyperparameter( + "max_features", 0.0, 1.0, default_value=0.5 + ) + n_estimators = UniformIntegerHyperparameter( + "n_estimators", 10, 1000, default_value=100 + ) cs.add_hyperparameters([max_features, n_estimators]) return cs @@ -114,13 +122,11 @@ def get_hyperparameter_search_space(dataset_properties=None): time_left_for_this_task=30, per_run_time_limit=10, # Here we exclude auto-sklearn's default random forest component - exclude={ - 'classifier': ['random_forest'] - }, + exclude={"classifier": ["random_forest"]}, # Bellow two flags are provided to speed up calculations # Not recommended for a real implementation initial_configurations_via_metalearning=0, - smac_scenario_args={'runcount_limit': 1}, + smac_scenario_args={"runcount_limit": 1}, ) clf.fit(X_train, y_train) @@ -131,5 +137,5 @@ def get_hyperparameter_search_space(dataset_properties=None): # Observe that this configuration space only contains our custom random # forest, but not auto-sklearn's ``random_forest`` cs = clf.get_configuration_space(X_train, y_train) -assert 'random_forest' not in str(cs) +assert "random_forest" not in str(cs) print(cs) diff --git a/misc/create_hyperparameter_table.py b/misc/create_hyperparameter_table.py index dd23f8ac29..7495ee686a 100644 --- a/misc/create_hyperparameter_table.py +++ b/misc/create_hyperparameter_table.py @@ -15,43 +15,43 @@ CONST = "constant" UN = "unparameterized" -template_string = \ -""" +template_string = r""" \documentclass{article} %% For LaTeX2 \usepackage[a4paper, left=5mm, right=5mm, top=5mm, bottom=5mm]{geometry} -%%\\usepackage[landscape]{geometry} -\\usepackage{multirow} %% import command \multicolmun -\\usepackage{tabularx} %% Convenient table formatting -\\usepackage{booktabs} %% provides \\toprule, \midrule and \\bottomrule +%%\usepackage[landscape]{geometry} +\usepackage{multirow} %% import command \multicolmun +\usepackage{tabularx} %% Convenient table formatting +\usepackage{booktabs} %% provides \\toprule, \midrule and \\bottomrule -\\begin{document} 
+\begin{document} %s -\\end{document} +\end{document} """ -caption_str = "Number of Hyperparameters for each possible %s " \ - "for a dataset with these properties: %s" - -table_str = \ -""" -\\begin{table}[t!] -\\centering -\\scriptsize -\\caption{ %s } -\\begin{tabularx}{\\textwidth}{ X X X X X X } -\\toprule -name & \#$\lambda$ & cat (cond) & cont (cond) & const & un \\\\ -\\toprule -\\\\ +caption_str = ( + "Number of Hyperparameters for each possible %s " + "for a dataset with these properties: %s" +) + +table_str = r""" +\begin{table}[t!] +\centering +\scriptsize +\caption{ %s } +\begin{tabularx}{\textwidth}{ X X X X X X } +\toprule +name & \#$\lambda$ & cat (cond) & cont (cond) & const & un \\ +\toprule +\\ %s -\\\\ -\\toprule -\\bottomrule -\\end{tabularx} -\\end{table} +\\ +\toprule +\bottomrule +\end{tabularx} +\end{table} """ @@ -59,11 +59,13 @@ def get_dict(task_type="classifier", **kwargs): assert task_type in ("classifier", "regressor") if task_type == "classifier": - cs = autosklearn.pipeline.classification.SimpleClassificationPipeline\ - .get_hyperparameter_search_space(dataset_properties=kwargs) + cs = autosklearn.pipeline.classification.SimpleClassificationPipeline.get_hyperparameter_search_space( + dataset_properties=kwargs + ) elif task_type == "regressor": - cs = autosklearn.pipeline.regression.SimpleRegressionPipeline\ - .get_hyperparameter_search_space(dataset_properties=kwargs) + cs = autosklearn.pipeline.regression.SimpleRegressionPipeline.get_hyperparameter_search_space( + dataset_properties=kwargs + ) else: raise ValueError("'task_type' is not in ('classifier', 'regressor')") @@ -73,7 +75,7 @@ def get_dict(task_type="classifier", **kwargs): for h in cs.get_hyperparameters(): if h.name == "feature_preprocessor:__choice__": preprocessor = h - elif h.name == (task_type + ':__choice__'): + elif h.name == (task_type + ":__choice__"): estimator = h if estimator is None: @@ -100,8 +102,9 @@ def get_dict(task_type="classifier", **kwargs): preprocessor_dict[i][UN] = 0 for h in cs.get_hyperparameters(): - if h.name == "feature_preprocessor:__choice__" or \ - h.name == (task_type + ':__choice__'): + if h.name == "feature_preprocessor:__choice__" or h.name == ( + task_type + ":__choice__" + ): continue # walk over both dicts for d in (estimator_dict, preprocessor_dict): @@ -116,14 +119,18 @@ def get_dict(task_type="classifier", **kwargs): d[est][CAT] += 1 elif isinstance(h, ConfigSpace.hyperparameters.Constant): d[est][CONST] += 1 - elif isinstance(h, ConfigSpace.hyperparameters.UnParametrizedHyperparameter): + elif isinstance( + h, ConfigSpace.hyperparameters.UnParametrizedHyperparameter + ): d[est][UN] += 1 else: raise ValueError("Don't know that type: %s" % type(h)) for h in cs.get_conditions(): - if h.parent.name == (task_type + ':__choice__') or h.parent.name == \ - "feature_preprocessor:__choice__": + if ( + h.parent.name == (task_type + ":__choice__") + or h.parent.name == "feature_preprocessor:__choice__" + ): # ignore this condition # print "IGNORE", h continue @@ -132,22 +139,30 @@ def get_dict(task_type="classifier", **kwargs): for d in (estimator_dict, preprocessor_dict): est = h.child.name.split(":")[1] if est not in d: - #print "Could not find %s" % est + # print "Could not find %s" % est continue - #print "####" - #print vars(h) - #print h.parent - #print type(h) - if isinstance(h.child, ConfigSpace.hyperparameters.UniformIntegerHyperparameter): + # print "####" + # print vars(h) + # print h.parent + # print type(h) + if isinstance( + h.child, 
ConfigSpace.hyperparameters.UniformIntegerHyperparameter + ): d[est][COND][CONT] += 1 - elif isinstance(h.child, ConfigSpace.hyperparameters.UniformFloatHyperparameter): + elif isinstance( + h.child, ConfigSpace.hyperparameters.UniformFloatHyperparameter + ): d[est][COND][CONT] += 1 - elif isinstance(h.child, ConfigSpace.hyperparameters.CategoricalHyperparameter): + elif isinstance( + h.child, ConfigSpace.hyperparameters.CategoricalHyperparameter + ): d[est][COND][CAT] += 1 elif isinstance(h.child, ConfigSpace.hyperparameters.Constant): d[est][COND][CONST] += 1 - elif isinstance(h.child, ConfigSpace.hyperparameters.UnParametrizedHyperparameter): + elif isinstance( + h.child, ConfigSpace.hyperparameters.UnParametrizedHyperparameter + ): d[est][COND][UN] += 1 else: raise ValueError("Don't know that type: %s" % type(h)) @@ -159,7 +174,11 @@ def build_table(d): lines = list() for est in d.keys(): sum_ = 0 - t_list = list([est.replace("_", " "), ]) + t_list = list( + [ + est.replace("_", " "), + ] + ) for t in (CAT, CONT): sum_ += d[est][t] t_list.append("%d (%d)" % (d[est][t], d[est][COND][t])) @@ -175,33 +194,68 @@ def main(): parser = ArgumentParser() # General Options - parser.add_argument("-s", "--save", dest="save", default=None, - help="Where to save plot instead of showing it?") - parser.add_argument("-t", "--type", dest="task_type", default="classifier", - choices=("classifier", ), help="Type of dataset") - parser.add_argument("--sparse", dest="sparse", default=False, - action="store_true", help="dataset property") + parser.add_argument( + "-s", + "--save", + dest="save", + default=None, + help="Where to save plot instead of showing it?", + ) + parser.add_argument( + "-t", + "--type", + dest="task_type", + default="classifier", + choices=("classifier",), + help="Type of dataset", + ) + parser.add_argument( + "--sparse", + dest="sparse", + default=False, + action="store_true", + help="dataset property", + ) prop = parser.add_mutually_exclusive_group(required=True) - prop.add_argument("--multilabel", dest="multilabel", default=False, - action="store_true", help="dataset property") - prop.add_argument("--multiclass", dest="multiclass", default=False, - action="store_true", help="dataset property") - prop.add_argument("--binary", dest="binary", default=False, - action="store_true", help="dataset property") + prop.add_argument( + "--multilabel", + dest="multilabel", + default=False, + action="store_true", + help="dataset property", + ) + prop.add_argument( + "--multiclass", + dest="multiclass", + default=False, + action="store_true", + help="dataset property", + ) + prop.add_argument( + "--binary", + dest="binary", + default=False, + action="store_true", + help="dataset property", + ) args, unknown = parser.parse_known_args() - props = {"sparse": args.sparse, - "multilabel": args.multilabel, - "multiclass": args.multiclass} + props = { + "sparse": args.sparse, + "multilabel": args.multilabel, + "multiclass": args.multiclass, + } est_dict, preproc_dict = get_dict(task_type=args.task_type, **props) est_table = build_table(est_dict) preproc_table = build_table(preproc_dict) est_table = table_str % (caption_str % (args.task_type, str(props)), est_table) - preproc_table = table_str % (caption_str % ( - "feature_preprocessor", str(props)), preproc_table) + preproc_table = table_str % ( + caption_str % ("feature_preprocessor", str(props)), + preproc_table, + ) tex_doc = template_string % "\n".join([est_table, preproc_table]) if args.save is None: @@ -210,7 +264,7 @@ def main(): fh = 
open(args.save, "w") fh.write(tex_doc) fh.close() - proc = subprocess.Popen(shlex.split('pdflatex %s' % args.save)) + proc = subprocess.Popen(shlex.split("pdflatex %s" % args.save)) proc.communicate() try: os.remove(args.save.replace(".tex", ".aux")) @@ -221,4 +275,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/misc/create_list_of_potential_models.py b/misc/create_list_of_potential_models.py index 8153c639e7..cec7959ab1 100644 --- a/misc/create_list_of_potential_models.py +++ b/misc/create_list_of_potential_models.py @@ -5,30 +5,32 @@ import sklearn.base -files = glob.glob(os.path.join(os.path.dirname(sklearn.__file__), "**/*.py"), - recursive=True) +files = glob.glob( + os.path.join(os.path.dirname(sklearn.__file__), "**/*.py"), recursive=True +) + def find_all(cls): found = set() for file in files: - parts = file.split('/') - parts[-1] = parts[-1].replace('.py', '') - sklearn_dir = parts.index('sklearn') - name = '.'.join(parts[sklearn_dir:]) + parts = file.split("/") + parts[-1] = parts[-1].replace(".py", "") + sklearn_dir = parts.index("sklearn") + name = ".".join(parts[sklearn_dir:]) module = importlib.import_module(name) for member in module.__dict__.values(): if not inspect.isclass(member): continue if issubclass(member, cls): found.add(member) - print('#####') + print("#####") found = list(found) found.sort(key=lambda t: str(t)) for f in found: print(f) return found -#classifiers = find_all(sklearn.base.ClassifierMixin) -#regressors = find_all(sklearn.base.RegressorMixin) -preprocs = find_all(sklearn.base.TransformerMixin) +# classifiers = find_all(sklearn.base.ClassifierMixin) +# regressors = find_all(sklearn.base.RegressorMixin) +preprocs = find_all(sklearn.base.TransformerMixin) diff --git a/mypy.ini b/mypy.ini deleted file mode 100644 index 8c8b6589af..0000000000 --- a/mypy.ini +++ /dev/null @@ -1,8 +0,0 @@ -[mypy] -# Reports any config lines that are not recognized -warn_unused_configs=True -ignore_missing_imports=True -follow_imports=skip -disallow_untyped_defs=True -disallow_incomplete_defs=True -disallow_untyped_decorators=True diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000..0e48e3fc5f --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,152 @@ +# For TOML reference +# https://learnxinyminutes.com/docs/toml/ + +[tool.pytest.ini_options] +testpaths = ["test"] +minversion = "3.7" +#addopts = "--cov=autosklearn" + +[tool.coverage.run] +branch = true +context = "autosklearn" + +[tool.coverage.report] +show_missing = true +skip_covered = true +exclude_lines = [ + "pragma: no cover", + '\.\.\.', + "raise NotImplementedError", + "if TYPE_CHECKING" +] + +[tool.black] +target-version = ['py37'] + +[tool.isort] +py_version = "37" +profile = "black" # Play nicely with black +src_paths = ["autosklearn", "test"] +known_types = ["typing", "abc"] # We put these in their own section TYPES +known_first_party = ["autosklearn"] # Say that autosklearn is FIRSTPARTY +known_test = ["test"] # Say that test.* is TEST +sections = ["FUTURE", "TYPES", "STDLIB", "THIRDPARTY", "FIRSTPARTY", "TEST", "LOCALFOLDER"] # section ordering +multi_line_output = 3 # https://pycqa.github.io/isort/docs/configuration/multi_line_output_modes.html + +[tool.pydocstyle] +convention = "numpy" +add-ignore = [ # http://www.pydocstyle.org/en/stable/error_codes.html + "D100", # Missing docstring in public module + "D101", # Missing docstring in public class + "D104", # Missing docstring in public package + "D105", # Missing docstring in 
magic method + + "D203", # 1 blank line required before class docstring + "D205", # 1 blank line required between summary and description + "D210", # No whitespaces allowed surrounding docstring text + "D212", # Multi-line docstring summary should start at the first line + "D213", # Multi-line docstring summary should start at the second line + + "D400", # First line should end with a period + "D401", # First line should be in imperative mood + "D404", # First word of the docstring should not be "This" + "D413", # Missing blank line after last section + "D415" # First line should end with a period, question mark, or exclamation point +] + +[tool.mypy] +python_version = "3.7" + +show_error_codes = true + +warn_unused_configs = true # warn about unused [tool.mypy] lines + +follow_imports = "normal" # Type check top level api code we use from imports +ignore_missing_imports = false # prefer explicit ignores + +disallow_untyped_defs = true # All functions must have types +disallow_untyped_decorators = true # ... even decorators +disallow_incomplete_defs = true # ...all types + +# This is a problem with the tests of `automl_common` being distributed as a submodule +# probably indicative that is should be a package. +exclude = "autosklearn/automl_common/test" + +# This is handled by automl_common itself in its own CI +[[tool.mypy.overrides]] +module = ["autosklearn.automl_common.common.*"] +ignore_errors = true + +# Submodules that need to be updated with mypy +[[tool.mypy.overrides]] +module = [ + "autosklearn", #__init__ + "autosklearn.estimators", + "autosklearn.automl", + "autosklearn.smbo", + "autosklearn.experimental.askl2", + "autosklearn.ensemble_builder", + "autosklearn.ensembles.singlebest_ensemble", + "autosklearn.ensembles.ensemble_selection", + "autosklearn.evaluation", #__init__ + "autosklearn.evaluation.abstract_evaluator", + "autosklearn.evaluation.test_evaluator", + "autosklearn.evaluation.train_evaluator", + "autosklearn.metalearning.input.aslib_simple", + "autosklearn.metalearning.mismbo", + "autosklearn.metalearning.metafeatures.metafeature", + "autosklearn.metalearning.metafeatures.metafeatures", + "autosklearn.metalearning.metalearning.meta_base", + "autosklearn.metalearning.metalearning.metrics.misc", + "autosklearn.metalearning.metalearning.create_datasets", + "autosklearn.metalearning.metalearning.kNearestDatasets.kND", + "autosklearn.metalearning.metalearning.clustering.gmeans", + "autosklearn.metalearning.optimizers.optimizer_base", + "autosklearn.metalearning.optimizers.metalearn_optimizer.metalearn_optimizer_parser", + "autosklearn.metalearning.optimizers.metalearn_optimizer.metalearner", + "autosklearn.pipeline.base", + "autosklearn.pipeline.classification", + "autosklearn.pipeline.regression", + "autosklearn.pipeline.components.base", + "autosklearn.pipeline.components.data_preprocessing.*", + "autosklearn.pipeline.components.regression.*", + "autosklearn.pipeline.components.classification.*", + "autosklearn.pipeline.components.feature_preprocessing.*", + "autosklearn.pipeline.util", + "autosklearn.pipeline.logging_", + "autosklearn.pipeline.create_searchspace_util", + "autosklearn.pipeline.implementations.util", + "autosklearn.pipeline.implementations.SparseOneHotEncoder", + "autosklearn.pipeline.implementations.MinorityCoalescer", + "autosklearn.pipeline.implementations.CategoryShift", + "autosklearn.experimental.selector", + "autosklearn.data.validation", + "autosklearn.data.abstract_data_manager", + "autosklearn.data.xy_data_manager", + 
"autosklearn.data.target_validator", + "autosklearn.data.feature_validator", + "autosklearn.util.single_threaded_client", + "autosklearn.util.logging_", +] +ignore_errors = true + +# Packages without exported types +[[tool.mypy.overrides]] +module = [ + "sklearn.*", + "dask.*", + "ConfigSpace.*", + "arff.*", + "scipy.*", + "smac.*", + "pandas.*", + "pynisher.*", + "distro.*", + "joblib.*", + "threadpoolctl.*", + "setuptools.*", + "pkg_resources.*", + "yaml.*", +] +ignore_missing_imports = true + diff --git a/pytest.ini b/pytest.ini deleted file mode 100644 index 049e247a21..0000000000 --- a/pytest.ini +++ /dev/null @@ -1,3 +0,0 @@ -[pytest] -testpaths = - test diff --git a/scripts/01_create_commands.py b/scripts/01_create_commands.py index c6e28c606b..72e406d3d7 100644 --- a/scripts/01_create_commands.py +++ b/scripts/01_create_commands.py @@ -5,45 +5,49 @@ import openml -sys.path.append('.') +sys.path.append(".") from update_metadata_util import classification_tasks, regression_tasks parser = argparse.ArgumentParser() -parser.add_argument('--working-directory', type=str, required=True) -parser.add_argument('--test', action='store_true') +parser.add_argument("--working-directory", type=str, required=True) +parser.add_argument("--test", action="store_true") args = parser.parse_args() working_directory = args.working_directory test = args.test -command_file_name = os.path.join(working_directory, 'metadata_commands.txt') +command_file_name = os.path.join(working_directory, "metadata_commands.txt") this_directory = os.path.dirname(os.path.abspath(__file__)) -script_name = 'run_auto-sklearn_for_metadata_generation.py' +script_name = "run_auto-sklearn_for_metadata_generation.py" absolute_script_name = os.path.join(this_directory, script_name) commands = [] -for task_id in (classification_tasks if not test else (233, 245, 258)): - for metric in ('accuracy', 'balanced_accuracy', 'roc_auc', 'logloss'): +for task_id in classification_tasks if not test else (233, 245, 258): + for metric in ("accuracy", "balanced_accuracy", "roc_auc", "logloss"): if ( len(openml.tasks.get_task(task_id, download_data=False).class_labels) > 2 - and metric == 'roc_auc' + and metric == "roc_auc" ): continue - command = ('python3 %s --working-directory %s --time-limit 86400 ' - '--per-run-time-limit 1800 --task-id %d -s 1 --metric %s' % - (absolute_script_name, working_directory, task_id, metric)) + command = ( + "python3 %s --working-directory %s --time-limit 86400 " + "--per-run-time-limit 1800 --task-id %d -s 1 --metric %s" + % (absolute_script_name, working_directory, task_id, metric) + ) commands.append(command) -for task_id in (regression_tasks if not test else (360029, 360033)): - for metric in ('r2', 'root_mean_squared_error', 'mean_absolute_error'): - command = ('python3 %s --working-directory %s --time-limit 86400 ' - '--per-run-time-limit 1800 --task-id %d -s 1 --metric %s' % - (absolute_script_name, working_directory, task_id, metric)) +for task_id in regression_tasks if not test else (360029, 360033): + for metric in ("r2", "root_mean_squared_error", "mean_absolute_error"): + command = ( + "python3 %s --working-directory %s --time-limit 86400 " + "--per-run-time-limit 1800 --task-id %d -s 1 --metric %s" + % (absolute_script_name, working_directory, task_id, metric) + ) commands.append(command) -with open(command_file_name, 'w') as fh: +with open(command_file_name, "w") as fh: for command in commands: fh.writelines(command) - fh.write('\n') + fh.write("\n") diff --git a/scripts/02_retrieve_metadata.py 
b/scripts/02_retrieve_metadata.py index 611b190dfa..f87f65ecc4 100644 --- a/scripts/02_retrieve_metadata.py +++ b/scripts/02_retrieve_metadata.py @@ -16,8 +16,9 @@ from autosklearn.util import pipeline -def retrieve_matadata(validation_directory, metric, configuration_space, - cutoff=0, only_best=True): +def retrieve_matadata( + validation_directory, metric, configuration_space, cutoff=0, only_best=True +): if not only_best: raise NotImplementedError() if cutoff > 0: @@ -29,9 +30,9 @@ def retrieve_matadata(validation_directory, metric, configuration_space, configurations_to_ids = dict() try: - validation_trajectory_files = glob.glob(os.path.join( - validation_directory, '*', '*', 'validation_trajectory_*.json' - )) + validation_trajectory_files = glob.glob( + os.path.join(validation_directory, "*", "*", "validation_trajectory_*.json") + ) except FileNotFoundError: return {}, {} @@ -66,7 +67,8 @@ def retrieve_matadata(validation_directory, metric, configuration_space, try: best_configuration = Configuration( - configuration_space=configuration_space, values=config) + configuration_space=configuration_space, values=config + ) best_value = score best_configuration_dir = validation_trajectory_file except Exception as e: @@ -74,18 +76,22 @@ def retrieve_matadata(validation_directory, metric, configuration_space, n_broken += 1 if task_name is None: - print('Could not find any configuration better than the default configuration!') + print( + "Could not find any configuration better than the default configuration!" + ) continue if best_configuration is None: - print('Could not find a valid configuration; total %d, better %d, broken %d' - % (n_configs, n_better, n_broken)) + print( + "Could not find a valid configuration; total %d, better %d, broken %d" + % (n_configs, n_better, n_broken) + ) continue elif best_configuration in configurations_to_ids: - print('Found configuration in', best_configuration_dir) + print("Found configuration in", best_configuration_dir) config_id = configurations_to_ids[best_configuration] else: - print('Found configuration in', best_configuration_dir) + print("Found configuration in", best_configuration_dir) config_id = len(configurations_to_ids) configurations_to_ids[config_id] = best_configuration configurations[config_id] = best_configuration @@ -102,34 +108,33 @@ def retrieve_matadata(validation_directory, metric, configuration_space, return outputs, configurations -def write_output(outputs, configurations, output_dir, configuration_space, - metric): +def write_output(outputs, configurations, output_dir, configuration_space, metric): arff_object = dict() - arff_object['attributes'] = [('instance_id', 'STRING'), - ('repetition', 'NUMERIC'), - ('algorithm', 'STRING'), - (metric, 'NUMERIC'), - ('runstatus', - ['ok', 'timeout', 'memout', 'not_applicable', - 'crash', 'other'])] - arff_object['relation'] = "ALGORITHM_RUNS" - arff_object['description'] = "" + arff_object["attributes"] = [ + ("instance_id", "STRING"), + ("repetition", "NUMERIC"), + ("algorithm", "STRING"), + (metric, "NUMERIC"), + ("runstatus", ["ok", "timeout", "memout", "not_applicable", "crash", "other"]), + ] + arff_object["relation"] = "ALGORITHM_RUNS" + arff_object["description"] = "" data = [] keep_configurations = set() for dataset, (configuration_id, value) in outputs.items(): if not np.isfinite(value): - runstatus = 'not_applicable' + runstatus = "not_applicable" value = None else: - runstatus = 'ok' + runstatus = "ok" line = [dataset, 1, configuration_id + 1, value, runstatus] data.append(line) 
keep_configurations.add(configuration_id) - arff_object['data'] = data + arff_object["data"] = data with open(os.path.join(output_dir, "algorithm_runs.arff"), "w") as fh: arff.dump(arff_object, fh) @@ -139,7 +144,7 @@ def write_output(outputs, configurations, output_dir, configuration_space, if idx not in keep_configurations: continue configuration = configurations[idx] - line = {'idx': idx + 1} + line = {"idx": idx + 1} for hp_name in configuration: value = configuration[hp_name] if value is not None: @@ -147,7 +152,7 @@ def write_output(outputs, configurations, output_dir, configuration_space, hyperparameters.append(line) - fieldnames = ['idx'] + fieldnames = ["idx"] for hyperparameter in configuration_space.get_hyperparameters(): fieldnames.append(hyperparameter.name) fieldnames = [fieldnames[0]] + sorted(fieldnames[1:]) @@ -158,16 +163,17 @@ def write_output(outputs, configurations, output_dir, configuration_space, csv_writer.writerow(line) description = dict() - description['algorithms_deterministic'] = \ - ",".join([str(configuration_id + 1) - for configuration_id in sorted(configurations.keys())]) - description['algorithms_stochastic'] = \ - ",".join([]) - description['performance_measures'] = metric - description['performance_type'] = 'solution_quality' - - with open(os.path.join(output_dir, "description.results.txt"), - "w") as fh: + description["algorithms_deterministic"] = ",".join( + [ + str(configuration_id + 1) + for configuration_id in sorted(configurations.keys()) + ] + ) + description["algorithms_stochastic"] = ",".join([]) + description["performance_measures"] = metric + description["performance_type"] = "solution_quality" + + with open(os.path.join(output_dir, "description.results.txt"), "w") as fh: for key in description: fh.write("%s: %s\n" % (key, description[key])) @@ -184,44 +190,56 @@ def main(): cutoff = args.cutoff only_best = args.only_best - for task_type in ('classification', 'regression'): - if task_type == 'classification': + for task_type in ("classification", "regression"): + if task_type == "classification": metadata_sets = itertools.product( - [0, 1], [BINARY_CLASSIFICATION, MULTICLASS_CLASSIFICATION], - CLASSIFICATION_METRICS) - input_directory = os.path.join(working_directory, 'configuration', - 'classification') - elif task_type == 'regression': - metadata_sets = itertools.product( - [0, 1], [REGRESSION], REGRESSION_METRICS) - input_directory = os.path.join(working_directory, 'configuration', - 'regression') + [0, 1], + [BINARY_CLASSIFICATION, MULTICLASS_CLASSIFICATION], + CLASSIFICATION_METRICS, + ) + input_directory = os.path.join( + working_directory, "configuration", "classification" + ) + elif task_type == "regression": + metadata_sets = itertools.product([0, 1], [REGRESSION], REGRESSION_METRICS) + input_directory = os.path.join( + working_directory, "configuration", "regression" + ) else: raise ValueError(task_type) - output_dir = os.path.join(working_directory, 'configuration_results') + output_dir = os.path.join(working_directory, "configuration_results") for sparse, task, metric in metadata_sets: print(TASK_TYPES_TO_STRING[task], metric, sparse) - output_dir_ = os.path.join(output_dir, '%s_%s_%s' % ( - metric, TASK_TYPES_TO_STRING[task], - 'sparse' if sparse else 'dense')) + output_dir_ = os.path.join( + output_dir, + "%s_%s_%s" + % (metric, TASK_TYPES_TO_STRING[task], "sparse" if sparse else "dense"), + ) configuration_space = pipeline.get_configuration_space( - {'is_sparse': sparse, 'task': task}) + {"is_sparse": sparse, "task": task} + ) 
outputs, configurations = retrieve_matadata( validation_directory=input_directory, metric=metric, cutoff=cutoff, configuration_space=configuration_space, - only_best=only_best) + only_best=only_best, + ) if len(outputs) == 0: - print("No output found for %s, %s, %s" % - (metric, TASK_TYPES_TO_STRING[task], - 'sparse' if sparse else 'dense')) + print( + "No output found for %s, %s, %s" + % ( + metric, + TASK_TYPES_TO_STRING[task], + "sparse" if sparse else "dense", + ) + ) continue try: @@ -229,8 +247,9 @@ def main(): except: pass - write_output(outputs, configurations, output_dir_, - configuration_space, metric) + write_output( + outputs, configurations, output_dir_, configuration_space, metric + ) if __name__ == "__main__": diff --git a/scripts/03_calculate_metafeatures.py b/scripts/03_calculate_metafeatures.py index 1d058c5dae..3b32dde8e3 100644 --- a/scripts/03_calculate_metafeatures.py +++ b/scripts/03_calculate_metafeatures.py @@ -11,15 +11,22 @@ import numpy as np import pandas as pd -from autosklearn.constants import BINARY_CLASSIFICATION, MULTICLASS_CLASSIFICATION, REGRESSION +from autosklearn.constants import ( + BINARY_CLASSIFICATION, + MULTICLASS_CLASSIFICATION, + REGRESSION, +) from autosklearn.metalearning.metafeatures import metafeatures -from autosklearn.smbo import _calculate_metafeatures, _calculate_metafeatures_encoded, \ - EXCLUDE_META_FEATURES_REGRESSION, EXCLUDE_META_FEATURES_CLASSIFICATION +from autosklearn.smbo import ( + _calculate_metafeatures, + _calculate_metafeatures_encoded, + EXCLUDE_META_FEATURES_REGRESSION, + EXCLUDE_META_FEATURES_CLASSIFICATION, +) from autosklearn.util.stopwatch import StopWatch -sys.path.append('.') -from update_metadata_util import load_task, classification_tasks, \ - regression_tasks +sys.path.append(".") +from update_metadata_util import load_task, classification_tasks, regression_tasks logger = logging.getLogger("03_calculate_metafeatures") @@ -28,7 +35,7 @@ def calculate_metafeatures(task_id): X_train, y_train, X_test, y_test, cat, task_type, dataset_name = load_task(task_id) watch = StopWatch() - if task_type == 'classification': + if task_type == "classification": if len(np.unique(y_train)) == 2: task_type = BINARY_CLASSIFICATION else: @@ -37,20 +44,27 @@ def calculate_metafeatures(task_id): task_type = REGRESSION _metafeatures_labels = _calculate_metafeatures( - x_train=X_train, y_train=y_train, data_feat_type=cat, - data_info_task=task_type, basename=dataset_name, logger_=logger, + x_train=X_train, + y_train=y_train, + data_feat_type=cat, + data_info_task=task_type, + basename=dataset_name, + logger_=logger, watcher=watch, ) _metafeatures_encoded_labels = _calculate_metafeatures_encoded( - x_train=X_train, y_train=y_train, data_feat_type=cat, - task=task_type, basename=dataset_name, logger_=logger, + x_train=X_train, + y_train=y_train, + data_feat_type=cat, + task=task_type, + basename=dataset_name, + logger_=logger, watcher=watch, ) mf = _metafeatures_labels - mf.metafeature_values.update( - _metafeatures_encoded_labels.metafeature_values) + mf.metafeature_values.update(_metafeatures_encoded_labels.metafeature_values) return mf @@ -59,15 +73,15 @@ def calculate_metafeatures(task_id): parser = ArgumentParser() parser.add_argument("--working-directory", type=str, required=True) parser.add_argument("--memory-limit", type=int, default=3072) - parser.add_argument("--test-mode", action='store_true') + parser.add_argument("--test-mode", action="store_true") args = parser.parse_args() working_directory = args.working_directory 
memory_limit = args.memory_limit test_mode = args.test_mode - for task_type in ('classification', 'regression'): - output_directory = os.path.join(working_directory, 'metafeatures', task_type) + for task_type in ("classification", "regression"): + output_directory = os.path.join(working_directory, "metafeatures", task_type) try: os.makedirs(output_directory) except: @@ -75,7 +89,7 @@ def calculate_metafeatures(task_id): all_metafeatures = {} - if task_type == 'classification': + if task_type == "classification": tasks = classification_tasks else: tasks = regression_tasks @@ -90,12 +104,9 @@ def producer(): for task_id in tasks: yield task_id - memory = joblib.Memory(location='/tmp/joblib', verbose=10) + memory = joblib.Memory(location="/tmp/joblib", verbose=10) cached_calculate_metafeatures = memory.cache(calculate_metafeatures) - mfs = [ - cached_calculate_metafeatures(task_id) - for task_id in producer() - ] + mfs = [cached_calculate_metafeatures(task_id) for task_id in producer()] for mf in mfs: if mf is not None: @@ -110,45 +121,50 @@ def producer(): for i, task_id in enumerate(all_metafeatures): calculation_times[task_id] = dict() for metafeature_name in sorted( - all_metafeatures[task_id].metafeature_values): + all_metafeatures[task_id].metafeature_values + ): metafeature_value = all_metafeatures[task_id].metafeature_values[ - metafeature_name] - calculation_times[task_id][metafeature_name] = \ - metafeature_value.time + metafeature_name + ] + calculation_times[task_id][metafeature_name] = metafeature_value.time if metafeature_value.type_ == "HELPERFUNCTION": - helperfunction_values[task_id][metafeature_name] = \ - metafeature_value.value + helperfunction_values[task_id][ + metafeature_name + ] = metafeature_value.value else: - metafeature_values[task_id][metafeature_name] = \ - metafeature_value.value + metafeature_values[task_id][ + metafeature_name + ] = metafeature_value.value calculation_times = pd.DataFrame(calculation_times).transpose() calculation_times = calculation_times.sort_index() - with open(os.path.join(output_directory, "calculation_times.csv"), - "w") as fh: + with open(os.path.join(output_directory, "calculation_times.csv"), "w") as fh: fh.write(calculation_times.to_csv()) # Write all metafeatures in the aslib1.0 format - metafeature_values = metafeature_values = pd.DataFrame(metafeature_values).transpose() + metafeature_values = metafeature_values = pd.DataFrame( + metafeature_values + ).transpose() metafeature_values = metafeature_values.sort_index() arff_object = dict() - arff_object['attributes'] = [('instance_id', 'STRING'), - ('repetition', 'NUMERIC')] + \ - [('%s' % name, 'NUMERIC') for name in - metafeature_values.columns] - arff_object['relation'] = "FEATURE_VALUES" - arff_object['description'] = "" + arff_object["attributes"] = [ + ("instance_id", "STRING"), + ("repetition", "NUMERIC"), + ] + [("%s" % name, "NUMERIC") for name in metafeature_values.columns] + arff_object["relation"] = "FEATURE_VALUES" + arff_object["description"] = "" data = [] for idx in metafeature_values.index: line = [idx, 1] - line += [value if np.isfinite(value) else None - for value in metafeature_values.loc[idx, :].values] + line += [ + value if np.isfinite(value) else None + for value in metafeature_values.loc[idx, :].values + ] data.append(line) - arff_object['data'] = data + arff_object["data"] = data - with open(os.path.join(output_directory, "feature_values.arff"), - "w") as fh: + with open(os.path.join(output_directory, "feature_values.arff"), "w") as fh: 
arff.dump(arff_object, fh) # Feature steps and runtimes according to the aslib1.0 format @@ -157,7 +173,8 @@ def producer(): exclude_metafeatures = ( EXCLUDE_META_FEATURES_CLASSIFICATION - if task_type == 'classification' else EXCLUDE_META_FEATURES_REGRESSION + if task_type == "classification" + else EXCLUDE_META_FEATURES_REGRESSION ) for metafeature_name in metafeatures.metafeatures.functions: @@ -174,42 +191,48 @@ def producer(): # Write the feature runstatus in the aslib1.0 format arff_object = dict() - arff_object['attributes'] = [('instance_id', 'STRING'), - ('repetition', 'NUMERIC')] + \ - [('%s' % name, - ['ok', 'timeout', 'memout', 'presolved', - 'crash', 'other']) - for name in feature_steps] - arff_object['relation'] = "FEATURE_RUNSTATUS" - arff_object['description'] = "" + arff_object["attributes"] = [ + ("instance_id", "STRING"), + ("repetition", "NUMERIC"), + ] + [ + ("%s" % name, ["ok", "timeout", "memout", "presolved", "crash", "other"]) + for name in feature_steps + ] + arff_object["relation"] = "FEATURE_RUNSTATUS" + arff_object["description"] = "" data = [] for idx in metafeature_values.index: line = [idx, 1] for feature_step in feature_steps: if feature_step in helperfunction_values[idx]: - line.append('ok' if helperfunction_values[feature_step] is not \ - None else 'other') + line.append( + "ok" + if helperfunction_values[feature_step] is not None + else "other" + ) elif feature_step in metafeature_values.loc[idx]: - line.append('ok' if np.isfinite(metafeature_values.loc[idx][ - feature_step]) else 'other') + line.append( + "ok" + if np.isfinite(metafeature_values.loc[idx][feature_step]) + else "other" + ) else: - line.append('other') + line.append("other") data.append(line) - arff_object['data'] = data + arff_object["data"] = data - with open(os.path.join(output_directory, "feature_runstatus.arff"), - "w") as fh: + with open(os.path.join(output_directory, "feature_runstatus.arff"), "w") as fh: arff.dump(arff_object, fh) arff_object = dict() - arff_object['attributes'] = [('instance_id', 'STRING'), - ('repetition', 'NUMERIC')] + \ - [('%s' % feature_step, 'NUMERIC') for - feature_step in feature_steps] - arff_object['relation'] = "FEATURE_COSTS" - arff_object['description'] = "" + arff_object["attributes"] = [ + ("instance_id", "STRING"), + ("repetition", "NUMERIC"), + ] + [("%s" % feature_step, "NUMERIC") for feature_step in feature_steps] + arff_object["relation"] = "FEATURE_COSTS" + arff_object["description"] = "" data = [] for instance_id in calculation_times.index: @@ -220,33 +243,35 @@ def producer(): for feature in feature_steps[feature_step]: time_ += calculation_times[feature][instance_id] if not np.isfinite(time_): - raise ValueError("Feature cost %s for instance %s and feature " - "step %s not finite" % (time_, instance_id, feature)) + raise ValueError( + "Feature cost %s for instance %s and feature " + "step %s not finite" % (time_, instance_id, feature) + ) line.append(time_) data.append(line) - arff_object['data'] = data + arff_object["data"] = data - with open(os.path.join(output_directory, "feature_costs.arff"), - "w") as fh: + with open(os.path.join(output_directory, "feature_costs.arff"), "w") as fh: arff.dump(arff_object, fh) # Write the features part of the description.txt to a file description = OrderedDict() - description['features_cutoff_time'] = '3600' - description['features_cutoff_memory'] = args.memory_limit - description['number_of_feature_steps'] = str(len(feature_steps)) + description["features_cutoff_time"] = "3600" + 
description["features_cutoff_memory"] = args.memory_limit + description["number_of_feature_steps"] = str(len(feature_steps)) for feature_step in feature_steps: - description['feature_step %s' % feature_step] = \ - ", ".join(feature_steps[feature_step]) - description['features_deterministic'] = ", ".join([ - metafeature_name for - metafeature_name in - metafeature_names]) - description['features_stochastic'] = '' - description['default_steps'] = ", ".join(feature_steps) - - with open(os.path.join(output_directory, - "description.features.txt"), "w") as fh: + description["feature_step %s" % feature_step] = ", ".join( + feature_steps[feature_step] + ) + description["features_deterministic"] = ", ".join( + [metafeature_name for metafeature_name in metafeature_names] + ) + description["features_stochastic"] = "" + description["default_steps"] = ", ".join(feature_steps) + + with open( + os.path.join(output_directory, "description.features.txt"), "w" + ) as fh: for entry in description: fh.write("%s: %s\n" % (entry, description[entry])) diff --git a/scripts/04_create_aslib_files.py b/scripts/04_create_aslib_files.py index d5e10a9c15..8c83dc1648 100644 --- a/scripts/04_create_aslib_files.py +++ b/scripts/04_create_aslib_files.py @@ -10,16 +10,16 @@ if __name__ == "__main__": parser = ArgumentParser() parser.add_argument("--working-directory", type=str, required=True) - parser.add_argument("--scenario_id", type=str, default='auto-sklearn') + parser.add_argument("--scenario_id", type=str, default="auto-sklearn") parser.add_argument("--algorithm_cutoff_time", type=int, default=1800) parser.add_argument("--algorithm_cutoff_memory", type=int, default=3072) args = parser.parse_args() working_directory = args.working_directory - output_dir = os.path.join(working_directory, 'metadata') - results_dir = os.path.join(working_directory, 'configuration_results') - metafeatures_dir = os.path.join(working_directory, 'metafeatures') + output_dir = os.path.join(working_directory, "metadata") + results_dir = os.path.join(working_directory, "configuration_results") + metafeatures_dir = os.path.join(working_directory, "metafeatures") scenario_id = args.scenario_id algorithm_cutoff_time = args.algorithm_cutoff_time @@ -31,25 +31,29 @@ except (OSError, IOError): pass - for task_type in ('classification', 'regression'): - if task_type == 'classification': + for task_type in ("classification", "regression"): + if task_type == "classification": metadata_sets = itertools.product( - [0, 1], [BINARY_CLASSIFICATION, MULTICLASS_CLASSIFICATION], - CLASSIFICATION_METRICS) - elif task_type == 'regression': - metadata_sets = itertools.product( - [0, 1], [REGRESSION], REGRESSION_METRICS) + [0, 1], + [BINARY_CLASSIFICATION, MULTICLASS_CLASSIFICATION], + CLASSIFICATION_METRICS, + ) + elif task_type == "regression": + metadata_sets = itertools.product([0, 1], [REGRESSION], REGRESSION_METRICS) else: raise ValueError(task_type) - input_directory = os.path.join(working_directory, 'configuration', task_type) + input_directory = os.path.join(working_directory, "configuration", task_type) metafeatures_dir_for_task = os.path.join(metafeatures_dir, task_type) for sparse, task, metric in metadata_sets: print(TASK_TYPES_TO_STRING[task], metric, sparse) - dir_name = '%s_%s_%s' % (metric, TASK_TYPES_TO_STRING[task], - 'sparse' if sparse else 'dense') + dir_name = "%s_%s_%s" % ( + metric, + TASK_TYPES_TO_STRING[task], + "sparse" if sparse else "dense", + ) output_dir_ = os.path.join(output_dir, dir_name) results_dir_ = os.path.join(results_dir, 
dir_name) @@ -67,21 +71,19 @@ pass # Create description.txt - with open(os.path.join(metafeatures_dir_for_task, - "description.features.txt")) as fh: + with open( + os.path.join(metafeatures_dir_for_task, "description.features.txt") + ) as fh: description_metafeatures = fh.read() - with open(os.path.join(results_dir_, - "description.results.txt")) as fh: + with open(os.path.join(results_dir_, "description.results.txt")) as fh: description_results = fh.read() description = [description_metafeatures, description_results] description.append("scenario_id: %s" % scenario_id) description.append("maximize: false") - description.append( - "algorithm_cutoff_time: %d" % algorithm_cutoff_time) - description.append( - "algorithm_cutoff_memory: %d" % algorithm_cutoff_memory) + description.append("algorithm_cutoff_time: %d" % algorithm_cutoff_time) + description.append("algorithm_cutoff_memory: %d" % algorithm_cutoff_memory) with open(os.path.join(output_dir_, "description.txt"), "w") as fh: for line in description: @@ -89,59 +91,54 @@ fh.write("\n") # Copy feature values and add instance id - with open(os.path.join(metafeatures_dir_for_task, - "feature_values.arff")) as fh: + with open( + os.path.join(metafeatures_dir_for_task, "feature_values.arff") + ) as fh: feature_values = arff.load(fh) - feature_values['relation'] = scenario_id + "_" + feature_values[ - 'relation'] + feature_values["relation"] = scenario_id + "_" + feature_values["relation"] - with open(os.path.join(output_dir_, "feature_values.arff"), - "w") as fh: + with open(os.path.join(output_dir_, "feature_values.arff"), "w") as fh: arff.dump(feature_values, fh) # Copy feature runstatus and add instance id - with open(os.path.join(metafeatures_dir_for_task, - "feature_runstatus.arff")) as fh: + with open( + os.path.join(metafeatures_dir_for_task, "feature_runstatus.arff") + ) as fh: feature_runstatus = arff.load(fh) - feature_runstatus['relation'] = scenario_id + "_" + \ - feature_runstatus['relation'] + feature_runstatus["relation"] = ( + scenario_id + "_" + feature_runstatus["relation"] + ) - with open(os.path.join(output_dir_, "feature_runstatus.arff"), "w") \ - as fh: + with open(os.path.join(output_dir_, "feature_runstatus.arff"), "w") as fh: arff.dump(feature_runstatus, fh) # Copy feature runstatus and add instance id with open( - os.path.join(metafeatures_dir_for_task, "feature_costs.arff")) as fh: + os.path.join(metafeatures_dir_for_task, "feature_costs.arff") + ) as fh: feature_costs = arff.load(fh) - feature_costs['relation'] = scenario_id + "_" + feature_costs[ - 'relation'] - for i in range(len(feature_costs['data'])): - for j in range(2, len(feature_costs['data'][i])): - feature_costs['data'][i][j] = \ - round(feature_costs['data'][i][j], 5) + feature_costs["relation"] = scenario_id + "_" + feature_costs["relation"] + for i in range(len(feature_costs["data"])): + for j in range(2, len(feature_costs["data"][i])): + feature_costs["data"][i][j] = round(feature_costs["data"][i][j], 5) - with open(os.path.join(output_dir_, "feature_costs.arff"), "w") \ - as fh: + with open(os.path.join(output_dir_, "feature_costs.arff"), "w") as fh: arff.dump(feature_costs, fh) # Copy algorithm runs and add instance id with open(os.path.join(results_dir_, "algorithm_runs.arff")) as fh: algorithm_runs = arff.load(fh) - algorithm_runs['relation'] = scenario_id + "_" + algorithm_runs[ - 'relation'] + algorithm_runs["relation"] = scenario_id + "_" + algorithm_runs["relation"] - with open(os.path.join(output_dir_, "algorithm_runs.arff"), "w") \ - as fh: 
+ with open(os.path.join(output_dir_, "algorithm_runs.arff"), "w") as fh: arff.dump(algorithm_runs, fh) # Copy configurations file with open(os.path.join(results_dir_, "configurations.csv")) as fh: algorithm_runs = fh.read() - with open(os.path.join(output_dir_, "configurations.csv"), "w") \ - as fh: + with open(os.path.join(output_dir_, "configurations.csv"), "w") as fh: fh.write(algorithm_runs) diff --git a/scripts/2015_nips_paper/plot/plot_ranks.py b/scripts/2015_nips_paper/plot/plot_ranks.py index 5be095389c..b2e85248b7 100644 --- a/scripts/2015_nips_paper/plot/plot_ranks.py +++ b/scripts/2015_nips_paper/plot/plot_ranks.py @@ -17,8 +17,8 @@ def read_csv(fn, has_header=True, data_type=str): """ data = list() header = None - with open(fn, 'r') as csvfile: - csv_reader = csv.reader(csvfile, delimiter=',', quotechar='|') + with open(fn, "r") as csvfile: + csv_reader = csv.reader(csvfile, delimiter=",", quotechar="|") for row in csv_reader: if header is None and has_header: header = row @@ -37,7 +37,7 @@ def fill_trajectory(performance_list, time_list): series = pd.concat(series_list, axis=1) # Fill missing performance values (NaNs) with last non-NaN value. - series = series.fillna(method='ffill') + series = series.fillna(method="ffill") # return the trajectories over seeds (series object) return series @@ -52,10 +52,10 @@ def main(): working_directory = "../log_output" # list of models - model_list = ['vanilla', 'ensemble', 'metalearning', 'meta_ensemble'] + model_list = ["vanilla", "ensemble", "metalearning", "meta_ensemble"] # list of seeds - seed_dir = os.path.join(working_directory, 'vanilla') + seed_dir = os.path.join(working_directory, "vanilla") seed_list = [seed for seed in os.listdir(seed_dir)] # list of tasks @@ -74,21 +74,23 @@ def main(): for seed in seed_list: # collect all csv files of different seeds for current model and # current task. 
- if model in ['vanilla', 'ensemble']: - csv_file = os.path.join(working_directory, - 'vanilla', - seed, - task_id, - "score_{}.csv".format(model) - ) - - elif model in ['metalearning', 'meta_ensemble']: - csv_file = os.path.join(working_directory, - 'metalearning', - seed, - task_id, - "score_{}.csv".format(model), - ) + if model in ["vanilla", "ensemble"]: + csv_file = os.path.join( + working_directory, + "vanilla", + seed, + task_id, + "score_{}.csv".format(model), + ) + + elif model in ["metalearning", "meta_ensemble"]: + csv_file = os.path.join( + working_directory, + "metalearning", + seed, + task_id, + "score_{}.csv".format(model), + ) csv_files.append(csv_file) performance_list = [] @@ -99,8 +101,9 @@ def main(): _, csv_data = read_csv(fl, has_header=True) csv_data = np.array(csv_data) # Replace too high values with args.maxsize - data = [min([sys.maxsize, float(i.strip())]) for i in - csv_data[:, 2]] # test trajectories are stored in third column + data = [ + min([sys.maxsize, float(i.strip())]) for i in csv_data[:, 2] + ] # test trajectories are stored in third column time_steps = [float(i.strip()) for i in csv_data[:, 0]] assert time_steps[0] == 0 @@ -123,15 +126,16 @@ def main(): n_tasks = len(task_list) for i in range(n_iter): - pick = np.random.choice(all_trajectories[0][0].shape[1], - size=(len(model_list))) + pick = np.random.choice(all_trajectories[0][0].shape[1], size=(len(model_list))) for j in range(n_tasks): all_trajectories_tmp = pd.DataFrame( - {model_list[k]: at[j].iloc[:, pick[k]] for - k, at in enumerate(all_trajectories)} + { + model_list[k]: at[j].iloc[:, pick[k]] + for k, at in enumerate(all_trajectories) + } ) - all_trajectories_tmp = all_trajectories_tmp.fillna(method='ffill', axis=0) + all_trajectories_tmp = all_trajectories_tmp.fillna(method="ffill", axis=0) r_tmp = all_trajectories_tmp.rank(axis=1) all_rankings.append(r_tmp) @@ -141,7 +145,7 @@ def main(): for ranking in all_rankings: ranks_for_model.append(ranking.loc[:, model]) ranks_for_model = pd.DataFrame(ranks_for_model) - ranks_for_model = ranks_for_model.fillna(method='ffill', axis=1) + ranks_for_model = ranks_for_model.fillna(method="ffill", axis=1) final_ranks.append(ranks_for_model.mean(skipna=True)) # Step 3. Plot the average ranks over time. 
@@ -155,8 +159,8 @@ def main(): X_data.append(max_runtime) y_data.append(y) plt.plot(X_data, y_data, label=model) - plt.xlabel('time [sec]') - plt.ylabel('average rank') + plt.xlabel("time [sec]") + plt.ylabel("average rank") plt.legend() plt.savefig(saveto) diff --git a/scripts/2015_nips_paper/run/remove_dataset_from_metadata.py b/scripts/2015_nips_paper/run/remove_dataset_from_metadata.py index f31e16e65f..d16e67e23c 100644 --- a/scripts/2015_nips_paper/run/remove_dataset_from_metadata.py +++ b/scripts/2015_nips_paper/run/remove_dataset_from_metadata.py @@ -4,25 +4,27 @@ from shutil import copyfile -def remove_dataset_from_aslib_arff(input_file, - output_file, - id, - ): +def remove_dataset_from_aslib_arff( + input_file, + output_file, + id, +): with open(input_file) as fh: arff_object = arff.load(fh) - for i in range(len(arff_object['data']) - 1, -1, -1): - if str(arff_object['data'][i][0]) == str(id): - del arff_object['data'][i] + for i in range(len(arff_object["data"]) - 1, -1, -1): + if str(arff_object["data"][i][0]) == str(id): + del arff_object["data"][i] with open(output_file, "w") as fh: arff.dump(arff_object, fh) del arff_object -def remove_dataset(metadata_directory, - output_directory, - id, - ): +def remove_dataset( + metadata_directory, + output_directory, + id, +): metadata_sub_directories = os.listdir(metadata_directory) for metadata_sub_directory in metadata_sub_directories: diff --git a/scripts/2015_nips_paper/run/run_auto_sklearn.py b/scripts/2015_nips_paper/run/run_auto_sklearn.py index 366280692e..960ab7be80 100644 --- a/scripts/2015_nips_paper/run/run_auto_sklearn.py +++ b/scripts/2015_nips_paper/run/run_auto_sklearn.py @@ -21,11 +21,12 @@ def load_task(task_id): X_test = X[test_indices] y_test = y[test_indices] dataset = openml.datasets.get_dataset(task.dataset_id) - _, _, cat = dataset.get_data(return_categorical_indicator=True, - target=task.target_name) + _, _, cat = dataset.get_data( + return_categorical_indicator=True, target=task.target_name + ) del _ del dataset - cat = ['categorical' if c else 'numerical' for c in cat] + cat = ["categorical" if c else "numerical" for c in cat] unique = np.unique(y_train) mapping = {unique_value: i for i, unique_value in enumerate(unique)} @@ -35,13 +36,14 @@ def load_task(task_id): return X_train, y_train, X_test, y_test, cat -def run_experiment(working_directory, - time_limit, - per_run_time_limit, - task_id, - seed, - use_metalearning, - ): +def run_experiment( + working_directory, + time_limit, + per_run_time_limit, + task_id, + seed, + use_metalearning, +): # set this to local dataset cache # openml.config.cache_directory = os.path.join(working_directory, "../cache") @@ -57,12 +59,14 @@ def run_experiment(working_directory, if use_metalearning is True: # path to the original metadata directory. metadata_directory = os.path.abspath(os.path.dirname(__file__)) - metadata_directory = os.path.join(metadata_directory, - "../../../autosklearn/metalearning/files/") + metadata_directory = os.path.join( + metadata_directory, "../../../autosklearn/metalearning/files/" + ) # Create new metadata directory not containing task_id. 
- new_metadata_directory = os.path.abspath(os.path.join(working_directory, - "metadata_%i" % task_id)) + new_metadata_directory = os.path.abspath( + os.path.join(working_directory, "metadata_%i" % task_id) + ) try: os.makedirs(new_metadata_directory) @@ -73,100 +77,105 @@ def run_experiment(working_directory, remove_dataset(metadata_directory, new_metadata_directory, task_id) automl_arguments = { - 'time_left_for_this_task': time_limit, - 'per_run_time_limit': per_run_time_limit, - 'initial_configurations_via_metalearning': 25, - 'ensemble_size': 0, - 'seed': seed, - 'memory_limit': 3072, - 'resampling_strategy': 'holdout', - 'resampling_strategy_arguments': {'train_size': 0.67}, - 'tmp_folder': tmp_dir, - 'delete_tmp_folder_after_terminate': False, - 'disable_evaluator_output': False, - 'metadata_directory': new_metadata_directory + "time_left_for_this_task": time_limit, + "per_run_time_limit": per_run_time_limit, + "initial_configurations_via_metalearning": 25, + "ensemble_size": 0, + "seed": seed, + "memory_limit": 3072, + "resampling_strategy": "holdout", + "resampling_strategy_arguments": {"train_size": 0.67}, + "tmp_folder": tmp_dir, + "delete_tmp_folder_after_terminate": False, + "disable_evaluator_output": False, + "metadata_directory": new_metadata_directory, } # Without metalearning else: automl_arguments = { - 'time_left_for_this_task': time_limit, - 'per_run_time_limit': per_run_time_limit, - 'initial_configurations_via_metalearning': 0, - 'ensemble_size': 0, - 'seed': seed, - 'memory_limit': 3072, - 'resampling_strategy': 'holdout', - 'resampling_strategy_arguments': {'train_size': 0.67}, - 'tmp_folder': tmp_dir, - 'delete_tmp_folder_after_terminate': False, - 'disable_evaluator_output': False, + "time_left_for_this_task": time_limit, + "per_run_time_limit": per_run_time_limit, + "initial_configurations_via_metalearning": 0, + "ensemble_size": 0, + "seed": seed, + "memory_limit": 3072, + "resampling_strategy": "holdout", + "resampling_strategy_arguments": {"train_size": 0.67}, + "tmp_folder": tmp_dir, + "delete_tmp_folder_after_terminate": False, + "disable_evaluator_output": False, } automl = AutoSklearnClassifier(**automl_arguments) X_train, y_train, X_test, y_test, cat = load_task(task_id) - automl.fit(X_train, y_train, - dataset_name=str(task_id), - X_test=X_test, y_test=y_test, - metric=balanced_accuracy) + automl.fit( + X_train, + y_train, + dataset_name=str(task_id), + X_test=X_test, + y_test=y_test, + metric=balanced_accuracy, + ) -def main(working_directory, - output_file, - task_id, - seed, - model, - time_limit, - per_run_time_limit): +def main( + working_directory, output_file, task_id, seed, model, time_limit, per_run_time_limit +): # vanilla and metalearning must be called first before ensemble and # meta_ensemble can be called, respectively. 
if model == "vanilla": - run_experiment(working_directory, - time_limit, - per_run_time_limit, - task_id, - seed, - use_metalearning=False, - ) - score_ensemble.main(working_directory, - output_file, - task_id, - seed, - ensemble_size=1, - ) + run_experiment( + working_directory, + time_limit, + per_run_time_limit, + task_id, + seed, + use_metalearning=False, + ) + score_ensemble.main( + working_directory, + output_file, + task_id, + seed, + ensemble_size=1, + ) elif model == "metalearning": - run_experiment(working_directory, - time_limit, - per_run_time_limit, - task_id, - seed, - use_metalearning=True, - ) - score_ensemble.main(working_directory, - output_file, - task_id, - seed, - ensemble_size=1, - ) + run_experiment( + working_directory, + time_limit, + per_run_time_limit, + task_id, + seed, + use_metalearning=True, + ) + score_ensemble.main( + working_directory, + output_file, + task_id, + seed, + ensemble_size=1, + ) else: - score_ensemble.main(working_directory, - output_file, - task_id, - seed, - ensemble_size=50, - ) + score_ensemble.main( + working_directory, + output_file, + task_id, + seed, + ensemble_size=50, + ) if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument('--working-directory', type=str, required=True) + parser.add_argument("--working-directory", type=str, required=True) parser.add_argument("--output-file", type=str, required=True) parser.add_argument("--time-limit", type=int, required=True) parser.add_argument("--per-runtime-limit", type=int, required=True) - parser.add_argument('--task-id', type=int, required=True) - parser.add_argument('-s', '--seed', type=int) + parser.add_argument("--task-id", type=int, required=True) + parser.add_argument("-s", "--seed", type=int) parser.add_argument("--model", type=str, required=True) args = parser.parse_args() @@ -178,11 +187,12 @@ def main(working_directory, time_limit = args.time_limit per_run_time_limit = args.per_runtime_limit - main(working_directory, - output_file, - task_id, - seed, - model, - time_limit, - per_run_time_limit, - ) + main( + working_directory, + output_file, + task_id, + seed, + model, + time_limit, + per_run_time_limit, + ) diff --git a/scripts/2015_nips_paper/run/score_ensemble.py b/scripts/2015_nips_paper/run/score_ensemble.py index 3d10954d94..1e873f01fd 100644 --- a/scripts/2015_nips_paper/run/score_ensemble.py +++ b/scripts/2015_nips_paper/run/score_ensemble.py @@ -14,21 +14,21 @@ def _load_file(f): - split = f.split('_') + split = f.split("_") as_seed = int(split[-2]) - ta_seed = int(split[-1].split('.')[0]) + ta_seed = int(split[-1].split(".")[0]) np_array = np.load(f) return np_array, (as_seed, ta_seed), os.path.getmtime(f) def read_files(directory, seed=None, n_jobs=1): - seed_pattern = '*' if seed is None else str(seed) - glob_pattern = os.path.join(directory, "predictions_*_%s_*.npy" % - seed_pattern) + seed_pattern = "*" if seed is None else str(seed) + glob_pattern = os.path.join(directory, "predictions_*_%s_*.npy" % seed_pattern) files = sorted(glob.glob(glob_pattern)) files = joblib.Parallel(n_jobs=n_jobs, verbose=10)( - joblib.delayed(_load_file)(f=f) for f in files) + joblib.delayed(_load_file)(f=f) for f in files + ) return files @@ -38,13 +38,13 @@ def main(input_directories, output_file, task_id, seed, ensemble_size, n_jobs=1) if isinstance(input_directories, str): # add seed and task id directories - input_directories += '/%i/%i' % (seed, task_id) + input_directories += "/%i/%i" % (seed, task_id) input_directories = [input_directories] else: 
new_directories = [] for dir in input_directories: - dir += '/%i/%i' % (seed, task_id) + dir += "/%i/%i" % (seed, task_id) new_directories.append(dir) input_directories = new_directories @@ -54,28 +54,28 @@ def main(input_directories, output_file, task_id, seed, ensemble_size, n_jobs=1) # Get the prediction files. for input_directory in input_directories: - print('Loading files from input directory:', input_directory) + print("Loading files from input directory:", input_directory) validation_files_ = read_files( - os.path.join(input_directory, - '.auto-sklearn/predictions_ensemble'), - n_jobs=n_jobs) + os.path.join(input_directory, ".auto-sklearn/predictions_ensemble"), + n_jobs=n_jobs, + ) validation_files.extend(validation_files_) test_files_ = read_files( - os.path.join(input_directory, - '.auto-sklearn/predictions_test'), - n_jobs=n_jobs) + os.path.join(input_directory, ".auto-sklearn/predictions_test"), + n_jobs=n_jobs, + ) test_files.extend(test_files_) assert len(validation_files_) > 0 assert len(validation_files_) == len(test_files_) - print('Loaded %d files!' % len(validation_files_)) + print("Loaded %d files!" % len(validation_files_)) # if not specified, we get all files. - seed_pattern = '*' if seed is None else str(seed) - glob_pattern = os.path.join(input_directory, - ".auto-sklearn", - "start_time_%s" % seed_pattern) + seed_pattern = "*" if seed is None else str(seed) + glob_pattern = os.path.join( + input_directory, ".auto-sklearn", "start_time_%s" % seed_pattern + ) start_time_files = glob.glob(glob_pattern) # find the earliest startime. @@ -90,14 +90,15 @@ def main(input_directories, output_file, task_id, seed, ensemble_size, n_jobs=1) validation_files.sort(key=lambda t: t[-1]) - keys_to_test_files = {test_file[1]: test_file - for test_file in test_files} + keys_to_test_files = {test_file[1]: test_file for test_file in test_files} # Resort such that both files have the same order - test_files = [keys_to_test_files[validation_file[1]] - for validation_file in validation_files] + test_files = [ + keys_to_test_files[validation_file[1]] for validation_file in validation_files + ] assert [validation_file[1] for validation_file in validation_files] == [ - test_file[1] for test_file in test_files] + test_file[1] for test_file in test_files + ] losses = [] top_models_at_step = dict() @@ -106,7 +107,7 @@ def main(input_directories, output_file, task_id, seed, ensemble_size, n_jobs=1) temporary_directory=input_directory, output_directory=input_directory + "_output", delete_tmp_folder_after_terminate=False, - prefix="auto-sklearn" + prefix="auto-sklearn", ) valid_labels = backend.load_targets_ensemble() score = balanced_accuracy @@ -124,46 +125,63 @@ def main(input_directories, output_file, task_id, seed, ensemble_size, n_jobs=1) if top_model in models_to_remove: models_to_remove.remove(top_model) - print("Removing the following %d models from the library: %s" - % (len(models_to_remove), models_to_remove)) + print( + "Removing the following %d models from the library: %s" + % (len(models_to_remove), models_to_remove) + ) for model_id in models_to_remove: validation_files[model_id] = None test_files[model_id] = None - print('Starting ensemble building!') + print("Starting ensemble building!") output = joblib.Parallel(n_jobs=n_jobs, verbose=20)( - joblib.delayed( - evaluate)(input_directory=input_directories[0], - validation_files=[validation_files[j] for - j in range(len(validation_files)) - if j in top_models_at_step[i]], - test_files=[test_files[j] for - j in range(len(test_files)) 
- if j in top_models_at_step[i]], - ensemble_size=ensemble_size) - for i in range(len(test_files))) + joblib.delayed(evaluate)( + input_directory=input_directories[0], + validation_files=[ + validation_files[j] + for j in range(len(validation_files)) + if j in top_models_at_step[i] + ], + test_files=[ + test_files[j] + for j in range(len(test_files)) + if j in top_models_at_step[i] + ], + ensemble_size=ensemble_size, + ) + for i in range(len(test_files)) + ) # Create output csv file file_path = os.path.abspath("%s/%s" % (input_directory, output_file)) with open(file_path, "w") as csv_file: - fieldnames = ['Time', 'Training (Empirical) Performance', - 'Test Set Performance'] + fieldnames = [ + "Time", + "Training (Empirical) Performance", + "Test Set Performance", + ] csv_writer = csv.DictWriter(csv_file, fieldnames=fieldnames) csv_writer.writeheader() # First time step - csv_writer.writerow({'Time': 0, - 'Training (Empirical) Performance': 1.0, - 'Test Set Performance': 1.0}) + csv_writer.writerow( + { + "Time": 0, + "Training (Empirical) Performance": 1.0, + "Test Set Performance": 1.0, + } + ) for i, o in enumerate(output): - csv_writer.writerow({'Time': o['ensemble_time'] - + o['time_function_evaluation'] - - starttime, - 'Training (Empirical) Performance': - o['ensemble_error'], - 'Test Set Performance': - o['ensemble_test_error']}) + csv_writer.writerow( + { + "Time": o["ensemble_time"] + + o["time_function_evaluation"] + - starttime, + "Training (Empirical) Performance": o["ensemble_error"], + "Test Set Performance": o["ensemble_test_error"], + } + ) def evaluate(input_directory, validation_files, test_files, ensemble_size=50): @@ -187,18 +205,18 @@ def evaluate(input_directory, validation_files, test_files, ensemble_size=50): # Build the ensemble start = time.time() - ensemble_selection = EnsembleSelection(ensemble_size=ensemble_size, - task_type=D.info['task'], - metric=score, - random_state=np.random.RandomState()) + ensemble_selection = EnsembleSelection( + ensemble_size=ensemble_size, + task_type=D.info["task"], + metric=score, + random_state=np.random.RandomState(), + ) validation_predictions = np.array([v[0] for v in validation_files]) test_predictions = np.array([t[0] for t in test_files]) - ensemble_selection.fit(validation_predictions, valid_labels, - identifiers=None) - y_hat_ensemble = ensemble_selection.predict(np.array( - validation_predictions)) + ensemble_selection.fit(validation_predictions, valid_labels, identifiers=None) + y_hat_ensemble = ensemble_selection.predict(np.array(validation_predictions)) y_hat_test = ensemble_selection.predict(np.array(test_predictions)) # Compute validation error @@ -209,21 +227,22 @@ def evaluate(input_directory, validation_files, test_files, ensemble_size=50): ensemble_time = time.time() - start - rval = {'ensemble_time': ensemble_time, - 'time_function_evaluation': time_function_evaluation, - 'ensemble_error': ensemble_error, - 'ensemble_test_error': ensemble_test_error} + rval = { + "ensemble_time": ensemble_time, + "time_function_evaluation": time_function_evaluation, + "ensemble_error": ensemble_error, + "ensemble_test_error": ensemble_test_error, + } return rval -if __name__ == '__main__': +if __name__ == "__main__": parser = ArgumentParser() - parser.add_argument('--input-directory', type=str, - required=True, nargs='+') - parser.add_argument('--task-id', type=int, required=True) - parser.add_argument('-s', '--seed', type=int) - parser.add_argument("--output-file", type=str, default='score_ensemble.csv') + 
parser.add_argument("--input-directory", type=str, required=True, nargs="+") + parser.add_argument("--task-id", type=int, required=True) + parser.add_argument("-s", "--seed", type=int) + parser.add_argument("--output-file", type=str, default="score_ensemble.csv") parser.add_argument("--ensemble-size", type=int, default=50) parser.add_argument("--n-jobs", type=int, default=1) args = parser.parse_args() diff --git a/scripts/2015_nips_paper/setup/get_tasks.py b/scripts/2015_nips_paper/setup/get_tasks.py index 09f06a0a64..98c4ee085e 100644 --- a/scripts/2015_nips_paper/setup/get_tasks.py +++ b/scripts/2015_nips_paper/setup/get_tasks.py @@ -4,30 +4,162 @@ # List of dataset IDs used for the NIPS experiment. -dataset_ids = [1000, 1002, 1018, 1019, 1020, 1021, 1036, 1040, 1041, 1049, 1050, 1053, - 1056, 1067, 1068, 1069, 1111, 1112, 1114, 1116, 1119, 1120, 1128, 1130, - 1134, 1138, 1139, 1142, 1146, 1161, 1166, 12, 14, 16, 179, 180, 181, 182, - 184, 185, 18, 21, 22, 23, 24, 26, 273, 28, 293, 300, 30, 31, 32, 351, 354, - 357, 36, 389, 38, 390, 391, 392, 393, 395, 396, 398, 399, 3, 401, 44, 46, - 554, 57, 60, 679, 6, 715, 718, 720, 722, 723, 727, 728, 734, 735, 737, - 740, 741, 743, 751, 752, 761, 772, 797, 799, 803, 806, 807, 813, 816, 819, - 821, 822, 823, 833, 837, 843, 845, 846, 847, 849, 866, 871, 881, 897, 901, - 903, 904, 910, 912, 913, 914, 917, 923, 930, 934, 953, 958, 959, 962, 966, - 971, 976, 977, 978, 979, 980, 991, 993, 995] +dataset_ids = [ + 1000, + 1002, + 1018, + 1019, + 1020, + 1021, + 1036, + 1040, + 1041, + 1049, + 1050, + 1053, + 1056, + 1067, + 1068, + 1069, + 1111, + 1112, + 1114, + 1116, + 1119, + 1120, + 1128, + 1130, + 1134, + 1138, + 1139, + 1142, + 1146, + 1161, + 1166, + 12, + 14, + 16, + 179, + 180, + 181, + 182, + 184, + 185, + 18, + 21, + 22, + 23, + 24, + 26, + 273, + 28, + 293, + 300, + 30, + 31, + 32, + 351, + 354, + 357, + 36, + 389, + 38, + 390, + 391, + 392, + 393, + 395, + 396, + 398, + 399, + 3, + 401, + 44, + 46, + 554, + 57, + 60, + 679, + 6, + 715, + 718, + 720, + 722, + 723, + 727, + 728, + 734, + 735, + 737, + 740, + 741, + 743, + 751, + 752, + 761, + 772, + 797, + 799, + 803, + 806, + 807, + 813, + 816, + 819, + 821, + 822, + 823, + 833, + 837, + 843, + 845, + 846, + 847, + 849, + 866, + 871, + 881, + 897, + 901, + 903, + 904, + 910, + 912, + 913, + 914, + 917, + 923, + 930, + 934, + 953, + 958, + 959, + 962, + 966, + 971, + 976, + 977, + 978, + 979, + 980, + 991, + 993, + 995, +] def get_task_ids(dataset_ids): # return task ids of corresponding datset ids. # active tasks - tasks_a = openml.tasks.list_tasks(task_type_id=1, status='active') + tasks_a = openml.tasks.list_tasks(task_type_id=1, status="active") tasks_a = pd.DataFrame.from_dict(tasks_a, orient="index") # query only those with holdout as the resampling startegy. tasks_a = tasks_a[(tasks_a.estimation_procedure == "33% Holdout set")] # deactivated tasks - tasks_d = openml.tasks.list_tasks(task_type_id=1, status='deactivated') + tasks_d = openml.tasks.list_tasks(task_type_id=1, status="deactivated") tasks_d = pd.DataFrame.from_dict(tasks_d, orient="index") tasks_d = tasks_d[(tasks_d.estimation_procedure == "33% Holdout set")] @@ -47,9 +179,9 @@ def get_task_ids(dataset_ids): def main(): task_ids = sorted(get_task_ids(dataset_ids)) - string_to_print = '' + string_to_print = "" for tid in task_ids: - string_to_print += str(tid) + ' ' + string_to_print += str(tid) + " " print(string_to_print) # print the task ids for bash script. 
diff --git a/scripts/run_auto-sklearn_for_metadata_generation.py b/scripts/run_auto-sklearn_for_metadata_generation.py index e1fc71a135..6b82b233c7 100644 --- a/scripts/run_auto-sklearn_for_metadata_generation.py +++ b/scripts/run_auto-sklearn_for_metadata_generation.py @@ -1,4 +1,4 @@ -if __name__ == '__main__': +if __name__ == "__main__": import argparse import json @@ -11,27 +11,35 @@ from autosklearn.classification import AutoSklearnClassifier from autosklearn.regression import AutoSklearnRegressor from autosklearn.evaluation import ExecuteTaFuncWithQueue, get_cost_of_crash - from autosklearn.metrics import accuracy, balanced_accuracy, roc_auc, log_loss, r2, \ - mean_squared_error, mean_absolute_error, root_mean_squared_error, CLASSIFICATION_METRICS, \ - REGRESSION_METRICS + from autosklearn.metrics import ( + accuracy, + balanced_accuracy, + roc_auc, + log_loss, + r2, + mean_squared_error, + mean_absolute_error, + root_mean_squared_error, + CLASSIFICATION_METRICS, + REGRESSION_METRICS, + ) from smac.runhistory.runhistory import RunInfo from smac.scenario.scenario import Scenario from smac.stats.stats import Stats from smac.tae import StatusType - sys.path.append('.') + sys.path.append(".") from update_metadata_util import load_task - parser = argparse.ArgumentParser() - parser.add_argument('--working-directory', type=str, required=True) - parser.add_argument('--time-limit', type=int, required=True) - parser.add_argument('--per-run-time-limit', type=int, required=True) - parser.add_argument('--task-id', type=int, required=True) - parser.add_argument('--metric', type=str, required=True) - parser.add_argument('-s', '--seed', type=int, required=True) - parser.add_argument('--unittest', action='store_true') + parser.add_argument("--working-directory", type=str, required=True) + parser.add_argument("--time-limit", type=int, required=True) + parser.add_argument("--per-run-time-limit", type=int, required=True) + parser.add_argument("--task-id", type=int, required=True) + parser.add_argument("--metric", type=str, required=True) + parser.add_argument("-s", "--seed", type=int, required=True) + parser.add_argument("--unittest", action="store_true") args = parser.parse_args() working_directory = args.working_directory @@ -44,8 +52,9 @@ X_train, y_train, X_test, y_test, cat, task_type, dataset_name = load_task(task_id) - configuration_output_dir = os.path.join(working_directory, 'configuration', - task_type) + configuration_output_dir = os.path.join( + working_directory, "configuration", task_type + ) os.makedirs(configuration_output_dir, exist_ok=True) tmp_dir = os.path.join(configuration_output_dir, str(task_id), metric) os.makedirs(tmp_dir, exist_ok=True) @@ -54,49 +63,55 @@ autosklearn_directory = os.path.join(tempdir, "dir") automl_arguments = { - 'time_left_for_this_task': time_limit, - 'per_run_time_limit': per_run_time_limit, - 'initial_configurations_via_metalearning': 0, - 'ensemble_size': 0, - 'ensemble_nbest': 0, - 'seed': seed, - 'memory_limit': 3072, - 'resampling_strategy': 'partial-cv', - 'delete_tmp_folder_after_terminate': False, - 'tmp_folder': autosklearn_directory, - 'disable_evaluator_output': True, + "time_left_for_this_task": time_limit, + "per_run_time_limit": per_run_time_limit, + "initial_configurations_via_metalearning": 0, + "ensemble_size": 0, + "ensemble_nbest": 0, + "seed": seed, + "memory_limit": 3072, + "resampling_strategy": "partial-cv", + "delete_tmp_folder_after_terminate": False, + "tmp_folder": autosklearn_directory, + "disable_evaluator_output": True, } if 
is_test: - automl_arguments['resampling_strategy_arguments'] = {'folds': 2} - if task_type == 'classification': - include = {'classifier': ['libsvm_svc'], 'feature_preprocessor': ['no_preprocessing']} - automl_arguments['include'] = include - elif task_type == 'regression': - include = {'regressor': ['extra_trees'], 'feature_preprocessor': ['no_preprocessing']} - automl_arguments['include'] = include + automl_arguments["resampling_strategy_arguments"] = {"folds": 2} + if task_type == "classification": + include = { + "classifier": ["libsvm_svc"], + "feature_preprocessor": ["no_preprocessing"], + } + automl_arguments["include"] = include + elif task_type == "regression": + include = { + "regressor": ["extra_trees"], + "feature_preprocessor": ["no_preprocessing"], + } + automl_arguments["include"] = include else: - raise ValueError('Unsupported task type: %s' % str(task_type)) + raise ValueError("Unsupported task type: %s" % str(task_type)) else: - automl_arguments['resampling_strategy_arguments'] = {'folds': 10} + automl_arguments["resampling_strategy_arguments"] = {"folds": 10} include = None metric = { - 'accuracy': accuracy, - 'balanced_accuracy': balanced_accuracy, - 'roc_auc': roc_auc, - 'logloss': log_loss, - 'r2': r2, - 'mean_squared_error': mean_squared_error, - 'root_mean_squared_error': root_mean_squared_error, - 'mean_absolute_error': mean_absolute_error, + "accuracy": accuracy, + "balanced_accuracy": balanced_accuracy, + "roc_auc": roc_auc, + "logloss": log_loss, + "r2": r2, + "mean_squared_error": mean_squared_error, + "root_mean_squared_error": root_mean_squared_error, + "mean_absolute_error": mean_absolute_error, }[metric] - automl_arguments['metric'] = metric + automl_arguments["metric"] = metric - if task_type == 'classification': + if task_type == "classification": automl = AutoSklearnClassifier(**automl_arguments) scorer_list = CLASSIFICATION_METRICS - elif task_type == 'regression': + elif task_type == "regression": automl = AutoSklearnRegressor(**automl_arguments) scorer_list = REGRESSION_METRICS else: @@ -104,8 +119,14 @@ scoring_functions = [scorer for name, scorer in scorer_list.items()] - automl.fit(X_train, y_train, dataset_name=dataset_name, - feat_type=cat, X_test=X_test, y_test=y_test) + automl.fit( + X_train, + y_train, + dataset_name=dataset_name, + feat_type=cat, + X_test=X_test, + y_test=y_test, + ) trajectory = automl.trajectory_ incumbent_id_to_model = {} @@ -117,40 +138,44 @@ else: memory_limit_factor = 2 - print('Starting to validate configurations') + print("Starting to validate configurations") for i, entry in enumerate(trajectory): - print('Starting to validate configuration %d/%d' % (i + 1, len(trajectory))) + print("Starting to validate configuration %d/%d" % (i + 1, len(trajectory))) incumbent_id = entry.incumbent_id train_performance = entry.train_perf if incumbent_id not in incumbent_id_to_model: config = entry.incumbent - logger = logging.getLogger('Testing:)') + logger = logging.getLogger("Testing:)") stats = Stats( - Scenario({ - 'cutoff_time': per_run_time_limit * 2, - 'run_obj': 'quality', - }) + Scenario( + { + "cutoff_time": per_run_time_limit * 2, + "run_obj": "quality", + } + ) ) stats.start_timing() # To avoid the output "first run crashed"... 
stats.submitted_ta_runs += 1 stats.finished_ta_runs += 1 - memory_lim = memory_limit_factor * automl_arguments['memory_limit'] + memory_lim = memory_limit_factor * automl_arguments["memory_limit"] pipeline, run_info, run_value = automl.fit_pipeline( - X=X_train, y=y_train, - X_test=X_test, y_test=y_test, - resampling_strategy='test', + X=X_train, + y=y_train, + X_test=X_test, + y_test=y_test, + resampling_strategy="test", memory_limit=memory_lim, disable_file_output=True, logger=logger, stats=stats, scoring_functions=scoring_functions, include=include, - metric=automl_arguments['metric'], - pynisher_context='spawn', - cutoff=per_run_time_limit*3, + metric=automl_arguments["metric"], + pynisher_context="spawn", + cutoff=per_run_time_limit * 3, config=config, ) @@ -159,58 +184,65 @@ # print(additional_run_info) - validated_trajectory.append(list(entry) + [task_id] + - [run_value.additional_info]) - print('Finished validating configuration %d/%d' % (i + 1, len(trajectory))) - print('Finished to validate configurations') - - print('Starting to copy data to configuration directory', flush=True) - validated_trajectory = [entry[:2] + [entry[2].get_dictionary()] + entry[3:] - for entry in validated_trajectory] - validated_trajectory_file = os.path.join(tmp_dir, 'validation_trajectory_%d.json' % seed) - with open(validated_trajectory_file, 'w') as fh: + validated_trajectory.append( + list(entry) + [task_id] + [run_value.additional_info] + ) + print("Finished validating configuration %d/%d" % (i + 1, len(trajectory))) + print("Finished to validate configurations") + + print("Starting to copy data to configuration directory", flush=True) + validated_trajectory = [ + entry[:2] + [entry[2].get_dictionary()] + entry[3:] + for entry in validated_trajectory + ] + validated_trajectory_file = os.path.join( + tmp_dir, "validation_trajectory_%d.json" % seed + ) + with open(validated_trajectory_file, "w") as fh: json.dump(validated_trajectory, fh, indent=4) - for dirpath, dirnames, filenames in os.walk(autosklearn_directory, topdown=False): print(dirpath, dirnames, filenames) for filename in filenames: - if filename == 'datamanager.pkl': + if filename == "datamanager.pkl": os.remove(os.path.join(dirpath, filename)) - elif filename == 'configspace.pcs': + elif filename == "configspace.pcs": os.remove(os.path.join(dirpath, filename)) for dirname in dirnames: - if dirname in ('models', 'cv_models'): + if dirname in ("models", "cv_models"): os.rmdir(os.path.join(dirpath, dirname)) - print('*' * 80) - print('Going to copy the configuration directory') - script = 'cp -r %s %s' % (autosklearn_directory, os.path.join(tmp_dir, 'auto-sklearn-output')) + print("*" * 80) + print("Going to copy the configuration directory") + script = "cp -r %s %s" % ( + autosklearn_directory, + os.path.join(tmp_dir, "auto-sklearn-output"), + ) proc = subprocess.run( script, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, - executable='/bin/bash', + executable="/bin/bash", ) - print('*' * 80) + print("*" * 80) print(script) print(proc.stdout) print(proc.stderr) - print('Finished copying the configuration directory') + print("Finished copying the configuration directory") - if not tempdir.startswith('/tmp'): - raise ValueError('%s must not start with /tmp' % tempdir) - script = 'rm -rf %s' % tempdir - print('*' * 80) + if not tempdir.startswith("/tmp"): + raise ValueError("%s must not start with /tmp" % tempdir) + script = "rm -rf %s" % tempdir + print("*" * 80) print(script) proc = subprocess.run( script, 
stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, - executable='/bin/bash', + executable="/bin/bash", ) print(proc.stdout) print(proc.stderr) - print('Finished configuring') + print("Finished configuring") diff --git a/scripts/update_metadata_util.py b/scripts/update_metadata_util.py index 153e63c6cf..8ed99d9bd0 100644 --- a/scripts/update_metadata_util.py +++ b/scripts/update_metadata_util.py @@ -3,37 +3,327 @@ classification_tasks = [ - 232, 236, 241, 245, 253, 254, 256, 258, 260, 262, 267, 271, 273, 275, 279, 288, 336, - 340, 2119, 2120, 2121, 2122, 2123, 2125, 2356, 3044, 3047, 3048, 3049, 3053, 3054, - 3055, 75089, 75092, 75093, 75098, 75100, 75108, 75109, 75112, 75114, 75115, 75116, - 75118, 75120, 75121, 75125, 75126, 75129, 75131, 75133, 75134, 75136, 75139, 75141, - 75142, 75143, 75146, 75147, 75148, 75149, 75153, 75154, 75156, 75157, 75159, 75161, - 75163, 75166, 75169, 75171, 75173, 75174, 75176, 75178, 75179, 75180, 75184, 75185, - 75187, 75192, 75195, 75196, 75199, 75210, 75212, 75213, 75215, 75217, 75219, 75221, - 75223, 75225, 75232, 75233, 75234, 75235, 75236, 75237, 75239, 75250, 126021, 126024, - 126028, 126030, 126031, 146574, 146575, 146576, 146577, 146578, 146583, 146586, - 146592, 146593, 146594, 146596, 146597, 146600, 146601, 146602, 146603, 146679, - 166859, 166866, 166872, 166875, 166882, 166897, 166905, 166906, 166913, 166915, - 166931, 166932, 166944, 166950, 166951, 166953, 166956, 166957, 166958, 166959, - 166970, 166996, 167085, 167086, 167087, 167088, 167089, 167090, 167094, 167096, - 167097, 167099, 167100, 167101, 167103, 167105, 167106, 167202, 167203, 167204, - 167205, 168785, 168791, 189779, 189786, 189828, 189829, 189836, 189840, 189841, - 189843, 189844, 189845, 189846, 189857, 189858, 189859, 189863, 189864, 189869, - 189870, 189875, 189878, 189880, 189881, 189882, 189883, 189884, 189887, 189890, - 189893, 189894, 189899, 189900, 189902, 190154, 190155, 190156, 190157, 190158, - 190159, 211720, 211721, 211722, 211723, 211724 + 232, + 236, + 241, + 245, + 253, + 254, + 256, + 258, + 260, + 262, + 267, + 271, + 273, + 275, + 279, + 288, + 336, + 340, + 2119, + 2120, + 2121, + 2122, + 2123, + 2125, + 2356, + 3044, + 3047, + 3048, + 3049, + 3053, + 3054, + 3055, + 75089, + 75092, + 75093, + 75098, + 75100, + 75108, + 75109, + 75112, + 75114, + 75115, + 75116, + 75118, + 75120, + 75121, + 75125, + 75126, + 75129, + 75131, + 75133, + 75134, + 75136, + 75139, + 75141, + 75142, + 75143, + 75146, + 75147, + 75148, + 75149, + 75153, + 75154, + 75156, + 75157, + 75159, + 75161, + 75163, + 75166, + 75169, + 75171, + 75173, + 75174, + 75176, + 75178, + 75179, + 75180, + 75184, + 75185, + 75187, + 75192, + 75195, + 75196, + 75199, + 75210, + 75212, + 75213, + 75215, + 75217, + 75219, + 75221, + 75223, + 75225, + 75232, + 75233, + 75234, + 75235, + 75236, + 75237, + 75239, + 75250, + 126021, + 126024, + 126028, + 126030, + 126031, + 146574, + 146575, + 146576, + 146577, + 146578, + 146583, + 146586, + 146592, + 146593, + 146594, + 146596, + 146597, + 146600, + 146601, + 146602, + 146603, + 146679, + 166859, + 166866, + 166872, + 166875, + 166882, + 166897, + 166905, + 166906, + 166913, + 166915, + 166931, + 166932, + 166944, + 166950, + 166951, + 166953, + 166956, + 166957, + 166958, + 166959, + 166970, + 166996, + 167085, + 167086, + 167087, + 167088, + 167089, + 167090, + 167094, + 167096, + 167097, + 167099, + 167100, + 167101, + 167103, + 167105, + 167106, + 167202, + 167203, + 167204, + 167205, + 168785, + 168791, + 189779, + 189786, + 189828, + 
189829, + 189836, + 189840, + 189841, + 189843, + 189844, + 189845, + 189846, + 189857, + 189858, + 189859, + 189863, + 189864, + 189869, + 189870, + 189875, + 189878, + 189880, + 189881, + 189882, + 189883, + 189884, + 189887, + 189890, + 189893, + 189894, + 189899, + 189900, + 189902, + 190154, + 190155, + 190156, + 190157, + 190158, + 190159, + 211720, + 211721, + 211722, + 211723, + 211724, ] regression_tasks = [ - 359997, 359998, 359999, 360000, 360001, 360002, 360003, 167146, 360004, 360005, 360006, - 360007, 211696, 360009, 360010, 360011, 360012, 360013, 360014, 360015, 360016, 360017, - 360018, 360019, 360020, 360021, 360022, 360023, 360024, 360025, 360026, 360027, 360028, - 360029, 360030, 360031, 360032, 360033, 360034, 360035, 360036, 360037, 360038, 360039, - 360040, 360041, 360042, 360043, 360044, 360045, 360046, 360047, 360048, 360049, 360050, - 360051, 360052, 360053, 360054, 360055, 360056, 360057, 360058, 360059, 360060, 360061, - 360062, 360063, 360064, 360066, 360067, 360068, 360069, 360070, 360071, 360072, 360073, - 360074, 360075, 360076, 360077, 360078, 360079, 360080, 360081, 360082, 360083, 360084, - 360085, 360086, 360087, 360088, 360089, 360090, 360091, 360092, 360093, 360094, 360095, - 360096, 360097, 360098, 360100, 360101, 360102, 360103, 360104, 360105, 360106, 360107, + 359997, + 359998, + 359999, + 360000, + 360001, + 360002, + 360003, + 167146, + 360004, + 360005, + 360006, + 360007, + 211696, + 360009, + 360010, + 360011, + 360012, + 360013, + 360014, + 360015, + 360016, + 360017, + 360018, + 360019, + 360020, + 360021, + 360022, + 360023, + 360024, + 360025, + 360026, + 360027, + 360028, + 360029, + 360030, + 360031, + 360032, + 360033, + 360034, + 360035, + 360036, + 360037, + 360038, + 360039, + 360040, + 360041, + 360042, + 360043, + 360044, + 360045, + 360046, + 360047, + 360048, + 360049, + 360050, + 360051, + 360052, + 360053, + 360054, + 360055, + 360056, + 360057, + 360058, + 360059, + 360060, + 360061, + 360062, + 360063, + 360064, + 360066, + 360067, + 360068, + 360069, + 360070, + 360071, + 360072, + 360073, + 360074, + 360075, + 360076, + 360077, + 360078, + 360079, + 360080, + 360081, + 360082, + 360083, + 360084, + 360085, + 360086, + 360087, + 360088, + 360089, + 360090, + 360091, + 360092, + 360093, + 360094, + 360095, + 360096, + 360097, + 360098, + 360100, + 360101, + 360102, + 360103, + 360104, + 360105, + 360106, + 360107, 360108, ] @@ -51,13 +341,13 @@ def load_task(task_id): name = dataset.name.lower() del _ del dataset - cat = {i: 'categorical' if c else 'numerical' for i, c in enumerate(cat)} + cat = {i: "categorical" if c else "numerical" for i, c in enumerate(cat)} if isinstance(task, openml.tasks.OpenMLClassificationTask): - task_type = 'classification' + task_type = "classification" elif isinstance(task, openml.tasks.OpenMLRegressionTask): - task_type = 'regression' + task_type = "regression" else: - raise ValueError('Unknown task type') + raise ValueError("Unknown task type") return X_train, y_train, X_test, y_test, cat, task_type, name diff --git a/setup.py b/setup.py index 6107e60321..003b573bd4 100644 --- a/setup.py +++ b/setup.py @@ -1,38 +1,46 @@ # -*- encoding: utf-8 -*- import os import sys -from setuptools import setup, find_packages +from setuptools import find_packages, setup + +HERE = os.path.abspath(os.path.dirname(__file__)) # Check if Auto-sklearn *could* run on the given system -if os.name != 'posix': +if os.name != "posix": raise ValueError( - 'Detected unsupported operating system: %s. 
Please check ' - 'the compability information of auto-sklearn: https://automl.github.io' - '/auto-sklearn/master/installation.html#windows-osx-compatibility' % - sys.platform + "Detected unsupported operating system: %s. Please check " + "the compability information of auto-sklearn: https://automl.github.io" + "/auto-sklearn/master/installation.html#windows-osx-compatibility" + % sys.platform ) if sys.version_info < (3, 7): raise ValueError( - 'Unsupported Python version %d.%d.%d found. Auto-sklearn requires Python ' - '3.7 or higher.' % (sys.version_info.major, sys.version_info.minor, sys.version_info.micro) + "Unsupported Python version %d.%d.%d found. Auto-sklearn requires Python " + "3.7 or higher." + % (sys.version_info.major, sys.version_info.minor, sys.version_info.micro) ) -HERE = os.path.abspath(os.path.dirname(__file__)) -with open(os.path.join(HERE, 'requirements.txt')) as fp: - install_reqs = [r.rstrip() for r in fp.readlines() - if not r.startswith('#') and not r.startswith('git+')] +with open(os.path.join(HERE, "requirements.txt")) as fp: + install_reqs = [ + r.rstrip() + for r in fp.readlines() + if not r.startswith("#") and not r.startswith("git+") + ] -extras_reqs={ +extras_reqs = { "test": [ "pytest>=4.6", - "mypy", + "pytest-cov", "pytest-xdist", "pytest-timeout", + "mypy", + "isort", + "black", + "pydocstyle", "openml", "pre-commit", - "pytest-cov", ], "examples": [ "matplotlib", @@ -46,32 +54,32 @@ "sphinx_bootstrap_theme", "numpydoc", "sphinx_toolbox", - "docutils==0.16" + "docutils==0.16", ], } -with open(os.path.join(HERE, 'autosklearn', '__version__.py')) as fh: +with open(os.path.join(HERE, "autosklearn", "__version__.py")) as fh: version = fh.readlines()[-1].split()[-1].strip("\"'") -with open(os.path.join(HERE, 'README.md')) as fh: +with open(os.path.join(HERE, "README.md")) as fh: long_description = fh.read() setup( - name='auto-sklearn', - author='Matthias Feurer', - author_email='feurerm@informatik.uni-freiburg.de', - description='Automated machine learning.', + name="auto-sklearn", + author="Matthias Feurer", + author_email="feurerm@informatik.uni-freiburg.de", + description="Automated machine learning.", long_description=long_description, - long_description_content_type='text/markdown', + long_description_content_type="text/markdown", version=version, - packages=find_packages(exclude=['test', 'scripts', 'examples']), + packages=find_packages(exclude=["test", "scripts", "examples"]), extras_require=extras_reqs, install_requires=install_reqs, include_package_data=True, - license='BSD3', - platforms=['Linux'], + license="BSD3", + platforms=["Linux"], classifiers=[ "Environment :: Console", "Intended Audience :: Developers", @@ -83,10 +91,10 @@ "Operating System :: OS Independent", "Topic :: Scientific/Engineering :: Artificial Intelligence", "Topic :: Scientific/Engineering :: Information Analysis", - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", ], - python_requires='>=3.7', - url='https://automl.github.io/auto-sklearn', + python_requires=">=3.7", + url="https://automl.github.io/auto-sklearn", ) diff --git a/test/conftest.py b/test/conftest.py index d3df7508cd..16a285b9df 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -3,12 +3,12 @@ import time import unittest.mock -from dask.distributed import Client, get_client import psutil import pytest 
+from dask.distributed import Client, get_client -from autosklearn.automl_common.common.utils.backend import create, Backend from autosklearn.automl import AutoML +from autosklearn.automl_common.common.utils.backend import Backend, create class AutoMLStub(AutoML): @@ -36,9 +36,11 @@ def automl_stub(request): def backend(request): test_dir = os.path.dirname(__file__) - tmp = os.path.join(test_dir, '.tmp__%s__%s' % (request.module.__name__, request.node.name)) + tmp = os.path.join( + test_dir, ".tmp__%s__%s" % (request.module.__name__, request.node.name) + ) - for dir in (tmp, ): + for dir in (tmp,): for i in range(10): if os.path.exists(dir): try: @@ -49,14 +51,12 @@ def backend(request): # Make sure the folders we wanna create do not already exist. backend = create( - temporary_directory=tmp, - output_directory=None, - prefix="auto-sklearn" + temporary_directory=tmp, output_directory=None, prefix="auto-sklearn" ) def get_finalizer(tmp_dir): def session_run_at_end(): - for dir in (tmp_dir, ): + for dir in (tmp_dir,): for i in range(10): if os.path.exists(dir): try: @@ -64,7 +64,9 @@ def session_run_at_end(): break except OSError: time.sleep(1) + return session_run_at_end + request.addfinalizer(get_finalizer(tmp)) return backend @@ -72,7 +74,7 @@ def session_run_at_end(): @pytest.fixture(scope="function") def tmp_dir(request): - return _dir_fixture('tmp', request) + return _dir_fixture("tmp", request) def _dir_fixture(dir_type, request): @@ -124,8 +126,10 @@ def session_run_at_end(): client.shutdown() client.close() del client + return session_run_at_end - request.addfinalizer(get_finalizer(client.scheduler_info()['address'])) + + request.addfinalizer(get_finalizer(client.scheduler_info()["address"])) return client @@ -149,8 +153,10 @@ def session_run_at_end(): client.shutdown() client.close() del client + return session_run_at_end - request.addfinalizer(get_finalizer(client.scheduler_info()['address'])) + + request.addfinalizer(get_finalizer(client.scheduler_info()["address"])) return client diff --git a/test/test_automl/automl_utils.py b/test/test_automl/automl_utils.py index 768f94ff8d..577ea97359 100644 --- a/test/test_automl/automl_utils.py +++ b/test/test_automl/automl_utils.py @@ -1,17 +1,17 @@ # -*- encoding: utf-8 -*- -import re -import os -import glob import typing -import numpy as np +import glob +import os +import re +import numpy as np scores_dict = { - 'train_single': ["single_best_train_score", "single_best_optimization_score"], - 'test_single': ["single_best_test_score"], - 'train_ensamble': ["ensemble_optimization_score"], - 'test_ensamble': ["ensemble_test_score"] + "train_single": ["single_best_train_score", "single_best_optimization_score"], + "test_single": ["single_best_test_score"], + "train_ensamble": ["ensemble_optimization_score"], + "test_ensamble": ["ensemble_test_score"], } @@ -19,15 +19,15 @@ def print_debug_information(automl): # In case it is called with estimator, # Get the automl object - if hasattr(automl, 'automl_'): + if hasattr(automl, "automl_"): automl = automl.automl_ # Log file path - log_file = glob.glob(os.path.join( - automl._backend.temporary_directory, 'AutoML*.log'))[0] + log_file = glob.glob( + os.path.join(automl._backend.temporary_directory, "AutoML*.log") + )[0] - include_messages = ['INFO', 'DEBUG', 'WARN', - 'CRITICAL', 'ERROR', 'FATAL'] + include_messages = ["INFO", "DEBUG", "WARN", "CRITICAL", "ERROR", "FATAL"] # There is a lot of content in the log files. 
Only # parsing the main message and ignore the metalearning @@ -37,53 +37,69 @@ def print_debug_information(automl): content = logfile.readlines() # Get the messages to debug easier! - content = [line for line in content if any( - msg in line for msg in include_messages - ) and 'metalearning' not in line] + content = [ + line + for line in content + if any(msg in line for msg in include_messages) + and "metalearning" not in line + ] except Exception as e: return str(e) # Also add the run history if any - if hasattr(automl, 'runhistory_') and hasattr(automl.runhistory_, 'data'): + if hasattr(automl, "runhistory_") and hasattr(automl.runhistory_, "data"): for k, v in automl.runhistory_.data.items(): content += ["{}->{}".format(k, v)] else: - content += ['No RunHistory'] + content += ["No RunHistory"] # Also add the ensemble history if any if len(automl.ensemble_performance_history) > 0: content += [str(h) for h in automl.ensemble_performance_history] else: - content += ['No Ensemble History'] + content += ["No Ensemble History"] return os.linesep.join(content) def _includes(scores, all_scores): - return all(score in all_scores for score in scores) and len(scores) == len(all_scores) + return all(score in all_scores for score in scores) and len(scores) == len( + all_scores + ) def count_succeses(cv_results): return np.sum( - [status in ['Success', 'Success (but do not advance to higher budget)'] - for status in cv_results['status']] + [ + status in ["Success", "Success (but do not advance to higher budget)"] + for status in cv_results["status"] + ] ) def includes_all_scores(scores): - all_scores = scores_dict["train_single"] + scores_dict["test_single"] + \ - scores_dict["train_ensamble"] + scores_dict["test_ensamble"] + ["Timestamp"] + all_scores = ( + scores_dict["train_single"] + + scores_dict["test_single"] + + scores_dict["train_ensamble"] + + scores_dict["test_ensamble"] + + ["Timestamp"] + ) return _includes(scores, all_scores) def include_single_scores(scores): - all_scores = scores_dict["train_single"] + scores_dict["test_single"] + ["Timestamp"] + all_scores = ( + scores_dict["train_single"] + scores_dict["test_single"] + ["Timestamp"] + ) return _includes(scores, all_scores) def includes_train_scores(scores): - all_scores = scores_dict["train_single"] + scores_dict["train_ensamble"] + ["Timestamp"] + all_scores = ( + scores_dict["train_single"] + scores_dict["train_ensamble"] + ["Timestamp"] + ) return _includes(scores, all_scores) @@ -113,7 +129,7 @@ def parse_logfile(self) -> typing.List[str]: assert os.path.exists(self.logfile), "{} not found".format(self.logfile) with open(self.logfile) as fh: - content = [line.strip() for line in fh if re.search(r'[\w+]', line)] + content = [line.strip() for line in fh if re.search(r"[\w+]", line)] return content def count_ensembler_iterations(self) -> int: @@ -129,11 +145,12 @@ def count_ensembler_iterations(self) -> int: # We expect the start msg to be something like: # [DEBUG] [2020-11-26 19:22:42,160:EnsembleBuilder] \ # Function called with argument: (61.... 
- # [DEBUG] [2020-11-30 11:53:47,069:EnsembleBuilder] Function called with argument: - # (28.246965646743774, 1, False), {} + # [DEBUG] [2020-11-30 11:53:47,069:EnsembleBuilder] \ + # Function called with argument: (28.246965646743774, 1, False), {} match = re.search( - r'EnsembleBuilder]\s+Function called with argument:\s+\(\d+\.\d+, (\d+), \w+', - line) + r"EnsembleBuilder]\s+Function called with argument:\s+\(\d+\.\d+, (\d+), \w+", # noqa: E501 + line, + ) if match: iterations.append(int(match.group(1))) @@ -143,19 +160,15 @@ def count_ensembler_iterations(self) -> int: # time left: 61.266255 # [DEBUG] [2020-11-27 20:27:28,044:EnsembleBuilder] Starting iteration 2, # time left: 10.603252 - match = re.search( - r'EnsembleBuilder]\s+Starting iteration (\d+)', - line) + match = re.search(r"EnsembleBuilder]\s+Starting iteration (\d+)", line) if match: iterations_from_inside_ensemble_builder.append(int(match.group(1))) # The ensemble builder might not be called if there is no time. # Here we expect the msg: - # [DEBUG] [2020-11-27 20:27:28,044:EnsembleBuilder] Not starting iteration 2, - # as time left: 1.59324 - match = re.search( - r'EnsembleBuilder]\s+Not starting iteration (\d+)', - line) + # [DEBUG] [2020-11-27 20:27:28,044:EnsembleBuilder] \ + # Not starting iteration 2, as time left: 1.59324 + match = re.search(r"EnsembleBuilder]\s+Not starting iteration (\d+)", line) if match: iterations_from_inside_ensemble_builder.append(int(match.group(1))) @@ -174,49 +187,80 @@ def count_ensembler_success_pynisher_calls(self) -> int: # [DEBUG] [2020-11-30 11:54:05,984:EnsembleBuilder] return value: # (([{'Timestamp': Timestamp('2020-11- 30 11:54:05.983837'), # 'ensemble_optimization_score': 0.9787234042553191}], 50, None, None, None), 0) - return_msgs = len([line for line in self.lines if re.search( - r'EnsembleBuilder]\s+return value:.*Timestamp', line)]) + return_msgs = len( + [ + line + for line in self.lines + if re.search(r"EnsembleBuilder]\s+return value:.*Timestamp", line) + ] + ) return return_msgs def count_tae_pynisher_calls(self) -> int: # We expect the return msg to be something like: - # [DEBUG] [2020-12-16 11:57:08,987:Client-pynisher] Function called with argument: () - # , {'queue': , 'config': 1 - # [DEBUG] [2020-12-16 11:57:10,537:Client-pynisher] Function called with argument: () - # , {'queue': , - # 'config': Configuration: + """ + [DEBUG] [2020-12-16 11:57:08,987:Client-pynisher] Function called with argument: (), + {'queue': , 'config': 1 + [DEBUG] [2020-12-16 11:57:10,537:Client-pynisher] Function called with argument: (), + {'queue': , 'config': Configuration: + """ # noqa: E501 # Only the parenthesis below need to be escaped, ] and { do not. 
- call_msgs = len([line for line in self.lines if re.search( - r'pynisher]\s+Function called with argument: \(\), {', line)]) + call_msgs = len( + [ + line + for line in self.lines + if re.search( + r"pynisher]\s+Function called with argument: \(\), {", line + ) + ] + ) return call_msgs def count_tae_pynisher_returns(self) -> int: # We expect the return msg to be something like: # [DEBUG] [2020-11-30 11:53:11,264:pynisher] return value: (None, 0) # [DEBUG] [2020-11-30 11:53:13,768:pynisher] return value: (None, 0) - return_msgs = len([line for line in self.lines if re.search( - r'pynisher]\s+return value:\s+', line)]) + return_msgs = len( + [ + line + for line in self.lines + if re.search(r"pynisher]\s+return value:\s+", line) + ] + ) # When the pynisher pipe is prematurely closed, we also expect: # Your function call closed the pipe prematurely # -> Subprocess probably got an uncatchable signal # We expect the return msg to be something like: # OR # Something else went wrong, sorry. - premature_msgs = len([line for line in self.lines if re.search( - r'pynisher]\s+Your function call closed the pipe prematurely', line)]) - failure_msgs = len([line for line in self.lines if re.search( - r'pynisher]\s+Something else went wrong, sorry.', line)]) + premature_msgs = len( + [ + line + for line in self.lines + if re.search( + r"pynisher]\s+Your function call closed the pipe prematurely", line + ) + ] + ) + failure_msgs = len( + [ + line + for line in self.lines + if re.search(r"pynisher]\s+Something else went wrong, sorry.", line) + ] + ) return return_msgs + premature_msgs + failure_msgs def get_automl_setting_from_log(self, dataset_name: str, setting: str) -> str: for line in self.lines: # We expect messages of the form - # [DEBUG] [2020-11-30 11:53:10,457:AutoML(5):breast_cancer] ensemble_size: 50 - # [DEBUG] [2020-11-30 11:53:10,457:AutoML(5):breast_cancer] ensemble_nbest: 50 - match = re.search( - f"{dataset_name}]\\s*{setting}\\s*:\\s*(\\w+)", - line) + """ + [DEBUG] [2020-11-30 11:53:10,457:AutoML(5):breast_cancer] ensemble_size: 50 + [DEBUG] [2020-11-30 11:53:10,457:AutoML(5):breast_cancer] ensemble_nbest: 50 + """ # noqa: E501 + match = re.search(f"{dataset_name}]\\s*{setting}\\s*:\\s*(\\w+)", line) if match: return match.group(1) + return None diff --git a/test/test_automl/test_automl.py b/test/test_automl/test_automl.py index 4e509d8755..37040f0560 100644 --- a/test/test_automl/test_automl.py +++ b/test/test_automl/test_automl.py @@ -1,12 +1,11 @@ # -*- encoding: utf-8 -*- from typing import Dict, List, Union +import glob import itertools import os import pickle -import sys import time -import glob import unittest import unittest.mock import warnings @@ -14,35 +13,46 @@ import numpy as np import pandas as pd import pytest -from scipy.sparse import csr_matrix, spmatrix import sklearn.datasets -from sklearn.ensemble import VotingRegressor, VotingClassifier -from smac.scenario.scenario import Scenario +from scipy.sparse import csr_matrix, spmatrix +from sklearn.ensemble import VotingClassifier, VotingRegressor from smac.facade.roar_facade import ROAR +from smac.scenario.scenario import Scenario +from smac.tae import StatusType -from autosklearn.automl import AutoML, AutoMLClassifier, AutoMLRegressor, _model_predict -from autosklearn.data.validation import InputValidator import autosklearn.automl -from autosklearn.data.xy_data_manager import XYDataManager -from autosklearn.metrics import ( - accuracy, log_loss, balanced_accuracy, default_metric_for_task -) -from 
autosklearn.evaluation.abstract_evaluator import MyDummyClassifier, MyDummyRegressor -from autosklearn.util.data import default_dataset_compression_arg -from autosklearn.util.logging_ import PickableLoggerAdapter import autosklearn.pipeline.util as putil +from autosklearn.automl import AutoML, AutoMLClassifier, AutoMLRegressor, _model_predict from autosklearn.constants import ( - MULTICLASS_CLASSIFICATION, BINARY_CLASSIFICATION, + CLASSIFICATION_TASKS, + MULTICLASS_CLASSIFICATION, MULTILABEL_CLASSIFICATION, - REGRESSION, MULTIOUTPUT_REGRESSION, - CLASSIFICATION_TASKS, + REGRESSION, ) -from smac.tae import StatusType +from autosklearn.data.validation import InputValidator +from autosklearn.data.xy_data_manager import XYDataManager +from autosklearn.evaluation.abstract_evaluator import ( + MyDummyClassifier, + MyDummyRegressor, +) +from autosklearn.metrics import ( + accuracy, + balanced_accuracy, + default_metric_for_task, + log_loss, +) +from autosklearn.util.data import default_dataset_compression_arg +from autosklearn.util.logging_ import PickableLoggerAdapter -sys.path.append(os.path.dirname(__file__)) -from automl_utils import print_debug_information, count_succeses, AutoMLLogParser, includes_all_scores, includes_train_scores, performance_over_time_is_plausible # noqa (E402: module level import not at top of file) +from test.test_automl.automl_utils import ( + AutoMLLogParser, + count_succeses, + includes_train_scores, + performance_over_time_is_plausible, + print_debug_information, +) class AutoMLStub(AutoML): @@ -57,7 +67,7 @@ def __del__(self): def test_fit(dask_client): - X_train, Y_train, X_test, Y_test = putil.get_dataset('iris') + X_train, Y_train, X_test, Y_test = putil.get_dataset("iris") automl = autosklearn.automl.AutoML( seed=0, time_left_for_this_task=30, @@ -80,13 +90,7 @@ def test_fit(dask_client): def test_fit_roar(dask_client_single_worker): def get_roar_object_callback( - scenario_dict, - seed, - ta, - ta_kwargs, - dask_client, - n_jobs, - **kwargs + scenario_dict, seed, ta, ta_kwargs, dask_client, n_jobs, **kwargs ): """Random online adaptive racing. 
@@ -101,7 +105,7 @@ def get_roar_object_callback( n_jobs=n_jobs, ) - X_train, Y_train, X_test, Y_test = putil.get_dataset('iris') + X_train, Y_train, X_test, Y_test = putil.get_dataset("iris") automl = autosklearn.automl.AutoML( time_left_for_this_task=30, per_run_time_limit=5, @@ -126,8 +130,7 @@ def test_refit_shuffle_on_fail(dask_client): failing_model = unittest.mock.Mock() failing_model.fit.side_effect = [ValueError(), ValueError(), None] - failing_model.fit_transformer.side_effect = [ - ValueError(), ValueError(), (None, {})] + failing_model.fit_transformer.side_effect = [ValueError(), ValueError(), (None, {})] failing_model.get_max_iter.return_value = 100 auto = AutoML(30, 5, dask_client=dask_client) @@ -135,7 +138,7 @@ def test_refit_shuffle_on_fail(dask_client): ensemble_mock.get_selected_model_identifiers.return_value = [(1, 1, 50.0)] auto.ensemble_ = ensemble_mock auto.InputValidator = InputValidator() - for budget_type in [None, 'iterations']: + for budget_type in [None, "iterations"]: auto._budget_type = budget_type auto.models_ = {(1, 1, 50.0): failing_model} @@ -153,12 +156,11 @@ def test_refit_shuffle_on_fail(dask_client): def test_only_loads_ensemble_models(automl_stub): - def side_effect(ids, *args, **kwargs): return models if ids is identifiers else {} # Add a resampling strategy as this is required by load_models - automl_stub._resampling_strategy = 'holdout' + automl_stub._resampling_strategy = "holdout" identifiers = [(1, 2), (3, 4)] models = [42] @@ -171,7 +173,7 @@ def side_effect(ids, *args, **kwargs): assert models == automl_stub.models_ assert automl_stub.cv_models_ is None - automl_stub._resampling_strategy = 'cv' + automl_stub._resampling_strategy = "cv" models = [42] automl_stub._backend.load_cv_models_by_identifiers.side_effect = side_effect @@ -192,7 +194,7 @@ def test_check_for_models_if_no_ensemble(automl_stub): def test_raises_if_no_models(automl_stub): automl_stub._backend.load_ensemble.return_value = None automl_stub._backend.list_all_models.return_value = [] - automl_stub._resampling_strategy = 'holdout' + automl_stub._resampling_strategy = "holdout" automl_stub._disable_evaluator_output = False with pytest.raises(ValueError): @@ -205,7 +207,7 @@ def test_raises_if_no_models(automl_stub): def test_delete_non_candidate_models(dask_client): seed = 555 - X, Y, _, _ = putil.get_dataset('iris') + X, Y, _, _ = putil.get_dataset("iris") automl = autosklearn.automl.AutoML( delete_tmp_folder_after_terminate=False, time_left_for_this_task=60, @@ -213,11 +215,8 @@ def test_delete_non_candidate_models(dask_client): ensemble_nbest=3, seed=seed, initial_configurations_via_metalearning=0, - resampling_strategy='holdout', - include={ - 'classifier': ['sgd'], - 'feature_preprocessor': ['no_preprocessing'] - }, + resampling_strategy="holdout", + include={"classifier": ["sgd"], "feature_preprocessor": ["no_preprocessing"]}, metric=accuracy, dask_client=dask_client, # Force model to be deleted. 
That is, from 50 which is the @@ -229,23 +228,31 @@ def test_delete_non_candidate_models(dask_client): # Assert at least one model file has been deleted and that there were no # deletion errors - log_file_path = glob.glob(os.path.join( - automl._backend.temporary_directory, 'AutoML(' + str(seed) + '):*.log')) + log_file_path = glob.glob( + os.path.join( + automl._backend.temporary_directory, "AutoML(" + str(seed) + "):*.log" + ) + ) with open(log_file_path[0]) as log_file: log_content = log_file.read() - assert 'Deleted files of non-candidate model' in log_content, log_content - assert 'Failed to delete files of non-candidate model' not in log_content, log_content - assert 'Failed to lock model' not in log_content, log_content + assert "Deleted files of non-candidate model" in log_content, log_content + assert ( + "Failed to delete files of non-candidate model" not in log_content + ), log_content + assert "Failed to lock model" not in log_content, log_content # Assert that the files of the models used by the ensemble weren't deleted model_files = automl._backend.list_all_models(seed=seed) model_files_idx = set() for m_file in model_files: # Extract the model identifiers from the filename - m_file = os.path.split(m_file)[1].replace('.model', '').split('.', 2) + m_file = os.path.split(m_file)[1].replace(".model", "").split(".", 2) model_files_idx.add((int(m_file[0]), int(m_file[1]), float(m_file[2]))) ensemble_members_idx = set(automl.ensemble_.identifiers_) - assert ensemble_members_idx.issubset(model_files_idx), (ensemble_members_idx, model_files_idx) + assert ensemble_members_idx.issubset(model_files_idx), ( + ensemble_members_idx, + model_files_idx, + ) del automl @@ -257,17 +264,23 @@ def test_binary_score_and_include(dask_client): """ data = sklearn.datasets.make_classification( - n_samples=400, n_features=10, n_redundant=1, n_informative=3, - n_repeated=1, n_clusters_per_class=2, random_state=1) + n_samples=400, + n_features=10, + n_redundant=1, + n_informative=3, + n_repeated=1, + n_clusters_per_class=2, + random_state=1, + ) X_train = data[0][:200] Y_train = data[1][:200] X_test = data[0][200:] Y_test = data[1][200:] automl = autosklearn.automl.AutoML( - 20, 5, - include={'classifier': ['sgd'], - 'feature_preprocessor': ['no_preprocessing']}, + 20, + 5, + include={"classifier": ["sgd"], "feature_preprocessor": ["no_preprocessing"]}, metric=accuracy, dask_client=dask_client, ) @@ -286,10 +299,11 @@ def test_binary_score_and_include(dask_client): def test_automl_outputs(dask_client): - X_train, Y_train, X_test, Y_test = putil.get_dataset('iris') - name = 'iris' + X_train, Y_train, X_test, Y_test = putil.get_dataset("iris") + name = "iris" auto = autosklearn.automl.AutoML( - 30, 5, + 30, + 5, initial_configurations_via_metalearning=0, seed=100, metric=accuracy, @@ -307,59 +321,70 @@ def test_automl_outputs(dask_client): ) data_manager_file = os.path.join( - auto._backend.temporary_directory, - '.auto-sklearn', - 'datamanager.pkl' + auto._backend.temporary_directory, ".auto-sklearn", "datamanager.pkl" ) # pickled data manager (without one hot encoding!) 
- with open(data_manager_file, 'rb') as fh: + with open(data_manager_file, "rb") as fh: D = pickle.load(fh) - assert np.allclose(D.data['X_train'], X_train) + assert np.allclose(D.data["X_train"], X_train) # Check that all directories are there fixture = [ - 'true_targets_ensemble.npy', - 'start_time_100', - 'datamanager.pkl', - 'ensemble_read_preds.pkl', - 'ensemble_read_losses.pkl', - 'runs', - 'ensembles', - 'ensemble_history.json', + "true_targets_ensemble.npy", + "start_time_100", + "datamanager.pkl", + "ensemble_read_preds.pkl", + "ensemble_read_losses.pkl", + "runs", + "ensembles", + "ensemble_history.json", ] - assert ( - sorted(os.listdir(os.path.join(auto._backend.temporary_directory, - '.auto-sklearn'))) - == sorted(fixture) - ) + assert sorted( + os.listdir(os.path.join(auto._backend.temporary_directory, ".auto-sklearn")) + ) == sorted(fixture) # At least one ensemble, one validation, one test prediction and one # model and one ensemble - fixture = glob.glob(os.path.join( - auto._backend.temporary_directory, - '.auto-sklearn', 'runs', '*', 'predictions_ensemble*npy', - )) + fixture = glob.glob( + os.path.join( + auto._backend.temporary_directory, + ".auto-sklearn", + "runs", + "*", + "predictions_ensemble*npy", + ) + ) assert len(fixture) > 0 - fixture = glob.glob(os.path.join(auto._backend.temporary_directory, '.auto-sklearn', - 'runs', '*', '100.*.model')) + fixture = glob.glob( + os.path.join( + auto._backend.temporary_directory, + ".auto-sklearn", + "runs", + "*", + "100.*.model", + ) + ) assert len(fixture) > 0 - fixture = os.listdir(os.path.join(auto._backend.temporary_directory, - '.auto-sklearn', 'ensembles')) - assert '100.0000000000.ensemble' in fixture + fixture = os.listdir( + os.path.join(auto._backend.temporary_directory, ".auto-sklearn", "ensembles") + ) + assert "100.0000000000.ensemble" in fixture # Start time - start_time_file_path = os.path.join(auto._backend.temporary_directory, - '.auto-sklearn', "start_time_100") - with open(start_time_file_path, 'r') as fh: + start_time_file_path = os.path.join( + auto._backend.temporary_directory, ".auto-sklearn", "start_time_100" + ) + with open(start_time_file_path, "r") as fh: start_time = float(fh.read()) assert time.time() - start_time >= 10, print_debug_information(auto) # Then check that the logger matches the run expectation - logfile = glob.glob(os.path.join( - auto._backend.temporary_directory, 'AutoML*.log'))[0] + logfile = glob.glob(os.path.join(auto._backend.temporary_directory, "AutoML*.log"))[ + 0 + ] parser = AutoMLLogParser(logfile) # The number of ensemble trajectories properly in log file @@ -381,42 +406,61 @@ def test_automl_outputs(dask_client): # Dummy not in run history total_calls_to_pynisher_log = parser.count_tae_pynisher_calls() - 1 total_returns_from_pynisher_log = parser.count_tae_pynisher_returns() - 1 - total_elements_rh = len([run_value for run_value in auto.runhistory_.data.values( - ) if run_value.status == StatusType.RUNNING]) + total_elements_rh = len( + [ + run_value + for run_value in auto.runhistory_.data.values() + if run_value.status == StatusType.RUNNING + ] + ) # Make sure we register all calls to pynisher # The less than or equal here is added as a WA as # https://github.com/automl/SMAC3/pull/712 is not yet integrated - assert total_elements_rh <= total_calls_to_pynisher_log, print_debug_information(auto) + assert total_elements_rh <= total_calls_to_pynisher_log, print_debug_information( + auto + ) # Make sure we register all returns from pynisher - assert 
total_elements_rh <= total_returns_from_pynisher_log, print_debug_information(auto) + assert ( + total_elements_rh <= total_returns_from_pynisher_log + ), print_debug_information(auto) # Lastly check that settings are print to logfile - ensemble_size = parser.get_automl_setting_from_log(auto._dataset_name, 'ensemble_size') + ensemble_size = parser.get_automl_setting_from_log( + auto._dataset_name, "ensemble_size" + ) assert auto._ensemble_size == int(ensemble_size) del auto -@pytest.mark.parametrize("datasets", [('breast_cancer', BINARY_CLASSIFICATION), - ('wine', MULTICLASS_CLASSIFICATION), - ('diabetes', REGRESSION)]) +@pytest.mark.parametrize( + "datasets", + [ + ("breast_cancer", BINARY_CLASSIFICATION), + ("wine", MULTICLASS_CLASSIFICATION), + ("diabetes", REGRESSION), + ], +) def test_do_dummy_prediction(dask_client, datasets): name, task = datasets X_train, Y_train, X_test, Y_test = putil.get_dataset(name) datamanager = XYDataManager( - X_train, Y_train, - X_test, Y_test, + X_train, + Y_train, + X_test, + Y_test, task=task, dataset_name=name, - feat_type={i: 'numerical' for i in range(X_train.shape[1])}, + feat_type={i: "numerical" for i in range(X_train.shape[1])}, ) auto = autosklearn.automl.AutoML( - 20, 5, + 20, + 5, initial_configurations_via_metalearning=25, metric=accuracy, dask_client=dask_client, @@ -433,18 +477,18 @@ def test_do_dummy_prediction(dask_client, datasets): D = auto._backend.load_datamanager() # Check if data manager is correcly loaded - assert D.info['task'] == datamanager.info['task'] + assert D.info["task"] == datamanager.info["task"] auto._do_dummy_prediction(D, 1) # Ensure that the dummy predictions are not in the current working # directory, but in the temporary directory. - unexpected_directory = os.path.join(os.getcwd(), '.auto-sklearn') + unexpected_directory = os.path.join(os.getcwd(), ".auto-sklearn") expected_directory = os.path.join( auto._backend.temporary_directory, - '.auto-sklearn', - 'runs', - '1_1_0.0', - 'predictions_ensemble_1_1_0.0.npy' + ".auto-sklearn", + "runs", + "1_1_0.0", + "predictions_ensemble_1_1_0.0.npy", ) assert not os.path.exists(unexpected_directory) assert os.path.exists(expected_directory) @@ -454,27 +498,30 @@ def test_do_dummy_prediction(dask_client, datasets): del auto -@unittest.mock.patch('autosklearn.evaluation.ExecuteTaFuncWithQueue.run') +@unittest.mock.patch("autosklearn.evaluation.ExecuteTaFuncWithQueue.run") def test_fail_if_dummy_prediction_fails(ta_run_mock, dask_client): - X_train, Y_train, X_test, Y_test = putil.get_dataset('iris') + X_train, Y_train, X_test, Y_test = putil.get_dataset("iris") datamanager = XYDataManager( - X_train, Y_train, - X_test, Y_test, + X_train, + Y_train, + X_test, + Y_test, task=2, - feat_type={i: 'Numerical' for i in range(X_train.shape[1])}, - dataset_name='iris', + feat_type={i: "Numerical" for i in range(X_train.shape[1])}, + dataset_name="iris", ) time_for_this_task = 30 per_run_time = 10 - auto = autosklearn.automl.AutoML(time_for_this_task, - per_run_time, - initial_configurations_via_metalearning=25, - metric=accuracy, - dask_client=dask_client, - delete_tmp_folder_after_terminate=False, - ) + auto = autosklearn.automl.AutoML( + time_for_this_task, + per_run_time, + initial_configurations_via_metalearning=25, + metric=accuracy, + dask_client=dask_client, + delete_tmp_folder_after_terminate=False, + ) auto._backend = auto._create_backend() auto._backend._make_internals_directory() auto._backend.save_datamanager(datamanager) @@ -497,55 +544,55 @@ def 
test_fail_if_dummy_prediction_fails(ta_run_mock, dask_client): auto._do_dummy_prediction(datamanager, 1) except ValueError: raised = True - assert not raised, 'Exception raised' + assert not raised, "Exception raised" # Case 2. Check that if statustype returned by ta.run() != success, # the function raises error. ta_run_mock.return_value = StatusType.CRASHED, None, None, {} with pytest.raises( ValueError, - match='Dummy prediction failed with run state StatusType.CRASHED and additional output: {}.' # noqa + match="Dummy prediction failed with run state StatusType.CRASHED and additional output: {}.", # noqa ): auto._do_dummy_prediction(datamanager, 1) ta_run_mock.return_value = StatusType.ABORT, None, None, {} with pytest.raises( ValueError, - match='Dummy prediction failed with run state StatusType.ABORT ' - 'and additional output: {}.', + match="Dummy prediction failed with run state StatusType.ABORT " + "and additional output: {}.", ): auto._do_dummy_prediction(datamanager, 1) ta_run_mock.return_value = StatusType.TIMEOUT, None, None, {} with pytest.raises( ValueError, - match='Dummy prediction failed with run state StatusType.TIMEOUT ' - 'and additional output: {}.' + match="Dummy prediction failed with run state StatusType.TIMEOUT " + "and additional output: {}.", ): auto._do_dummy_prediction(datamanager, 1) ta_run_mock.return_value = StatusType.MEMOUT, None, None, {} with pytest.raises( ValueError, - match='Dummy prediction failed with run state StatusType.MEMOUT ' - 'and additional output: {}.', + match="Dummy prediction failed with run state StatusType.MEMOUT " + "and additional output: {}.", ): auto._do_dummy_prediction(datamanager, 1) ta_run_mock.return_value = StatusType.CAPPED, None, None, {} with pytest.raises( ValueError, - match='Dummy prediction failed with run state StatusType.CAPPED ' - 'and additional output: {}.' + match="Dummy prediction failed with run state StatusType.CAPPED " + "and additional output: {}.", ): auto._do_dummy_prediction(datamanager, 1) - ta_run_mock.return_value = StatusType.CRASHED, None, None, {'exitcode': -6} + ta_run_mock.return_value = StatusType.CRASHED, None, None, {"exitcode": -6} with pytest.raises( ValueError, - match='The error suggests that the provided memory limits were too tight.', + match="The error suggests that the provided memory limits are too tight.", ): auto._do_dummy_prediction(datamanager, 1) -@unittest.mock.patch('autosklearn.smbo.AutoMLSMBO.run_smbo') +@unittest.mock.patch("autosklearn.smbo.AutoMLSMBO.run_smbo") def test_exceptions_inside_log_in_smbo(smbo_run_mock, dask_client): # Below importing and shutdown is a workaround, to make sure @@ -553,6 +600,7 @@ def test_exceptions_inside_log_in_smbo(smbo_run_mock, dask_client): # this test with multiple other test at the same time causes this # test to fail. 
This resets the singletons of the logging class import logging + logging.shutdown() automl = autosklearn.automl.AutoML( @@ -563,15 +611,15 @@ def test_exceptions_inside_log_in_smbo(smbo_run_mock, dask_client): delete_tmp_folder_after_terminate=False, ) - dataset_name = 'test_exceptions_inside_log' + dataset_name = "test_exceptions_inside_log" # Create a custom exception to prevent other errors to slip in class MyException(Exception): pass - X_train, Y_train, X_test, Y_test = putil.get_dataset('iris') + X_train, Y_train, X_test, Y_test = putil.get_dataset("iris") # The first call is on dummy predictor failure - message = str(np.random.randint(100)) + '_run_smbo' + message = str(np.random.randint(100)) + "_run_smbo" smbo_run_mock.side_effect = MyException(message) with pytest.raises(MyException): @@ -583,10 +631,12 @@ class MyException(Exception): ) # make sure that the logfile was created - logger_name = 'AutoML(%d):%s' % (1, dataset_name) + logger_name = "AutoML(%d):%s" % (1, dataset_name) logger = logging.getLogger(logger_name) - logfile = os.path.join(automl._backend.temporary_directory, logger_name + '.log') - assert os.path.exists(logfile), print_debug_information(automl) + str(automl._clean_logger()) + logfile = os.path.join(automl._backend.temporary_directory, logger_name + ".log") + assert os.path.exists(logfile), print_debug_information(automl) + str( + automl._clean_logger() + ) # Give some time for the error message to be printed in the # log file @@ -604,19 +654,21 @@ class MyException(Exception): automl._clean_logger() if not found_message: - pytest.fail("Did not find {} in the log file {} for logger {}/{}/{}".format( - message, - print_debug_information(automl), - vars(automl._logger.logger), - vars(logger), - vars(logging.getLogger()) - )) + pytest.fail( + "Did not find {} in the log file {} for logger {}/{}/{}".format( + message, + print_debug_information(automl), + vars(automl._logger.logger), + vars(logger), + vars(logging.getLogger()), + ) + ) @pytest.mark.parametrize("metric", [log_loss, balanced_accuracy]) def test_load_best_individual_model(metric, dask_client): - X_train, Y_train, X_test, Y_test = putil.get_dataset('iris') + X_train, Y_train, X_test, Y_test = putil.get_dataset("iris") automl = autosklearn.automl.AutoML( time_left_for_this_task=30, per_run_time_limit=5, @@ -645,9 +697,9 @@ def test_load_best_individual_model(metric, dask_client): assert get_models_with_weights[0][0] == 1.0 # Match a toy dataset - if metric.name == 'balanced_accuracy': + if metric.name == "balanced_accuracy": assert automl.score(X_test, Y_test) > 0.9 - elif metric.name == 'log_loss': + elif metric.name == "log_loss": # Seen values in github actions of 0.6978304740364537 assert automl.score(X_test, Y_test) < 0.7 else: @@ -667,17 +719,18 @@ def test_fail_if_feat_type_on_pandas_input(dask_client): dask_client=dask_client, ) - X_train = pd.DataFrame({'a': [1, 1], 'c': [1, 2]}) + X_train = pd.DataFrame({"a": [1, 1], "c": [1, 2]}) y_train = [1, 0] - with pytest.raises( - ValueError, - match="" - "providing the option feat_type to the fit method is not supported when using a Dataframe" - ): + msg = ( + "providing the option feat_type to the fit method is not supported" + " when using a Dataframe." 
+ ) + with pytest.raises(ValueError, match=msg): automl.fit( - X_train, y_train, + X_train, + y_train, task=BINARY_CLASSIFICATION, - feat_type={1: 'Categorical', 2: 'Numerical'}, + feat_type={1: "Categorical", 2: "Numerical"}, ) @@ -686,7 +739,7 @@ def data_input_and_target_types(): # Create valid inputs X_ndarray = np.random.random(size=(n_rows, 5)) - X_ndarray[X_ndarray < .9] = 0 + X_ndarray[X_ndarray < 0.9] = 0 # Binary Classificaiton y_binary_ndarray = np.random.random(size=n_rows) @@ -696,7 +749,9 @@ def data_input_and_target_types(): # Multiclass classification y_multiclass_ndarray = np.random.random(size=n_rows) y_multiclass_ndarray[y_multiclass_ndarray > 0.66] = 2 - y_multiclass_ndarray[(y_multiclass_ndarray <= 0.66) & (y_multiclass_ndarray >= 0.33)] = 1 + y_multiclass_ndarray[ + (y_multiclass_ndarray <= 0.66) & (y_multiclass_ndarray >= 0.33) + ] = 1 y_multiclass_ndarray[y_multiclass_ndarray < 0.33] = 0 # Multilabel classificaiton @@ -789,11 +844,7 @@ def test_input_and_target_types(dask_client, X, y, X_test, y_test, task): # To save time fitting and only validate the inputs we only return # the configuration space automl.fit( - X=X, - y=y, - X_test=X_test, - y_test=y_test, - only_return_configuration_space=True + X=X, y=y, X_test=X_test, y_test=y_test, only_return_configuration_space=True ) assert automl._task == task assert automl._metric.name == default_metric_for_task[task].name @@ -801,21 +852,15 @@ def test_input_and_target_types(dask_client, X, y, X_test, y_test, task): def data_test_model_predict_outsputs_correct_shapes(): datasets = sklearn.datasets - binary = datasets.make_classification( - n_samples=5, n_classes=2, random_state=0 - ) + binary = datasets.make_classification(n_samples=5, n_classes=2, random_state=0) multiclass = datasets.make_classification( n_samples=5, n_informative=3, n_classes=3, random_state=0 ) multilabel = datasets.make_multilabel_classification( n_samples=5, n_classes=3, random_state=0 ) - regression = datasets.make_regression( - n_samples=5, random_state=0 - ) - multioutput = datasets.make_regression( - n_samples=5, n_targets=3, random_state=0 - ) + regression = datasets.make_regression(n_samples=5, random_state=0) + multioutput = datasets.make_regression(n_samples=5, n_targets=3, random_state=0) # TODO issue 1169 # While testing output shapes, realised all models are wrapped to provide @@ -841,17 +886,15 @@ def regressor(X, y): # How cross validation models are currently grouped together def voting_classifier(X, y): classifiers = [ - MyDummyClassifier(config=1, random_state=0).fit(X, y) - for _ in range(5) + MyDummyClassifier(config=1, random_state=0).fit(X, y) for _ in range(5) ] - vc = VotingClassifier(estimators=None, voting='soft') + vc = VotingClassifier(estimators=None, voting="soft") vc.estimators_ = classifiers return vc def voting_regressor(X, y): regressors = [ - MyDummyRegressor(config=1, random_state=0).fit(X, y) - for _ in range(5) + MyDummyRegressor(config=1, random_state=0).fit(X, y) for _ in range(5) ] vr = VotingRegressor(estimators=None) vr.estimators_ = regressors @@ -859,41 +902,41 @@ def voting_regressor(X, y): test_data = { BINARY_CLASSIFICATION: { - 'models': [classifier(*binary), voting_classifier(*binary)], - 'data': binary, + "models": [classifier(*binary), voting_classifier(*binary)], + "data": binary, # prob of false/true for the one class - 'expected_output_shape': (len(binary[0]), 2) + "expected_output_shape": (len(binary[0]), 2), }, MULTICLASS_CLASSIFICATION: { - 'models': [classifier(*multiclass), 
voting_classifier(*multiclass)], - 'data': multiclass, + "models": [classifier(*multiclass), voting_classifier(*multiclass)], + "data": multiclass, # prob of true for each possible class - 'expected_output_shape': (len(multiclass[0]), 3) + "expected_output_shape": (len(multiclass[0]), 3), }, MULTILABEL_CLASSIFICATION: { - 'models': [classifier(*multilabel), voting_classifier(*multilabel)], - 'data': multilabel, + "models": [classifier(*multilabel), voting_classifier(*multilabel)], + "data": multilabel, # probability of true for each binary label - 'expected_output_shape': (len(multilabel[0]), 3) # type: ignore + "expected_output_shape": (len(multilabel[0]), 3), # type: ignore }, REGRESSION: { - 'models': [regressor(*regression), voting_regressor(*regression)], - 'data': regression, + "models": [regressor(*regression), voting_regressor(*regression)], + "data": regression, # array of single outputs - 'expected_output_shape': (len(regression[0]), ) + "expected_output_shape": (len(regression[0]),), }, MULTIOUTPUT_REGRESSION: { - 'models': [regressor(*multioutput), voting_regressor(*multioutput)], - 'data': multioutput, + "models": [regressor(*multioutput), voting_regressor(*multioutput)], + "data": multioutput, # array of vector otuputs - 'expected_output_shape': (len(multioutput[0]), 3) - } + "expected_output_shape": (len(multioutput[0]), 3), + }, } return itertools.chain.from_iterable( [ - (model, cfg['data'], task, cfg['expected_output_shape']) - for model in cfg['models'] + (model, cfg["data"], task, cfg["expected_output_shape"]) + for model in cfg["models"] ] for task, cfg in test_data.items() ) @@ -901,7 +944,7 @@ def voting_regressor(X, y): @pytest.mark.parametrize( "model, data, task, expected_output_shape", - data_test_model_predict_outsputs_correct_shapes() + data_test_model_predict_outsputs_correct_shapes(), ) def test_model_predict_outputs_correct_shapes(model, data, task, expected_output_shape): X, y = data @@ -912,12 +955,12 @@ def test_model_predict_outputs_correct_shapes(model, data, task, expected_output def test_model_predict_outputs_warnings_to_logs(): X = list(range(20)) task = REGRESSION - logger = PickableLoggerAdapter('test_model_predict_correctly_outputs_warnings') + logger = PickableLoggerAdapter("test_model_predict_correctly_outputs_warnings") logger.warning = unittest.mock.Mock() class DummyModel: def predict(self, x): - warnings.warn('test warning', Warning) + warnings.warn("test warning", Warning) return x model = DummyModel() @@ -933,7 +976,7 @@ def test_model_predict_outputs_to_stdout_if_no_logger(): class DummyModel: def predict(self, x): - warnings.warn('test warning', Warning) + warnings.warn("test warning", Warning) return x model = DummyModel() @@ -959,7 +1002,7 @@ def test_param_dataset_compression_false(dataset_compression: bool) -> None: auto = AutoMLRegressor( time_left_for_this_task=30, per_run_time_limit=5, - dataset_compression=dataset_compression + dataset_compression=dataset_compression, ) assert auto._dataset_compression is None @@ -980,14 +1023,16 @@ def test_construction_param_dataset_compression_true(dataset_compression: bool) auto = AutoMLRegressor( time_left_for_this_task=30, per_run_time_limit=5, - dataset_compression=dataset_compression + dataset_compression=dataset_compression, ) assert auto._dataset_compression == default_dataset_compression_arg @pytest.mark.parametrize("dataset_compression", [{"memory_allocation": 0.2}]) -def test_construction_param_dataset_compression_valid_dict(dataset_compression: Dict) -> None: +def 
test_construction_param_dataset_compression_valid_dict( + dataset_compression: Dict, +) -> None: """ Parameters ---------- @@ -1001,7 +1046,7 @@ def test_construction_param_dataset_compression_valid_dict(dataset_compression: auto = AutoMLRegressor( time_left_for_this_task=30, per_run_time_limit=5, - dataset_compression=dataset_compression + dataset_compression=dataset_compression, ) expected_memory_allocation = dataset_compression["memory_allocation"] @@ -1012,7 +1057,9 @@ def test_construction_param_dataset_compression_valid_dict(dataset_compression: assert auto._dataset_compression["methods"] == expected_methods -@pytest.mark.parametrize("dataset_compression", [{"methods": ["precision", "subsample"]}]) +@pytest.mark.parametrize( + "dataset_compression", [{"methods": ["precision", "subsample"]}] +) @pytest.mark.parametrize("X", [np.ones((100, 10), dtype=int)]) @pytest.mark.parametrize("y", [np.random.random((100,))]) @unittest.mock.patch("autosklearn.automl.reduce_dataset_size_if_too_large") @@ -1020,7 +1067,7 @@ def test_fit_performs_dataset_compression_without_precision_with_int( mock_reduce_dataset: unittest.mock.MagicMock, dataset_compression: Dict, X: np.ndarray, - y: np.ndarray + y: np.ndarray, ) -> None: """We can't reduce the precision of ints as we do with floats. Suppose someone was to pass a column with `max_int64` and `min_int64`, any reduction of bits will @@ -1053,7 +1100,7 @@ def test_fit_performs_dataset_compression_without_precision_with_int( auto = AutoMLRegressor( time_left_for_this_task=30, # not used but required per_run_time_limit=5, # not used but required - dataset_compression=dataset_compression + dataset_compression=dataset_compression, ) # To prevent fitting anything we use `only_return_configuration_space` @@ -1066,36 +1113,48 @@ def test_fit_performs_dataset_compression_without_precision_with_int( @pytest.mark.parametrize("dataset_compression", [True]) -@pytest.mark.parametrize("X", [ - np.empty((10, 10)), - csr_matrix(np.identity(10)), - pytest.param( - np.empty((10, 10)).tolist(), - marks=pytest.mark.xfail(reason="Converted to dataframe by InputValidator") - ), - pytest.param( - pd.DataFrame(np.empty((10, 10))), - marks=pytest.mark.xfail(reason="No pandas support yet for dataset compression") - ) -]) -@pytest.mark.parametrize("y", [ - np.random.random((10, 1)), - np.random.random((10, 1)).tolist(), - pytest.param( - pd.Series(np.random.random((10,))), - marks=pytest.mark.xfail(reason="No pandas support yet for dataset compression") - ), - pytest.param( - pd.DataFrame(np.random.random((10, 10))), - marks=pytest.mark.xfail(reason="No pandas support yet for dataset compression") - ) -]) +@pytest.mark.parametrize( + "X", + [ + np.empty((10, 10)), + csr_matrix(np.identity(10)), + pytest.param( + np.empty((10, 10)).tolist(), + marks=pytest.mark.xfail(reason="Converted to dataframe by InputValidator"), + ), + pytest.param( + pd.DataFrame(np.empty((10, 10))), + marks=pytest.mark.xfail( + reason="No pandas support yet for dataset compression" + ), + ), + ], +) +@pytest.mark.parametrize( + "y", + [ + np.random.random((10, 1)), + np.random.random((10, 1)).tolist(), + pytest.param( + pd.Series(np.random.random((10,))), + marks=pytest.mark.xfail( + reason="No pandas support yet for dataset compression" + ), + ), + pytest.param( + pd.DataFrame(np.random.random((10, 10))), + marks=pytest.mark.xfail( + reason="No pandas support yet for dataset compression" + ), + ), + ], +) @unittest.mock.patch("autosklearn.automl.reduce_dataset_size_if_too_large") def 
test_fit_performs_dataset_compression( mock_reduce_dataset: unittest.mock.MagicMock, dataset_compression: bool, X: Union[np.ndarray, spmatrix, List, pd.DataFrame], - y: Union[np.ndarray, List, pd.Series, pd.DataFrame] + y: Union[np.ndarray, List, pd.Series, pd.DataFrame], ) -> None: """ Parameters @@ -1122,7 +1181,7 @@ def test_fit_performs_dataset_compression( auto = AutoMLRegressor( time_left_for_this_task=30, # not used but required per_run_time_limit=5, # not used but required - dataset_compression=dataset_compression + dataset_compression=dataset_compression, ) # To prevent fitting anything we use `only_return_configuration_space` diff --git a/test/test_automl/test_estimators.py b/test/test_automl/test_estimators.py index b32d1d0026..ac60e51472 100644 --- a/test/test_automl/test_estimators.py +++ b/test/test_automl/test_estimators.py @@ -3,67 +3,74 @@ import copy import glob import importlib -import os import inspect import itertools +import os import pickle import re import sys import tempfile import unittest import unittest.mock -import pytest -from ConfigSpace.configuration_space import Configuration import joblib -from joblib import cpu_count import numpy as np import numpy.ma as npma import pandas as pd +import pytest import sklearn -import sklearn.model_selection as model_selection -import sklearn.dummy import sklearn.datasets -from sklearn.base import clone -from sklearn.base import ClassifierMixin, RegressorMixin -from sklearn.base import is_classifier -from smac.tae import StatusType +import sklearn.dummy +import sklearn.model_selection as model_selection +from ConfigSpace.configuration_space import Configuration from dask.distributed import Client +from joblib import cpu_count +from sklearn.base import ClassifierMixin, RegressorMixin, clone, is_classifier +from smac.tae import StatusType -from autosklearn.data.validation import InputValidator +import autosklearn.estimators # noqa F401 import autosklearn.pipeline.util as putil +from autosklearn.automl import AutoMLClassifier +from autosklearn.data.validation import InputValidator from autosklearn.ensemble_builder import MODEL_FN_RE -import autosklearn.estimators # noqa F401 from autosklearn.estimators import ( - AutoSklearnEstimator, AutoSklearnRegressor, AutoSklearnClassifier + AutoSklearnClassifier, + AutoSklearnEstimator, + AutoSklearnRegressor, ) -from autosklearn.metrics import accuracy, f1_macro, mean_squared_error, r2 -from autosklearn.automl import AutoMLClassifier from autosklearn.experimental.askl2 import AutoSklearn2Classifier +from autosklearn.metrics import accuracy, f1_macro, mean_squared_error, r2 from autosklearn.smbo import get_smac_object sys.path.append(os.path.dirname(__file__)) -from automl_utils import print_debug_information, count_succeses, includes_train_scores, includes_all_scores, include_single_scores, performance_over_time_is_plausible # noqa (E402: module level import not at top of file) +from automl_utils import ( # noqa (E402: module level import not at top of file) + count_succeses, + include_single_scores, + includes_all_scores, + includes_train_scores, + performance_over_time_is_plausible, + print_debug_information, +) def test_fit_n_jobs(tmp_dir): - X_train, Y_train, X_test, Y_test = putil.get_dataset('breast_cancer') + X_train, Y_train, X_test, Y_test = putil.get_dataset("breast_cancer") # test parallel Classifier to predict classes, not only indices Y_train += 1 Y_test += 1 class get_smac_object_wrapper: - def __call__(self, *args, **kwargs): - self.n_jobs = kwargs['n_jobs'] + self.n_jobs 
= kwargs["n_jobs"] smac = get_smac_object(*args, **kwargs) self.dask_n_jobs = smac.solver.tae_runner.n_workers self.dask_client_n_jobs = len( - smac.solver.tae_runner.client.scheduler_info()['workers'] + smac.solver.tae_runner.client.scheduler_info()["workers"] ) return smac + get_smac_object_wrapper_instance = get_smac_object_wrapper() automl = AutoSklearnClassifier( @@ -75,8 +82,7 @@ def __call__(self, *args, **kwargs): initial_configurations_via_metalearning=0, ensemble_size=5, n_jobs=2, - include={'classifier': ['sgd'], - 'feature_preprocessor': ['no_preprocessing']}, + include={"classifier": ["sgd"], "feature_preprocessor": ["no_preprocessing"]}, get_smac_object_callback=get_smac_object_wrapper_instance, max_models_on_disc=None, ) @@ -84,17 +90,24 @@ def __call__(self, *args, **kwargs): automl.fit(X_train, Y_train) # Test that the argument is correctly passed to SMAC - assert getattr(get_smac_object_wrapper_instance, 'n_jobs') == 2 - assert getattr(get_smac_object_wrapper_instance, 'dask_n_jobs') == 2 - assert getattr(get_smac_object_wrapper_instance, 'dask_client_n_jobs') == 2 + assert getattr(get_smac_object_wrapper_instance, "n_jobs") == 2 + assert getattr(get_smac_object_wrapper_instance, "dask_n_jobs") == 2 + assert getattr(get_smac_object_wrapper_instance, "dask_client_n_jobs") == 2 available_num_runs = set() for run_key, run_value in automl.automl_.runhistory_.data.items(): - if run_value.additional_info is not None and 'num_run' in run_value.additional_info: - available_num_runs.add(run_value.additional_info['num_run']) + if ( + run_value.additional_info is not None + and "num_run" in run_value.additional_info + ): + available_num_runs.add(run_value.additional_info["num_run"]) available_predictions = set() predictions = glob.glob( - os.path.join(automl.automl_._backend.get_runs_directory(), '*', 'predictions_ensemble*.npy') + os.path.join( + automl.automl_._backend.get_runs_directory(), + "*", + "predictions_ensemble*.npy", + ) ) seeds = set() for prediction in predictions: @@ -117,7 +130,7 @@ def __call__(self, *args, **kwargs): seeds = set() for ensemble_file in ensembles: - seeds.add(int(ensemble_file.split('.')[0].split('_')[0])) + seeds.add(int(ensemble_file.split(".")[0].split("_")[0])) assert len(seeds) == 1 assert count_succeses(automl.cv_results_) > 0 @@ -132,7 +145,7 @@ def test_feat_type_wrong_arguments(): # Every Auto-Sklearn estimator has a backend, that allows a single # call to fit X = np.zeros((100, 100)) - y = np.zeros((100, )) + y = np.zeros((100,)) cls = AutoSklearnClassifier(ensemble_size=0) expected_msg = r".*feat_type does not have same number of " @@ -143,43 +156,55 @@ def test_feat_type_wrong_arguments(): cls = AutoSklearnClassifier(ensemble_size=0) expected_msg = r".*feat_type must only contain strings.*" with pytest.raises(ValueError, match=expected_msg): - cls.fit(X=X, y=y, feat_type=[True]*100) + cls.fit(X=X, y=y, feat_type=[True] * 100) cls = AutoSklearnClassifier(ensemble_size=0) expected_msg = r".*Only `Categorical`, `Numerical` and `String` are" "valid feature types, you passed `Car`.*" with pytest.raises(ValueError, match=expected_msg): - cls.fit(X=X, y=y, feat_type=['Car']*100) + cls.fit(X=X, y=y, feat_type=["Car"] * 100) # Mock AutoSklearnEstimator.fit so the test doesn't actually run fit(). -@unittest.mock.patch('autosklearn.estimators.AutoSklearnEstimator.fit') +@unittest.mock.patch("autosklearn.estimators.AutoSklearnEstimator.fit") def test_type_of_target(mock_estimator): # Test that classifier raises error for illegal target types. 
- X = np.array([[1, 2], - [2, 3], - [3, 4], - [4, 5], - ]) + X = np.array( + [ + [1, 2], + [2, 3], + [3, 4], + [4, 5], + ] + ) # Possible target types y_binary = np.array([0, 0, 1, 1]) y_continuous = np.array([0.1, 1.3, 2.1, 4.0]) y_multiclass = np.array([0, 1, 2, 0]) - y_multilabel = np.array([[0, 1], - [1, 1], - [1, 0], - [0, 0], - ]) - y_multiclass_multioutput = np.array([[0, 1], - [1, 3], - [2, 2], - [5, 3], - ]) - y_continuous_multioutput = np.array([[0.1, 1.5], - [1.2, 3.5], - [2.7, 2.7], - [5.5, 3.9], - ]) + y_multilabel = np.array( + [ + [0, 1], + [1, 1], + [1, 0], + [0, 0], + ] + ) + y_multiclass_multioutput = np.array( + [ + [0, 1], + [1, 3], + [2, 2], + [5, 3], + ] + ) + y_continuous_multioutput = np.array( + [ + [0.1, 1.5], + [1.2, 3.5], + [2.7, 2.7], + [5.5, 3.9], + ] + ) cls = AutoSklearnClassifier(ensemble_size=0) cls.automl_ = unittest.mock.Mock() @@ -208,20 +233,19 @@ def test_type_of_target(mock_estimator): try: cls.fit(X, y_binary) except ValueError: - pytest.fail("cls.fit() raised ValueError while fitting " - "binary targets") + pytest.fail("cls.fit() raised ValueError while fitting " "binary targets") try: cls.fit(X, y_multiclass) except ValueError: - pytest.fail("cls.fit() raised ValueError while fitting " - "multiclass targets") + pytest.fail("cls.fit() raised ValueError while fitting " "multiclass targets") try: cls.fit(X, y_multilabel) except ValueError: - pytest.fail("cls.fit() raised ValueError while fitting " - "multilabel-indicator targets") + pytest.fail( + "cls.fit() raised ValueError while fitting " "multilabel-indicator targets" + ) # Test that regressor raises error for illegal target types. reg = AutoSklearnRegressor(ensemble_size=0) @@ -230,12 +254,18 @@ def test_type_of_target(mock_estimator): expected_msg = r".*Regression with data of type" " multilabel-indicator is not supported.*" with pytest.raises(ValueError, match=expected_msg): - reg.fit(X=X, y=y_multilabel,) + reg.fit( + X=X, + y=y_multilabel, + ) expected_msg = r".*Regression with data of type" " multiclass-multioutput is not supported.*" with pytest.raises(ValueError, match=expected_msg): - reg.fit(X=X, y=y_multiclass_multioutput,) + reg.fit( + X=X, + y=y_multiclass_multioutput, + ) # Legal target types: continuous, multiclass, # continuous-multioutput, @@ -243,37 +273,38 @@ def test_type_of_target(mock_estimator): try: reg.fit(X, y_continuous) except ValueError: - pytest.fail("reg.fit() raised ValueError while fitting " - "continuous targets") + pytest.fail("reg.fit() raised ValueError while fitting " "continuous targets") try: reg.fit(X, y_multiclass) except ValueError: - pytest.fail("reg.fit() raised ValueError while fitting " - "multiclass targets") + pytest.fail("reg.fit() raised ValueError while fitting " "multiclass targets") try: reg.fit(X, y_continuous_multioutput) except ValueError: - pytest.fail("reg.fit() raised ValueError while fitting " - "continuous_multioutput targets") + pytest.fail( + "reg.fit() raised ValueError while fitting " + "continuous_multioutput targets" + ) try: reg.fit(X, y_binary) except ValueError: - pytest.fail("reg.fit() raised ValueError while fitting " - "binary targets") + pytest.fail("reg.fit() raised ValueError while fitting " "binary targets") def test_performance_over_time_no_ensemble(tmp_dir): - X_train, Y_train, X_test, Y_test = putil.get_dataset('iris') + X_train, Y_train, X_test, Y_test = putil.get_dataset("iris") - cls = AutoSklearnClassifier(time_left_for_this_task=30, - per_run_time_limit=5, - tmp_folder=tmp_dir, - seed=1, - 
initial_configurations_via_metalearning=0, - ensemble_size=0,) + cls = AutoSklearnClassifier( + time_left_for_this_task=30, + per_run_time_limit=5, + tmp_folder=tmp_dir, + seed=1, + initial_configurations_via_metalearning=0, + ensemble_size=0, + ) cls.fit(X_train, Y_train, X_test, Y_test) @@ -285,16 +316,17 @@ def test_performance_over_time_no_ensemble(tmp_dir): def test_cv_results(tmp_dir): # TODO restructure and actually use real SMAC output from a long run # to do this unittest! - X_train, Y_train, X_test, Y_test = putil.get_dataset('iris') + X_train, Y_train, X_test, Y_test = putil.get_dataset("iris") - cls = AutoSklearnClassifier(time_left_for_this_task=30, - per_run_time_limit=5, - tmp_folder=tmp_dir, - seed=1, - initial_configurations_via_metalearning=0, - ensemble_size=0, - scoring_functions=[autosklearn.metrics.precision, - autosklearn.metrics.roc_auc]) + cls = AutoSklearnClassifier( + time_left_for_this_task=30, + per_run_time_limit=5, + tmp_folder=tmp_dir, + seed=1, + initial_configurations_via_metalearning=0, + ensemble_size=0, + scoring_functions=[autosklearn.metrics.precision, autosklearn.metrics.roc_auc], + ) params = cls.get_params() original_params = copy.deepcopy(params) @@ -303,23 +335,27 @@ def test_cv_results(tmp_dir): cv_results = cls.cv_results_ assert isinstance(cv_results, dict), type(cv_results) - assert isinstance(cv_results['mean_test_score'], np.ndarray), type( - cv_results['mean_test_score']) - assert isinstance(cv_results['mean_fit_time'], np.ndarray), type( - cv_results['mean_fit_time'] + assert isinstance(cv_results["mean_test_score"], np.ndarray), type( + cv_results["mean_test_score"] ) - assert isinstance(cv_results['params'], list), type(cv_results['params']) - assert isinstance(cv_results['rank_test_scores'], np.ndarray), type( - cv_results['rank_test_scores'] + assert isinstance(cv_results["mean_fit_time"], np.ndarray), type( + cv_results["mean_fit_time"] ) - assert isinstance(cv_results['metric_precision'], npma.MaskedArray), type( - cv_results['metric_precision'] + assert isinstance(cv_results["params"], list), type(cv_results["params"]) + assert isinstance(cv_results["rank_test_scores"], np.ndarray), type( + cv_results["rank_test_scores"] ) - assert isinstance(cv_results['metric_roc_auc'], npma.MaskedArray), type( - cv_results['metric_roc_auc'] + assert isinstance(cv_results["metric_precision"], npma.MaskedArray), type( + cv_results["metric_precision"] ) - cv_result_items = [isinstance(val, npma.MaskedArray) for key, val in - cv_results.items() if key.startswith('param_')] + assert isinstance(cv_results["metric_roc_auc"], npma.MaskedArray), type( + cv_results["metric_roc_auc"] + ) + cv_result_items = [ + isinstance(val, npma.MaskedArray) + for key, val in cv_results.items() + if key.startswith("param_") + ] assert all(cv_result_items), cv_results.items() # Compare the state of the model parameters with the original parameters @@ -337,21 +373,20 @@ def test_cv_results(tmp_dir): assert joblib.hash(new_value) == joblib.hash(original_value), ( "Estimator %s should not change or mutate " " the parameter %s from %s to %s during fit." 
- % (cls, param_name, original_value, new_value)) + % (cls, param_name, original_value, new_value) + ) # Comply with https://scikit-learn.org/dev/glossary.html#term-classes is_classifier(cls) - assert hasattr(cls, 'classes_') + assert hasattr(cls, "classes_") -@pytest.mark.parametrize('estimator_type,dataset_name', [ - (AutoSklearnClassifier, 'iris'), - (AutoSklearnRegressor, 'boston') -]) +@pytest.mark.parametrize( + "estimator_type,dataset_name", + [(AutoSklearnClassifier, "iris"), (AutoSklearnRegressor, "boston")], +) def test_leaderboard( - tmp_dir: str, - estimator_type: Type[AutoSklearnEstimator], - dataset_name: str + tmp_dir: str, estimator_type: Type[AutoSklearnEstimator], dataset_name: str ): # Comprehensive test tasks a substantial amount of time, manually set if # required. @@ -361,16 +396,16 @@ def test_leaderboard( # Create a dict of all possible param values for each param # with some invalid one's of the incorrect type include_combinations = itertools.chain( - itertools.combinations(column_types['all'], item_count) + itertools.combinations(column_types["all"], item_count) for item_count in range(1, MAX_COMBO_SIZE_FOR_INCLUDE_PARAM) ) valid_params = { - 'detailed': [True, False], - 'ensemble_only': [True, False], - 'top_k': [-10, 0, 1, 10, 'all'], - 'sort_by': [*column_types['all'], 'invalid'], - 'sort_order': ['ascending', 'descending', 'auto', 'invalid', None], - 'include': itertools.chain([None, 'invalid', 'type'], include_combinations), + "detailed": [True, False], + "ensemble_only": [True, False], + "top_k": [-10, 0, 1, 10, "all"], + "sort_by": [*column_types["all"], "invalid"], + "sort_order": ["ascending", "descending", "auto", "invalid", None], + "include": itertools.chain([None, "invalid", "type"], include_combinations), } # Create a generator of all possible combinations of valid_params @@ -381,55 +416,49 @@ def test_leaderboard( X_train, Y_train, _, _ = putil.get_dataset(dataset_name) model = estimator_type( - time_left_for_this_task=30, - per_run_time_limit=5, - tmp_folder=tmp_dir, - seed=1 + time_left_for_this_task=30, per_run_time_limit=5, tmp_folder=tmp_dir, seed=1 ) model.fit(X_train, Y_train) for params in params_generator: # Convert from iterator to solid list - if params['include'] is not None and not isinstance(params['include'], str): - params['include'] = list(params['include']) + if params["include"] is not None and not isinstance(params["include"], str): + params["include"] = list(params["include"]) # Invalid top_k should raise an error, is a positive int or 'all' - if not (params['top_k'] == 'all' or params['top_k'] > 0): + if not (params["top_k"] == "all" or params["top_k"] > 0): with pytest.raises(ValueError): model.leaderboard(**params) # Invalid sort_by column - elif params['sort_by'] not in column_types['all']: + elif params["sort_by"] not in column_types["all"]: with pytest.raises(ValueError): model.leaderboard(**params) # Shouldn't accept an invalid sort order - elif params['sort_order'] not in ['ascending', 'descending', 'auto']: + elif params["sort_order"] not in ["ascending", "descending", "auto"]: with pytest.raises(ValueError): model.leaderboard(**params) # include is single str but not valid elif ( - isinstance(params['include'], str) - and params['include'] not in column_types['all'] + isinstance(params["include"], str) + and params["include"] not in column_types["all"] ): with pytest.raises(ValueError): model.leaderboard(**params) # Crash if include is list but contains invalid column elif ( - isinstance(params['include'], list) - and 
len(set(params['include']) - set(column_types['all'])) != 0 + isinstance(params["include"], list) + and len(set(params["include"]) - set(column_types["all"])) != 0 ): with pytest.raises(ValueError): model.leaderboard(**params) # Can't have just model_id, in both single str and list case - elif ( - params['include'] == 'model_id' - or params['include'] == ['model_id'] - ): + elif params["include"] == "model_id" or params["include"] == ["model_id"]: with pytest.raises(ValueError): model.leaderboard(**params) @@ -439,8 +468,8 @@ def test_leaderboard( # top_k should never be less than the rows given back # It can however be larger - if isinstance(params['top_k'], int): - assert params['top_k'] >= len(leaderboard) + if isinstance(params["top_k"], int): + assert params["top_k"] >= len(leaderboard) # Check the right columns are present and in the right order # The model_id is set as the index, not included in pandas columns @@ -449,43 +478,47 @@ def test_leaderboard( def exclude(lst, s): return [x for x in lst if x != s] - if params['include'] is not None: + if params["include"] is not None: # Include with only single str should be the only column - if isinstance(params['include'], str): - assert params['include'] in columns and len(columns) == 1 + if isinstance(params["include"], str): + assert params["include"] in columns and len(columns) == 1 # Include as a list should have all the columns without model_id else: - assert columns == exclude(params['include'], 'model_id') - elif params['detailed']: - assert columns == exclude(column_types['detailed'], 'model_id') + assert columns == exclude(params["include"], "model_id") + elif params["detailed"]: + assert columns == exclude(column_types["detailed"], "model_id") else: - assert columns == exclude(column_types['simple'], 'model_id') + assert columns == exclude(column_types["simple"], "model_id") # Ensure that if it's ensemble only # Can only check if 'ensemble_weight' is present - if ( - params['ensemble_only'] - and 'ensemble_weight' in columns - ): - assert all(leaderboard['ensemble_weight'] > 0) - - -@pytest.mark.parametrize('estimator', [AutoSklearnRegressor]) -@pytest.mark.parametrize('resampling_strategy', ['holdout']) -@pytest.mark.parametrize('X', [ - np.asarray([[1.0, 1.0, 1.0]] * 25 + [[2.0, 2.0, 2.0]] * 25 + - [[3.0, 3.0, 3.0]] * 25 + [[4.0, 4.0, 4.0]] * 25) -]) -@pytest.mark.parametrize('y', [ - np.asarray([1.0] * 25 + [2.0] * 25 + [3.0] * 25 + [4.0] * 25) -]) + if params["ensemble_only"] and "ensemble_weight" in columns: + assert all(leaderboard["ensemble_weight"] > 0) + + +@pytest.mark.parametrize("estimator", [AutoSklearnRegressor]) +@pytest.mark.parametrize("resampling_strategy", ["holdout"]) +@pytest.mark.parametrize( + "X", + [ + np.asarray( + [[1.0, 1.0, 1.0]] * 25 + + [[2.0, 2.0, 2.0]] * 25 + + [[3.0, 3.0, 3.0]] * 25 + + [[4.0, 4.0, 4.0]] * 25 + ) + ], +) +@pytest.mark.parametrize( + "y", [np.asarray([1.0] * 25 + [2.0] * 25 + [3.0] * 25 + [4.0] * 25)] +) def test_show_models_with_holdout( tmp_dir: str, dask_client: Client, estimator: AutoSklearnEstimator, resampling_strategy: str, X: np.ndarray, - y: np.ndarray + y: np.ndarray, ) -> None: """ Parameters @@ -521,39 +554,44 @@ def test_show_models_with_holdout( per_run_time_limit=5, tmp_folder=tmp_dir, resampling_strategy=resampling_strategy, - dask_client=dask_client + dask_client=dask_client, ) automl.fit(X, y) models = automl.show_models().values() - model_keys = set([ - 'model_id', 'rank', 'cost', 'ensemble_weight', - 'data_preprocessor', 'feature_preprocessor', - 
'regressor', 'sklearn_regressor' - ]) + model_keys = set( + [ + "model_id", + "rank", + "cost", + "ensemble_weight", + "data_preprocessor", + "feature_preprocessor", + "regressor", + "sklearn_regressor", + ] + ) assert all([model_keys == set(model.keys()) for model in models]) - assert all([model['regressor'] for model in models]) - assert all([model['sklearn_regressor'] for model in models]) + assert all([model["regressor"] for model in models]) + assert all([model["sklearn_regressor"] for model in models]) assert not any([None in model.values() for model in models]) -@pytest.mark.parametrize('estimator', [AutoSklearnClassifier]) -@pytest.mark.parametrize('resampling_strategy', ['cv']) -@pytest.mark.parametrize('X', [ - np.asarray([[1.0, 1.0, 1.0]] * 50 + [[2.0, 2.0, 2.0]] * 50) -]) -@pytest.mark.parametrize('y', [ - np.asarray([1] * 50 + [2] * 50) -]) +@pytest.mark.parametrize("estimator", [AutoSklearnClassifier]) +@pytest.mark.parametrize("resampling_strategy", ["cv"]) +@pytest.mark.parametrize( + "X", [np.asarray([[1.0, 1.0, 1.0]] * 50 + [[2.0, 2.0, 2.0]] * 50)] +) +@pytest.mark.parametrize("y", [np.asarray([1] * 50 + [2] * 50)]) def test_show_models_with_cv( tmp_dir: str, dask_client: Client, estimator: AutoSklearnEstimator, resampling_strategy: str, X: np.ndarray, - y: np.ndarray + y: np.ndarray, ) -> None: """ Parameters @@ -578,12 +616,12 @@ def test_show_models_with_cv( Expects ------- - * Expects all the model dictionaries to have ``model_keys`` - * Expects no model to have any ``None`` value - * Expects all the estimators in a model to have ``estimator_keys`` - * Expects all model estimators to have an auto-sklearn wrapped model ``classifier`` - * Expects all model estimators to have a sklearn wrapped model ``sklearn_classifier`` - * Expects no estimator to have ``None`` value + * Expects all the model dictionaries to have `model_keys` + * Expects no model to have any `None` value + * Expects all the estimators in a model to have `estimator_keys` + * Expects all model estimators to have an auto-sklearn wrapped model `classifier` + * Expects all model estimators to have a sklearn wrapped model `sklearn_classifier` + * Expects no estimator to have None """ automl = estimator( @@ -591,37 +629,59 @@ def test_show_models_with_cv( per_run_time_limit=5, tmp_folder=tmp_dir, resampling_strategy=resampling_strategy, - dask_client=dask_client + dask_client=dask_client, ) automl.fit(X, y) models = automl.show_models().values() - model_keys = set([ - 'model_id', 'rank', - 'cost', 'ensemble_weight', - 'voting_model', 'estimators' - ]) + model_keys = set( + ["model_id", "rank", "cost", "ensemble_weight", "voting_model", "estimators"] + ) - estimator_keys = set([ - 'data_preprocessor', 'balancing', - 'feature_preprocessor', 'classifier', - 'sklearn_classifier' - ]) + estimator_keys = set( + [ + "data_preprocessor", + "balancing", + "feature_preprocessor", + "classifier", + "sklearn_classifier", + ] + ) assert all([model_keys == set(model.keys()) for model in models]) assert not any([None in model.values() for model in models]) - assert all([estimator_keys == set(estimator.keys()) - for model in models for estimator in model['estimators']]) - assert all([estimator['classifier'] - for model in models for estimator in model['estimators']]) - assert all([estimator['sklearn_classifier'] - for model in models for estimator in model['estimators']]) - assert not any([None in estimator.values() - for model in models for estimator in model['estimators']]) + assert all( + [ + estimator_keys == 
set(estimator.keys()) + for model in models + for estimator in model["estimators"] + ] + ) + assert all( + [ + estimator["classifier"] + for model in models + for estimator in model["estimators"] + ] + ) + assert all( + [ + estimator["sklearn_classifier"] + for model in models + for estimator in model["estimators"] + ] + ) + assert not any( + [ + None in estimator.values() + for model in models + for estimator in model["estimators"] + ] + ) -@unittest.mock.patch('autosklearn.estimators.AutoSklearnEstimator.build_automl') +@unittest.mock.patch("autosklearn.estimators.AutoSklearnEstimator.build_automl") def test_fit_n_jobs_negative(build_automl_patch): n_cores = cpu_count() cls = AutoSklearnEstimator(n_jobs=-1, ensemble_size=0) @@ -634,12 +694,17 @@ def test_get_number_of_available_cores(): assert n_cores >= 1, n_cores -@unittest.mock.patch('autosklearn.automl.AutoML.predict') +@unittest.mock.patch("autosklearn.automl.AutoML.predict") def test_multiclass_prediction(predict_mock, dask_client): - predicted_probabilities = [[0, 0, 0.99], [0, 0.99, 0], [0.99, 0, 0], - [0, 0.99, 0], [0, 0, 0.99]] + predicted_probabilities = [ + [0, 0, 0.99], + [0, 0.99, 0], + [0.99, 0, 0], + [0, 0.99, 0], + [0, 0, 0.99], + ] predicted_indexes = [2, 1, 0, 1, 2] - expected_result = ['c', 'b', 'a', 'b', 'c'] + expected_result = ["c", "b", "a", "b", "c"] predict_mock.return_value = np.array(predicted_probabilities) @@ -650,7 +715,7 @@ def test_multiclass_prediction(predict_mock, dask_client): ) classifier.InputValidator = InputValidator(is_classification=True) classifier.InputValidator.target_validator.fit( - pd.DataFrame(expected_result, dtype='category'), + pd.DataFrame(expected_result, dtype="category"), ) classifier.InputValidator._is_fitted = True @@ -659,13 +724,15 @@ def test_multiclass_prediction(predict_mock, dask_client): np.testing.assert_array_equal(expected_result, actual_result) -@unittest.mock.patch('autosklearn.automl.AutoML.predict') +@unittest.mock.patch("autosklearn.automl.AutoML.predict") def test_multilabel_prediction(predict_mock, dask_client): - predicted_probabilities = [[0.99, 0], - [0.99, 0], - [0, 0.99], - [0.99, 0.99], - [0.99, 0.99]] + predicted_probabilities = [ + [0.99, 0], + [0.99, 0], + [0, 0.99], + [0.99, 0.99], + [0.99, 0.99], + ] predicted_indexes = np.array([[1, 0], [1, 0], [0, 1], [1, 1], [1, 1]]) predict_mock.return_value = np.array(predicted_probabilities) @@ -677,11 +744,14 @@ def test_multilabel_prediction(predict_mock, dask_client): ) classifier.InputValidator = InputValidator(is_classification=True) classifier.InputValidator.target_validator.fit( - pd.DataFrame(predicted_indexes, dtype='int64'), + pd.DataFrame(predicted_indexes, dtype="int64"), ) classifier.InputValidator._is_fitted = True - assert classifier.InputValidator.target_validator.type_of_target == 'multilabel-indicator' + assert ( + classifier.InputValidator.target_validator.type_of_target + == "multilabel-indicator" + ) actual_result = classifier.predict([None] * len(predicted_indexes)) @@ -689,68 +759,66 @@ def test_multilabel_prediction(predict_mock, dask_client): def test_can_pickle_classifier(tmp_dir, dask_client): - X_train, Y_train, X_test, Y_test = putil.get_dataset('iris') - automl = AutoSklearnClassifier(time_left_for_this_task=30, - delete_tmp_folder_after_terminate=False, - per_run_time_limit=5, - tmp_folder=tmp_dir, - dask_client=dask_client, - ) + X_train, Y_train, X_test, Y_test = putil.get_dataset("iris") + automl = AutoSklearnClassifier( + time_left_for_this_task=30, + 
delete_tmp_folder_after_terminate=False, + per_run_time_limit=5, + tmp_folder=tmp_dir, + dask_client=dask_client, + ) automl.fit(X_train, Y_train) initial_predictions = automl.predict(X_test) - initial_accuracy = sklearn.metrics.accuracy_score(Y_test, - initial_predictions) + initial_accuracy = sklearn.metrics.accuracy_score(Y_test, initial_predictions) assert initial_accuracy >= 0.75 assert count_succeses(automl.cv_results_) > 0 assert includes_train_scores(automl.performance_over_time_.columns) is True assert performance_over_time_is_plausible(automl.performance_over_time_) is True # Test pickle - dump_file = os.path.join(tmp_dir, 'automl.dump.pkl') + dump_file = os.path.join(tmp_dir, "automl.dump.pkl") - with open(dump_file, 'wb') as f: + with open(dump_file, "wb") as f: pickle.dump(automl, f) - with open(dump_file, 'rb') as f: + with open(dump_file, "rb") as f: restored_automl = pickle.load(f) restored_predictions = restored_automl.predict(X_test) - restored_accuracy = sklearn.metrics.accuracy_score(Y_test, - restored_predictions) + restored_accuracy = sklearn.metrics.accuracy_score(Y_test, restored_predictions) assert restored_accuracy >= 0.75 assert initial_accuracy == restored_accuracy # Test joblib - dump_file = os.path.join(tmp_dir, 'automl.dump.joblib') + dump_file = os.path.join(tmp_dir, "automl.dump.joblib") joblib.dump(automl, dump_file) restored_automl = joblib.load(dump_file) restored_predictions = restored_automl.predict(X_test) - restored_accuracy = sklearn.metrics.accuracy_score(Y_test, - restored_predictions) + restored_accuracy = sklearn.metrics.accuracy_score(Y_test, restored_predictions) assert restored_accuracy >= 0.75 assert initial_accuracy == restored_accuracy def test_multilabel(tmp_dir, dask_client): - X_train, Y_train, X_test, Y_test = putil.get_dataset( - 'iris', make_multilabel=True) - automl = AutoSklearnClassifier(time_left_for_this_task=30, - per_run_time_limit=5, - tmp_folder=tmp_dir, - dask_client=dask_client, - ) + X_train, Y_train, X_test, Y_test = putil.get_dataset("iris", make_multilabel=True) + automl = AutoSklearnClassifier( + time_left_for_this_task=30, + per_run_time_limit=5, + tmp_folder=tmp_dir, + dask_client=dask_client, + ) automl.fit(X_train, Y_train) predictions = automl.predict(X_test) assert predictions.shape == (50, 3), print_debug_information(automl) - assert count_succeses(automl.cv_results_) > 0, print_debug_information(automl) + assert count_succeses(automl.cv_results_) > 0, print_debug_information(automl) assert includes_train_scores(automl.performance_over_time_.columns) is True assert performance_over_time_is_plausible(automl.performance_over_time_) is True @@ -763,20 +831,25 @@ def test_multilabel(tmp_dir, dask_client): def test_binary(tmp_dir, dask_client): - X_train, Y_train, X_test, Y_test = putil.get_dataset( - 'iris', make_binary=True) - automl = AutoSklearnClassifier(time_left_for_this_task=40, - delete_tmp_folder_after_terminate=False, - per_run_time_limit=10, - tmp_folder=tmp_dir, - dask_client=dask_client, - ) + X_train, Y_train, X_test, Y_test = putil.get_dataset("iris", make_binary=True) + automl = AutoSklearnClassifier( + time_left_for_this_task=40, + delete_tmp_folder_after_terminate=False, + per_run_time_limit=10, + tmp_folder=tmp_dir, + dask_client=dask_client, + ) - automl.fit(X_train, Y_train, X_test=X_test, y_test=Y_test, - dataset_name='binary_test_dataset') + automl.fit( + X_train, + Y_train, + X_test=X_test, + y_test=Y_test, + dataset_name="binary_test_dataset", + ) predictions = automl.predict(X_test) - 
assert predictions.shape == (50, ), print_debug_information(automl)
+    assert predictions.shape == (50,), print_debug_information(automl)
 
     score = accuracy(Y_test, predictions)
     assert score > 0.9, print_debug_information(automl)
@@ -794,7 +867,7 @@ def test_classification_pandas_support(tmp_dir, dask_client):
     )
 
     # Drop NAN!!
-    X = X.dropna(axis='columns')
+    X = X.dropna(axis="columns")
 
     # This test only make sense if input is dataframe
     assert isinstance(X, pd.DataFrame)
@@ -802,7 +875,7 @@ def test_classification_pandas_support(tmp_dir, dask_client):
     automl = AutoSklearnClassifier(
         time_left_for_this_task=30,
         per_run_time_limit=5,
-        exclude={'classifier': ['libsvm_svc']},
+        exclude={"classifier": ["libsvm_svc"]},
         dask_client=dask_client,
         seed=5,
         tmp_folder=tmp_dir,
@@ -828,12 +901,13 @@ def test_classification_pandas_support(tmp_dir, dask_client):
 
 
 def test_regression(tmp_dir, dask_client):
-    X_train, Y_train, X_test, Y_test = putil.get_dataset('boston')
-    automl = AutoSklearnRegressor(time_left_for_this_task=30,
-                                  per_run_time_limit=5,
-                                  tmp_folder=tmp_dir,
-                                  dask_client=dask_client,
-                                  )
+    X_train, Y_train, X_test, Y_test = putil.get_dataset("boston")
+    automl = AutoSklearnRegressor(
+        time_left_for_this_task=30,
+        per_run_time_limit=5,
+        tmp_folder=tmp_dir,
+        dask_client=dask_client,
+    )
 
     automl.fit(X_train, Y_train)
 
@@ -842,8 +916,9 @@ def test_regression(tmp_dir, dask_client):
     predictions = automl.predict(X_test)
 
     score = mean_squared_error(Y_test, predictions)
 
     # On average np.sqrt(30) away from the target -> ~5.5 on average
-    # Results with select rates drops avg score to a range of -32.40 to -37, on 30 seconds
-    # constraint. With more time_left_for_this_task this is no longer an issue
+    # Results with select rates drop the avg score to a range of -32.40 to -37,
+    # on 30 seconds constraint.
+ # With more time_left_for_this_task this is no longer an issue assert score >= -37, print_debug_information(automl) assert count_succeses(automl.cv_results_) > 0 assert includes_train_scores(automl.performance_over_time_.columns) is True @@ -856,13 +931,16 @@ def test_cv_regression(tmp_dir, dask_client): a regressor """ - X_train, Y_train, X_test, Y_test = putil.get_dataset('boston', train_size_maximum=300) - automl = AutoSklearnRegressor(time_left_for_this_task=60, - per_run_time_limit=10, - resampling_strategy='cv', - tmp_folder=tmp_dir, - dask_client=dask_client, - ) + X_train, Y_train, X_test, Y_test = putil.get_dataset( + "boston", train_size_maximum=300 + ) + automl = AutoSklearnRegressor( + time_left_for_this_task=60, + per_run_time_limit=10, + resampling_strategy="cv", + tmp_folder=tmp_dir, + dask_client=dask_client, + ) automl.fit(X_train, Y_train) @@ -913,13 +991,15 @@ def test_autosklearn_classification_methods_returns_self(dask_client): Currently this method only tests that the methods of AutoSklearnClassifier is able to fit using fit(), fit_ensemble() and refit() """ - X_train, y_train, X_test, y_test = putil.get_dataset('iris') - automl = AutoSklearnClassifier(time_left_for_this_task=60, - delete_tmp_folder_after_terminate=False, - per_run_time_limit=10, - ensemble_size=0, - dask_client=dask_client, - exclude={'feature_preprocessor': ['fast_ica']}) + X_train, y_train, X_test, y_test = putil.get_dataset("iris") + automl = AutoSklearnClassifier( + time_left_for_this_task=60, + delete_tmp_folder_after_terminate=False, + per_run_time_limit=10, + ensemble_size=0, + dask_client=dask_client, + exclude={"feature_preprocessor": ["fast_ica"]}, + ) automl_fitted = automl.fit(X_train, y_train) @@ -936,12 +1016,14 @@ def test_autosklearn_classification_methods_returns_self(dask_client): # Currently this class only tests that the methods of AutoSklearnRegressor # that should return self actually return self. 
def test_autosklearn_regression_methods_returns_self(dask_client): - X_train, y_train, X_test, y_test = putil.get_dataset('boston') - automl = AutoSklearnRegressor(time_left_for_this_task=30, - delete_tmp_folder_after_terminate=False, - per_run_time_limit=5, - dask_client=dask_client, - ensemble_size=0) + X_train, y_train, X_test, y_test = putil.get_dataset("boston") + automl = AutoSklearnRegressor( + time_left_for_this_task=30, + delete_tmp_folder_after_terminate=False, + per_run_time_limit=5, + dask_client=dask_client, + ensemble_size=0, + ) automl_fitted = automl.fit(X_train, y_train) assert automl is automl_fitted @@ -954,10 +1036,13 @@ def test_autosklearn_regression_methods_returns_self(dask_client): def test_autosklearn2_classification_methods_returns_self(dask_client): - X_train, y_train, X_test, y_test = putil.get_dataset('iris') - automl = AutoSklearn2Classifier(time_left_for_this_task=60, ensemble_size=0, - delete_tmp_folder_after_terminate=False, - dask_client=dask_client) + X_train, y_train, X_test, y_test = putil.get_dataset("iris") + automl = AutoSklearn2Classifier( + time_left_for_this_task=60, + ensemble_size=0, + delete_tmp_folder_after_terminate=False, + dask_client=dask_client, + ) automl_fitted = automl.fit(X_train, y_train) @@ -971,18 +1056,23 @@ def test_autosklearn2_classification_methods_returns_self(dask_client): assert automl is automl_refitted predictions = automl_fitted.predict(X_test) - assert sklearn.metrics.accuracy_score( - y_test, predictions - ) >= 2 / 3, print_debug_information(automl) + assert ( + sklearn.metrics.accuracy_score(y_test, predictions) >= 2 / 3 + ), print_debug_information(automl) pickle.dumps(automl_fitted) def test_autosklearn2_classification_methods_returns_self_sparse(dask_client): - X_train, y_train, X_test, y_test = putil.get_dataset('breast_cancer', make_sparse=True) - automl = AutoSklearn2Classifier(time_left_for_this_task=60, ensemble_size=0, - delete_tmp_folder_after_terminate=False, - dask_client=dask_client) + X_train, y_train, X_test, y_test = putil.get_dataset( + "breast_cancer", make_sparse=True + ) + automl = AutoSklearn2Classifier( + time_left_for_this_task=60, + ensemble_size=0, + delete_tmp_folder_after_terminate=False, + dask_client=dask_client, + ) automl_fitted = automl.fit(X_train, y_train) @@ -996,32 +1086,39 @@ def test_autosklearn2_classification_methods_returns_self_sparse(dask_client): assert automl is automl_refitted predictions = automl_fitted.predict(X_test) - assert sklearn.metrics.accuracy_score( - y_test, predictions - ) >= 2 / 3, print_debug_information(automl) + assert ( + sklearn.metrics.accuracy_score(y_test, predictions) >= 2 / 3 + ), print_debug_information(automl) assert "boosting" not in str(automl.get_configuration_space(X=X_train, y=y_train)) pickle.dumps(automl_fitted) -@pytest.mark.parametrize("class_", [AutoSklearnClassifier, AutoSklearnRegressor, - AutoSklearn2Classifier]) +@pytest.mark.parametrize( + "class_", [AutoSklearnClassifier, AutoSklearnRegressor, AutoSklearn2Classifier] +) def test_check_estimator_signature(class_): # Make sure signature is store in self - expected_subclass = ClassifierMixin if 'Classifier' in str(class_) else RegressorMixin + expected_subclass = ( + ClassifierMixin if "Classifier" in str(class_) else RegressorMixin + ) assert issubclass(class_, expected_subclass) estimator = class_() for expected in list(inspect.signature(class_).parameters): assert hasattr(estimator, expected) -@pytest.mark.parametrize("selector_path", [None, # No XDG_CACHE_HOME provided - '/', # 
XDG_CACHE_HOME has no permission
-                                         tempfile.gettempdir(),  # in the user cache
-                                         ])
+@pytest.mark.parametrize(
+    "selector_path",
+    [
+        None,  # No XDG_CACHE_HOME provided
+        "/",  # XDG_CACHE_HOME has no permission
+        tempfile.gettempdir(),  # in the user cache
+    ],
+)
 def test_selector_file_askl2_can_be_created(selector_path):
-    with unittest.mock.patch('os.environ.get') as mock_foo:
+    with unittest.mock.patch("os.environ.get") as mock_foo:
         mock_foo.return_value = selector_path
         if selector_path is not None and not os.access(selector_path, os.W_OK):
             with pytest.raises(PermissionError):
@@ -1029,7 +1126,9 @@ def test_selector_file_askl2_can_be_created(selector_path):
         else:
            importlib.reload(autosklearn.experimental.askl2)
            for metric in autosklearn.experimental.askl2.metrics:
-                assert os.path.exists(autosklearn.experimental.askl2.selector_files[metric.name])
+                assert os.path.exists(
+                    autosklearn.experimental.askl2.selector_files[metric.name]
+                )
            if selector_path is None or not os.access(selector_path, os.W_OK):
                # We default to home in worst case
                assert os.path.expanduser("~") in str(
@@ -1047,34 +1146,38 @@ def test_selector_file_askl2_can_be_created(selector_path):
 def test_check_askl2_same_arguments_as_askl() -> None:
     """Check the asklearn2 has the same args as asklearn1
 
-    This test is useful for when adding args to asklearn1 to make sure we update asklearn2
-
     Expects
     -------
-    * The set of arguments for AutoSklearnClassifier is the same as AutoSklearn2Classifier
-      except for a few expected arugments
+    * The set of arguments for AutoSklearnClassifier is the same as
+      AutoSklearn2Classifier except for a few expected arguments.
     """
-    autosklearn1_classifier_args = set(inspect.getfullargspec(AutoSklearnEstimator.__init__).args)
-    autosklearn2_classifier_args = set(inspect.getfullargspec(AutoSklearn2Classifier.__init__).args)
+    autosklearn1_classifier_args = set(
+        inspect.getfullargspec(AutoSklearnEstimator.__init__).args
+    )
+    autosklearn2_classifier_args = set(
+        inspect.getfullargspec(AutoSklearn2Classifier.__init__).args
+    )
 
     extra_arguments = autosklearn1_classifier_args - autosklearn2_classifier_args
-    expected_extra_args = set([
-        'exclude',
-        'include',
-        'resampling_strategy_arguments',
-        'get_smac_object_callback',
-        'initial_configurations_via_metalearning',
-        'resampling_strategy',
-        'metadata_directory',
-        'get_trials_callback',
-    ])
+    expected_extra_args = set(
+        [
+            "exclude",
+            "include",
+            "resampling_strategy_arguments",
+            "get_smac_object_callback",
+            "initial_configurations_via_metalearning",
+            "resampling_strategy",
+            "metadata_directory",
+            "get_trials_callback",
+        ]
+    )
     unexpected_args = extra_arguments - expected_extra_args
     assert len(unexpected_args) == 0, unexpected_args
 
 
-@pytest.mark.parametrize("task_type", ['classification', 'regression'])
-@pytest.mark.parametrize("resampling_strategy", ['test', 'cv', 'holdout'])
+@pytest.mark.parametrize("task_type", ["classification", "regression"])
+@pytest.mark.parametrize("resampling_strategy", ["test", "cv", "holdout"])
 @pytest.mark.parametrize("disable_file_output", [True, False])
 def test_fit_pipeline(dask_client, task_type, resampling_strategy, disable_file_output):
     """
@@ -1082,14 +1185,16 @@ def test_fit_pipeline(dask_client, task_type, resampling_strategy, disable_file_
     space, fit a classification pipeline with an acceptable score
     """
     X_train, y_train, X_test, y_test = putil.get_dataset(
-        'iris' if task_type == 'classification' else 'boston'
+        "iris" if task_type == "classification" else "boston"
+    )
+    estimator = (
+
AutoSklearnClassifier if task_type == "classification" else AutoSklearnRegressor ) - estimator = AutoSklearnClassifier if task_type == 'classification' else AutoSklearnRegressor seed = 3 if task_type == "classification": - include = {'classifier': ['random_forest']} + include = {"classifier": ["random_forest"]} else: - include = {'regressor': ['random_forest']} + include = {"regressor": ["random_forest"]} automl = estimator( delete_tmp_folder_after_terminate=False, time_left_for_this_task=120, @@ -1101,11 +1206,16 @@ def test_fit_pipeline(dask_client, task_type, resampling_strategy, disable_file_ include=include, seed=seed, # We cannot get the configuration space with 'test' not fit with it - resampling_strategy=resampling_strategy if resampling_strategy != 'test' else 'holdout', + resampling_strategy=resampling_strategy + if resampling_strategy != "test" + else "holdout", ) - config = automl.get_configuration_space(X_train, y_train, - X_test=X_test, y_test=y_test, - ).get_default_configuration() + config = automl.get_configuration_space( + X_train, + y_train, + X_test=X_test, + y_test=y_test, + ).get_default_configuration() pipeline, run_info, run_value = automl.fit_pipeline( X=X_train, @@ -1114,7 +1224,7 @@ def test_fit_pipeline(dask_client, task_type, resampling_strategy, disable_file_ X_test=X_test, y_test=y_test, disable_file_output=disable_file_output, - resampling_strategy=resampling_strategy + resampling_strategy=resampling_strategy, ) assert isinstance(run_info.config, Configuration) @@ -1124,20 +1234,20 @@ def test_fit_pipeline(dask_client, task_type, resampling_strategy, disable_file_ assert run_value.cost < 0.2 # Make sure that the pipeline can be pickled - dump_file = os.path.join(tempfile.gettempdir(), 'automl.dump.pkl') - with open(dump_file, 'wb') as f: + dump_file = os.path.join(tempfile.gettempdir(), "automl.dump.pkl") + with open(dump_file, "wb") as f: pickle.dump(pipeline, f) - if resampling_strategy == 'test' or disable_file_output: + if resampling_strategy == "test" or disable_file_output: # We do not produce a pipeline in 'test' assert pipeline is None - elif resampling_strategy == 'cv': + elif resampling_strategy == "cv": # We should have fitted a Voting estimator - assert hasattr(pipeline, 'estimators_') + assert hasattr(pipeline, "estimators_") else: # We should have fitted a pipeline with named_steps - assert hasattr(pipeline, 'named_steps') - assert 'RandomForest' in pipeline.steps[-1][-1].choice.__class__.__name__ + assert hasattr(pipeline, "named_steps") + assert "RandomForest" in pipeline.steps[-1][-1].choice.__class__.__name__ # Num run should be 2, as 1 is for dummy classifier and we have not launch # another pipeline @@ -1145,25 +1255,30 @@ def test_fit_pipeline(dask_client, task_type, resampling_strategy, disable_file_ # Check the re-sampling strategy num_run_dir = automl.automl_._backend.get_numrun_directory( - seed, num_run, budget=0.0) - cv_model_path = os.path.join(num_run_dir, automl.automl_._backend.get_cv_model_filename( - seed, num_run, budget=0.0)) - model_path = os.path.join(num_run_dir, automl.automl_._backend.get_model_filename( - seed, num_run, budget=0.0)) - if resampling_strategy == 'test' or disable_file_output: + seed, num_run, budget=0.0 + ) + cv_model_path = os.path.join( + num_run_dir, + automl.automl_._backend.get_cv_model_filename(seed, num_run, budget=0.0), + ) + model_path = os.path.join( + num_run_dir, + automl.automl_._backend.get_model_filename(seed, num_run, budget=0.0), + ) + if resampling_strategy == "test" or 
disable_file_output: # No file output is expected assert not os.path.exists(num_run_dir) else: # We expect the model path always # And the cv model only on 'cv' assert os.path.exists(model_path) - if resampling_strategy == 'cv': + if resampling_strategy == "cv": assert os.path.exists(cv_model_path) - elif resampling_strategy == 'holdout': + elif resampling_strategy == "holdout": assert not os.path.exists(cv_model_path) -@pytest.mark.parametrize("data_type", ['pandas', 'numpy']) +@pytest.mark.parametrize("data_type", ["pandas", "numpy"]) @pytest.mark.parametrize("include_categorical", [True, False]) def test_pass_categorical_and_numeric_columns_to_pipeline( dask_client, data_type, include_categorical @@ -1179,17 +1294,17 @@ def test_pass_categorical_and_numeric_columns_to_pipeline( if include_categorical: X = np.insert(X, n_features, values=0, axis=1) - if data_type == 'pandas': + if data_type == "pandas": X = pd.DataFrame(X) y = pd.DataFrame(y, dtype="category") # Set the last column to categorical if include_categorical: - X.loc[:, n_features] = X.loc[:, n_features].astype('category') # type: ignore + X.loc[:, n_features] = X.loc[:, n_features].astype("category") # Specify the feature_types - if data_type == 'numpy' and include_categorical: - feat_type = ['numerical'] * n_features + ['categorical'] + if data_type == "numpy" and include_categorical: + feat_type = ["numerical"] * n_features + ["categorical"] else: feat_type = None @@ -1207,17 +1322,25 @@ def test_pass_categorical_and_numeric_columns_to_pipeline( ensemble_size=0, seed=0, dask_client=dask_client, - include={'classifier': ['random_forest']}, + include={"classifier": ["random_forest"]}, ) config_space = automl.get_configuration_space( - X_train, y_train, X_test=X_test, y_test=y_test, feat_type=feat_type, + X_train, + y_train, + X_test=X_test, + y_test=y_test, + feat_type=feat_type, ) config = config_space.get_default_configuration() pipeline, _, run_value = automl.fit_pipeline( - X=X_train, y=y_train, X_test=X_test, y_test=y_test, - config=config, feat_type=feat_type, + X=X_train, + y=y_train, + X_test=X_test, + y_test=y_test, + config=config, + feat_type=feat_type, ) assert pipeline is not None, "Expected a pipeline from automl.fit_pipeline" @@ -1237,18 +1360,17 @@ def test_pass_categorical_and_numeric_columns_to_pipeline( if include_categorical: expected_feat_types = { i: feature_type - for i, feature_type - in enumerate(['numerical'] * (n_columns-1) + ['categorical']) + for i, feature_type in enumerate( + ["numerical"] * (n_columns - 1) + ["categorical"] + ) } else: expected_feat_types = { - i: feature_type - for i, feature_type - in enumerate(['numerical'] * n_columns) + i: feature_type for i, feature_type in enumerate(["numerical"] * n_columns) } - pipeline_feat_types = pipeline.named_steps['data_preprocessor'].choice.feat_type + pipeline_feat_types = pipeline.named_steps["data_preprocessor"].choice.feat_type assert expected_feat_types == pipeline_feat_types @@ -1260,20 +1382,27 @@ def test_autosklearn_anneal(as_frame): so is a good testcase for unit-testing """ X, y = sklearn.datasets.fetch_openml(data_id=2, return_X_y=True, as_frame=as_frame) - automl = AutoSklearnClassifier(time_left_for_this_task=60, ensemble_size=0, - delete_tmp_folder_after_terminate=False, - initial_configurations_via_metalearning=0, - smac_scenario_args={'runcount_limit': 6}, - resampling_strategy='holdout-iterative-fit') + automl = AutoSklearnClassifier( + time_left_for_this_task=60, + ensemble_size=0, + delete_tmp_folder_after_terminate=False, + 
initial_configurations_via_metalearning=0, + smac_scenario_args={"runcount_limit": 6}, + resampling_strategy="holdout-iterative-fit", + ) if as_frame: # Let autosklearn calculate the feat types automl_fitted = automl.fit(X, y) else: - X_, y_ = sklearn.datasets.fetch_openml(data_id=2, return_X_y=True, as_frame=True) - feat_type = ['categorical' if X_[col].dtype.name == 'category' else 'numerical' - for col in X_.columns] + X_, y_ = sklearn.datasets.fetch_openml( + data_id=2, return_X_y=True, as_frame=True + ) + feat_type = [ + "categorical" if X_[col].dtype.name == "category" else "numerical" + for col in X_.columns + ] automl_fitted = automl.fit(X, y, feat_type=feat_type) @@ -1289,9 +1418,9 @@ def test_autosklearn_anneal(as_frame): assert automl_fitted.score(X, y) > 0.75 -@pytest.mark.parametrize("dataset_compression", [ - False, True, {"memory_allocation": 0.2} -]) +@pytest.mark.parametrize( + "dataset_compression", [False, True, {"memory_allocation": 0.2}] +) def test_param_dataset_compression(dataset_compression: Union[bool, Dict[str, Any]]): """We expect this does not get parsed and modified until it gets to the AutoML class, In the meantime, it's value remains whatever was passed in. diff --git a/test/test_data/__init__.py b/test/test_data/__init__.py index cc3cd7becd..e298f0f075 100644 --- a/test/test_data/__init__.py +++ b/test/test_data/__init__.py @@ -1,2 +1,2 @@ # -*- encoding: utf-8 -*- -__author__ = 'feurerm' +__author__ = "feurerm" diff --git a/test/test_data/test_feature_validator.py b/test/test_data/test_feature_validator.py index 012ef1a179..0414cd31b4 100644 --- a/test/test_data/test_feature_validator.py +++ b/test/test_data/test_feature_validator.py @@ -1,14 +1,10 @@ import numpy as np - import pandas as pd -from pandas.api.types import is_numeric_dtype, is_categorical_dtype, is_string_dtype - import pytest - -from scipy import sparse - import sklearn.datasets import sklearn.model_selection +from pandas.api.types import is_categorical_dtype, is_numeric_dtype, is_string_dtype +from scipy import sparse from autosklearn.data.feature_validator import FeatureValidator @@ -16,118 +12,135 @@ # Fixtures to be used in this class. 
By default all elements have 100 datapoints @pytest.fixture def input_data_featuretest(request): - if request.param == 'numpy_categoricalonly_nonan': + if request.param == "numpy_categoricalonly_nonan": return np.random.randint(10, size=(100, 10)) - elif request.param == 'numpy_numericalonly_nonan': + elif request.param == "numpy_numericalonly_nonan": return np.random.uniform(10, size=(100, 10)) - elif request.param == 'numpy_mixed_nonan': - return np.column_stack([ - np.random.uniform(10, size=(100, 3)), - np.random.randint(10, size=(100, 3)), - np.random.uniform(10, size=(100, 3)), - np.random.randint(10, size=(100, 1)), - ]) - elif request.param == 'numpy_string_nonan': - return np.array([ - ['a', 'b', 'c', 'a', 'b', 'c'], - ['a', 'b', 'd', 'r', 'b', 'c'], - ]) - elif request.param == 'numpy_categoricalonly_nan': - array = np.random.randint(10, size=(100, 10)).astype('float') + elif request.param == "numpy_mixed_nonan": + return np.column_stack( + [ + np.random.uniform(10, size=(100, 3)), + np.random.randint(10, size=(100, 3)), + np.random.uniform(10, size=(100, 3)), + np.random.randint(10, size=(100, 1)), + ] + ) + elif request.param == "numpy_string_nonan": + return np.array( + [ + ["a", "b", "c", "a", "b", "c"], + ["a", "b", "d", "r", "b", "c"], + ] + ) + elif request.param == "numpy_categoricalonly_nan": + array = np.random.randint(10, size=(100, 10)).astype("float") array[50, 0:5] = np.nan return array - elif request.param == 'numpy_numericalonly_nan': - array = np.random.uniform(10, size=(100, 10)).astype('float') + elif request.param == "numpy_numericalonly_nan": + array = np.random.uniform(10, size=(100, 10)).astype("float") array[50, 0:5] = np.nan # Somehow array is changed to dtype object after np.nan - return array.astype('float') - elif request.param == 'numpy_mixed_nan': - array = np.column_stack([ - np.random.uniform(10, size=(100, 3)), - np.random.randint(10, size=(100, 3)), - np.random.uniform(10, size=(100, 3)), - np.random.randint(10, size=(100, 1)), - ]) + return array.astype("float") + elif request.param == "numpy_mixed_nan": + array = np.column_stack( + [ + np.random.uniform(10, size=(100, 3)), + np.random.randint(10, size=(100, 3)), + np.random.uniform(10, size=(100, 3)), + np.random.randint(10, size=(100, 1)), + ] + ) array[50, 0:5] = np.nan return array - elif request.param == 'numpy_string_nan': - return np.array([ - ['a', 'b', 'c', 'a', 'b', 'c'], - [np.nan, 'b', 'd', 'r', 'b', 'c'], - ]) - elif request.param == 'pandas_categoricalonly_nonan': - return pd.DataFrame([ - {'A': 1, 'B': 2}, - {'A': 3, 'B': 4}, - ], dtype='category') - elif request.param == 'pandas_numericalonly_nonan': - return pd.DataFrame([ - {'A': 1, 'B': 2}, - {'A': 3, 'B': 4}, - ], dtype='float') - elif request.param == 'pandas_mixed_nonan': - frame = pd.DataFrame([ - {'A': 1, 'B': 2}, - {'A': 3, 'B': 4}, - ], dtype='category') - frame['B'] = pd.to_numeric(frame['B']) + elif request.param == "numpy_string_nan": + return np.array( + [ + ["a", "b", "c", "a", "b", "c"], + [np.nan, "b", "d", "r", "b", "c"], + ] + ) + elif request.param == "pandas_categoricalonly_nonan": + return pd.DataFrame( + [ + {"A": 1, "B": 2}, + {"A": 3, "B": 4}, + ], + dtype="category", + ) + elif request.param == "pandas_numericalonly_nonan": + return pd.DataFrame( + [ + {"A": 1, "B": 2}, + {"A": 3, "B": 4}, + ], + dtype="float", + ) + elif request.param == "pandas_mixed_nonan": + frame = pd.DataFrame( + [ + {"A": 1, "B": 2}, + {"A": 3, "B": 4}, + ], + dtype="category", + ) + frame["B"] = pd.to_numeric(frame["B"]) return 
frame - elif request.param == 'pandas_categoricalonly_nan': - return pd.DataFrame([ - {'A': 1, 'B': 2, 'C': np.nan}, - {'A': 3, 'C': np.nan}, - ], dtype='category') - elif request.param == 'pandas_numericalonly_nan': - return pd.DataFrame([ - {'A': 1, 'B': 2, 'C': np.nan}, - {'A': 3, 'C': np.nan}, - ], dtype='float') - elif request.param == 'pandas_mixed_nan': - frame = pd.DataFrame([ - {'A': 1, 'B': 2, 'C': 8}, - {'A': 3, 'B': 4}, - ], dtype='category') - frame['B'] = pd.to_numeric(frame['B']) + elif request.param == "pandas_categoricalonly_nan": + return pd.DataFrame( + [ + {"A": 1, "B": 2, "C": np.nan}, + {"A": 3, "C": np.nan}, + ], + dtype="category", + ) + elif request.param == "pandas_numericalonly_nan": + return pd.DataFrame( + [ + {"A": 1, "B": 2, "C": np.nan}, + {"A": 3, "C": np.nan}, + ], + dtype="float", + ) + elif request.param == "pandas_mixed_nan": + frame = pd.DataFrame( + [ + {"A": 1, "B": 2, "C": 8}, + {"A": 3, "B": 4}, + ], + dtype="category", + ) + frame["B"] = pd.to_numeric(frame["B"]) return frame - elif request.param == 'pandas_string_nonan': - return pd.DataFrame([ - {'A': 1, 'B': 2}, - {'A': 3, 'B': 4}, - ], dtype='string') - elif request.param == 'list_categoricalonly_nonan': - return [ - ['a', 'b', 'c', 'd'], - ['e', 'f', 'c', 'd'], - ] - elif request.param == 'list_numericalonly_nonan': - return [ - [1, 2, 3, 4], - [5, 6, 7, 8] - ] - elif request.param == 'list_mixed_nonan': - return [ - ['a', 2, 3, 4], - ['b', 6, 7, 8] - ] - elif request.param == 'list_categoricalonly_nan': - return [ - ['a', 'b', 'c', np.nan], - ['e', 'f', 'c', 'd'], - ] - elif request.param == 'list_numericalonly_nan': + elif request.param == "pandas_string_nonan": + return pd.DataFrame( + [ + {"A": 1, "B": 2}, + {"A": 3, "B": 4}, + ], + dtype="string", + ) + elif request.param == "list_categoricalonly_nonan": return [ - [1, 2, 3, np.nan], - [5, 6, 7, 8] + ["a", "b", "c", "d"], + ["e", "f", "c", "d"], ] - elif request.param == 'list_mixed_nan': + elif request.param == "list_numericalonly_nonan": + return [[1, 2, 3, 4], [5, 6, 7, 8]] + elif request.param == "list_mixed_nonan": + return [["a", 2, 3, 4], ["b", 6, 7, 8]] + elif request.param == "list_categoricalonly_nan": return [ - ['a', np.nan, 3, 4], - ['b', 6, 7, 8] + ["a", "b", "c", np.nan], + ["e", "f", "c", "d"], ] - elif 'sparse' in request.param: + elif request.param == "list_numericalonly_nan": + return [[1, 2, 3, np.nan], [5, 6, 7, 8]] + elif request.param == "list_mixed_nan": + return [["a", np.nan, 3, 4], ["b", 6, 7, 8]] + elif "sparse" in request.param: # We expect the names to be of the type sparse_csc_nonan - sparse_, type_, nan_ = request.param.split('_') - if 'nonan' in nan_: + sparse_, type_, nan_ = request.param.split("_") + if "nonan" in nan_: data = np.ones(3) else: data = np.array([1, 2, np.nan]) @@ -135,26 +148,27 @@ def input_data_featuretest(request): # Then the type of sparse row_ind = np.array([0, 1, 2]) col_ind = np.array([1, 2, 1]) - if 'csc' in type_: + if "csc" in type_: return sparse.csc_matrix((data, (row_ind, col_ind))) - elif 'csr' in type_: + elif "csr" in type_: return sparse.csr_matrix((data, (row_ind, col_ind))) - elif 'coo' in type_: + elif "coo" in type_: return sparse.coo_matrix((data, (row_ind, col_ind))) - elif 'bsr' in type_: + elif "bsr" in type_: return sparse.bsr_matrix((data, (row_ind, col_ind))) - elif 'lil' in type_: + elif "lil" in type_: return sparse.lil_matrix((data)) - elif 'dok' in type_: + elif "dok" in type_: return sparse.dok_matrix(np.vstack((data, data, data))) - elif 'dia' in type_: + 
elif "dia" in type_: return sparse.dia_matrix(np.vstack((data, data, data))) else: ValueError("Unsupported indirect fixture {}".format(request.param)) - elif 'openml' in request.param: - _, openml_id = request.param.split('_') - X, y = sklearn.datasets.fetch_openml(data_id=int(openml_id), - return_X_y=True, as_frame=True) + elif "openml" in request.param: + _, openml_id = request.param.split("_") + X, y = sklearn.datasets.fetch_openml( + data_id=int(openml_id), return_X_y=True, as_frame=True + ) return X else: ValueError("Unsupported indirect fixture {}".format(request.param)) @@ -162,37 +176,37 @@ def input_data_featuretest(request): # Actual checks for the features @pytest.mark.parametrize( - 'input_data_featuretest', + "input_data_featuretest", ( - 'numpy_categoricalonly_nonan', - 'numpy_numericalonly_nonan', - 'numpy_mixed_nonan', - 'numpy_categoricalonly_nan', - 'numpy_numericalonly_nan', - 'numpy_mixed_nan', - 'pandas_categoricalonly_nonan', - 'pandas_numericalonly_nonan', - 'pandas_mixed_nonan', - 'pandas_numericalonly_nan', - 'list_numericalonly_nonan', - 'list_numericalonly_nan', - 'sparse_bsr_nonan', - 'sparse_bsr_nan', - 'sparse_coo_nonan', - 'sparse_coo_nan', - 'sparse_csc_nonan', - 'sparse_csc_nan', - 'sparse_csr_nonan', - 'sparse_csr_nan', - 'sparse_dia_nonan', - 'sparse_dia_nan', - 'sparse_dok_nonan', - 'sparse_dok_nan', - 'sparse_lil_nonan', - 'sparse_lil_nan', - 'openml_40981', # Australian + "numpy_categoricalonly_nonan", + "numpy_numericalonly_nonan", + "numpy_mixed_nonan", + "numpy_categoricalonly_nan", + "numpy_numericalonly_nan", + "numpy_mixed_nan", + "pandas_categoricalonly_nonan", + "pandas_numericalonly_nonan", + "pandas_mixed_nonan", + "pandas_numericalonly_nan", + "list_numericalonly_nonan", + "list_numericalonly_nan", + "sparse_bsr_nonan", + "sparse_bsr_nan", + "sparse_coo_nonan", + "sparse_coo_nan", + "sparse_csc_nonan", + "sparse_csc_nan", + "sparse_csr_nonan", + "sparse_csr_nan", + "sparse_dia_nonan", + "sparse_dia_nan", + "sparse_dok_nonan", + "sparse_dok_nan", + "sparse_lil_nonan", + "sparse_lil_nan", + "openml_40981", # Australian ), - indirect=True + indirect=True, ) def test_featurevalidator_supported_types(input_data_featuretest): validator = FeatureValidator() @@ -209,43 +223,45 @@ def test_featurevalidator_supported_types(input_data_featuretest): @pytest.mark.parametrize( - 'input_data_featuretest', + "input_data_featuretest", ( - 'numpy_string_nonan', - 'numpy_string_nan', + "numpy_string_nonan", + "numpy_string_nan", ), - indirect=True + indirect=True, ) def test_featurevalidator_unsupported_numpy(input_data_featuretest): validator = FeatureValidator() - with pytest.raises(ValueError, match=r".*When providing a numpy array.*not supported."): + with pytest.raises( + ValueError, match=r".*When providing a numpy array.*not supported." 
+ ): validator.fit(input_data_featuretest) @pytest.mark.parametrize( - 'input_data_featuretest', + "input_data_featuretest", ( - 'numpy_categoricalonly_nonan', - 'numpy_mixed_nonan', - 'numpy_categoricalonly_nan', - 'numpy_mixed_nan', - 'pandas_categoricalonly_nonan', - 'pandas_mixed_nonan', - 'sparse_bsr_nonan', - 'sparse_bsr_nan', - 'sparse_coo_nonan', - 'sparse_coo_nan', - 'sparse_csc_nonan', - 'sparse_csc_nan', - 'sparse_csr_nonan', - 'sparse_csr_nan', - 'sparse_dia_nonan', - 'sparse_dia_nan', - 'sparse_dok_nonan', - 'sparse_dok_nan', - 'sparse_lil_nonan', + "numpy_categoricalonly_nonan", + "numpy_mixed_nonan", + "numpy_categoricalonly_nan", + "numpy_mixed_nan", + "pandas_categoricalonly_nonan", + "pandas_mixed_nonan", + "sparse_bsr_nonan", + "sparse_bsr_nan", + "sparse_coo_nonan", + "sparse_coo_nan", + "sparse_csc_nonan", + "sparse_csc_nan", + "sparse_csr_nonan", + "sparse_csr_nan", + "sparse_dia_nonan", + "sparse_dia_nan", + "sparse_dok_nonan", + "sparse_dok_nan", + "sparse_lil_nonan", ), - indirect=True + indirect=True, ) def test_featurevalidator_fitontypeA_transformtypeB(input_data_featuretest): """ @@ -276,20 +292,24 @@ def test_featurevalidatorget_feat_type_from_columns(): """ validator = FeatureValidator() - df = pd.DataFrame([ - {'int': 1, 'float': 1.0, 'category': 'one', 'bool': True}, - {'int': 2, 'float': 2.0, 'category': 'two', 'bool': False}, - ]) + df = pd.DataFrame( + [ + {"int": 1, "float": 1.0, "category": "one", "bool": True}, + {"int": 2, "float": 2.0, "category": "two", "bool": False}, + ] + ) for col in df.columns: df[col] = df[col].astype(col) feature_types = validator.get_feat_type_from_columns(df) - assert feature_types == {'int': 'numerical', - 'float': 'numerical', - 'category': 'categorical', - 'bool': 'categorical'} + assert feature_types == { + "int": "numerical", + "float": "numerical", + "category": "categorical", + "bool": "categorical", + } def test_features_unsupported_calls_are_raised(): @@ -300,28 +320,37 @@ def test_features_unsupported_calls_are_raised(): """ validator = FeatureValidator() with pytest.raises(ValueError, match=r"Auto-sklearn does not support time"): + validator.fit(pd.DataFrame({"datetime": [pd.Timestamp("20180310")]})) + with pytest.raises( + ValueError, match=r"Auto-sklearn only supports.*yet, the provided input" + ): + validator.fit({"input1": 1, "input2": 2}) + validator = FeatureValidator() + with pytest.raises( + ValueError, match=r"The feature dimensionality of the train and test" + ): validator.fit( - pd.DataFrame({'datetime': [pd.Timestamp('20180310')]}) + X_train=np.array([[1, 2, 3], [4, 5, 6]]), + X_test=np.array([[1, 2, 3, 4], [4, 5, 6, 7]]), ) - with pytest.raises(ValueError, match=r"Auto-sklearn only supports.*yet, the provided input"): - validator.fit({'input1': 1, 'input2': 2}) - validator = FeatureValidator() - with pytest.raises(ValueError, match=r"The feature dimensionality of the train and test"): - validator.fit(X_train=np.array([[1, 2, 3], [4, 5, 6]]), - X_test=np.array([[1, 2, 3, 4], [4, 5, 6, 7]]), - ) - with pytest.raises(ValueError, match=r"Cannot call transform on a validator that is not fit"): + with pytest.raises( + ValueError, match=r"Cannot call transform on a validator that is not fit" + ): validator.transform(np.array([[1, 2, 3], [4, 5, 6]])) - validator = FeatureValidator(feat_type=['Numerical']) - with pytest.raises(ValueError, match=r"providing the option feat_type to the fit method is.*"): + validator = FeatureValidator(feat_type=["Numerical"]) + with pytest.raises( + ValueError, 
match=r"providing the option feat_type to the fit method is.*" + ): validator.fit(pd.DataFrame([[1, 2, 3], [4, 5, 6]])) with pytest.raises(ValueError, match=r"feat_type does not have same number of.*"): validator.fit(np.array([[1, 2, 3], [4, 5, 6]])) validator = FeatureValidator(feat_type=[1, 2, 3]) with pytest.raises(ValueError, match=r"feat_type must only contain strings.*"): validator.fit(np.array([[1, 2, 3], [4, 5, 6]])) - validator = FeatureValidator(feat_type=['1', '2', '3']) - with pytest.raises(ValueError, match=r"Only `Categorical`, `Numerical` and `String` are.*"): + validator = FeatureValidator(feat_type=["1", "2", "3"]) + with pytest.raises( + ValueError, match=r"Only `Categorical`, `Numerical` and `String` are.*" + ): validator.fit(np.array([[1, 2, 3], [4, 5, 6]])) @@ -331,16 +360,16 @@ def test_no_new_category_after_fit(): without throwing an error """ # Then make sure we catch categorical extra categories - x = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [5, 6, 7, 8]}, dtype='category') + x = pd.DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}, dtype="category") validator = FeatureValidator() validator.fit(x) - x['A'] = x['A'].apply(lambda x: x*x) + x["A"] = x["A"].apply(lambda x: x * x) validator.transform(x) # Actual checks for the features @pytest.mark.parametrize( - 'openml_id', + "openml_id", ( 40981, # Australian 3, # kr-vs-kp @@ -349,32 +378,37 @@ def test_no_new_category_after_fit(): 40984, # Segment ), ) -@pytest.mark.parametrize('train_data_type', ('numpy', 'pandas', 'list')) -@pytest.mark.parametrize('test_data_type', ('numpy', 'pandas', 'list')) -def test_featurevalidator_new_data_after_fit(openml_id, - train_data_type, test_data_type): +@pytest.mark.parametrize("train_data_type", ("numpy", "pandas", "list")) +@pytest.mark.parametrize("test_data_type", ("numpy", "pandas", "list")) +def test_featurevalidator_new_data_after_fit( + openml_id, train_data_type, test_data_type +): # List is currently not supported as infer_objects # cast list objects to type objects - if train_data_type == 'list' or test_data_type == 'list': + if train_data_type == "list" or test_data_type == "list": pytest.skip() validator = FeatureValidator() - if train_data_type == 'numpy': - X, y = sklearn.datasets.fetch_openml(data_id=openml_id, - return_X_y=True, as_frame=False) - elif train_data_type == 'pandas': - X, y = sklearn.datasets.fetch_openml(data_id=openml_id, - return_X_y=True, as_frame=True) + if train_data_type == "numpy": + X, y = sklearn.datasets.fetch_openml( + data_id=openml_id, return_X_y=True, as_frame=False + ) + elif train_data_type == "pandas": + X, y = sklearn.datasets.fetch_openml( + data_id=openml_id, return_X_y=True, as_frame=True + ) else: - X, y = sklearn.datasets.fetch_openml(data_id=openml_id, - return_X_y=True, as_frame=True) + X, y = sklearn.datasets.fetch_openml( + data_id=openml_id, return_X_y=True, as_frame=True + ) X = X.values.tolist() y = y.values.tolist() X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( - X, y, random_state=1) + X, y, random_state=1 + ) validator.fit(X_train) @@ -391,7 +425,7 @@ def test_featurevalidator_new_data_after_fit(openml_id, @pytest.mark.parametrize( - 'openml_id', + "openml_id", ( 40981, # Australian 3, # kr-vs-kp @@ -403,10 +437,12 @@ def test_featurevalidator_new_data_after_fit(openml_id, ) def test_list_to_dataframe(openml_id): - X_pandas, y_pandas = sklearn.datasets.fetch_openml(data_id=openml_id, - return_X_y=True, as_frame=True) + X_pandas, y_pandas = sklearn.datasets.fetch_openml( + data_id=openml_id, 
return_X_y=True, as_frame=True + ) X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( - X_pandas, y_pandas, random_state=1) + X_pandas, y_pandas, random_state=1 + ) X_list = X_train.values.tolist() validator = FeatureValidator() @@ -439,24 +475,24 @@ def test_list_to_dataframe(openml_id): @pytest.mark.parametrize( - 'input_data_featuretest', + "input_data_featuretest", ( - 'sparse_bsr_nonan', - 'sparse_bsr_nan', - 'sparse_coo_nonan', - 'sparse_coo_nan', - 'sparse_csc_nonan', - 'sparse_csc_nan', - 'sparse_csr_nonan', - 'sparse_csr_nan', - 'sparse_dia_nonan', - 'sparse_dia_nan', - 'sparse_dok_nonan', - 'sparse_dok_nan', - 'sparse_lil_nonan', - 'sparse_lil_nan', + "sparse_bsr_nonan", + "sparse_bsr_nan", + "sparse_coo_nonan", + "sparse_coo_nan", + "sparse_csc_nonan", + "sparse_csc_nan", + "sparse_csr_nonan", + "sparse_csr_nan", + "sparse_dia_nonan", + "sparse_dia_nan", + "sparse_dok_nonan", + "sparse_dok_nan", + "sparse_lil_nonan", + "sparse_lil_nan", ), - indirect=True + indirect=True, ) def test_sparse_output_is_csr(input_data_featuretest): validator = FeatureValidator() @@ -467,7 +503,9 @@ def test_sparse_output_is_csr(input_data_featuretest): def test_unsupported_dataframe_sparse(): - df = pd.DataFrame({'A': pd.Series(pd.arrays.SparseArray(np.random.randn(10)))}) + df = pd.DataFrame({"A": pd.Series(pd.arrays.SparseArray(np.random.randn(10)))}) validator = FeatureValidator() - with pytest.raises(ValueError, match=r"Auto-sklearn does not yet support sparse pandas"): + with pytest.raises( + ValueError, match=r"Auto-sklearn does not yet support sparse pandas" + ): validator.fit(df) diff --git a/test/test_data/test_target_validator.py b/test/test_data/test_target_validator.py index 09e075b85f..e57f464c72 100644 --- a/test/test_data/test_target_validator.py +++ b/test/test_data/test_target_validator.py @@ -1,14 +1,10 @@ import numpy as np - import pandas as pd - import pytest -from pandas.api.types import is_numeric_dtype, is_bool_dtype - -from scipy import sparse - import sklearn.datasets import sklearn.model_selection +from pandas.api.types import is_bool_dtype, is_numeric_dtype +from scipy import sparse from sklearn.utils.multiclass import type_of_target from autosklearn.data.target_validator import TargetValidator @@ -17,80 +13,85 @@ # Fixtures to be used in this class. 
By default all elements have 100 datapoints @pytest.fixture def input_data_targettest(request): - if request.param == 'series_binary': + if request.param == "series_binary": return pd.Series([1, -1, -1, 1]) - elif request.param == 'series_multiclass': + elif request.param == "series_multiclass": return pd.Series([1, 0, 2]) - elif request.param == 'series_multilabel': + elif request.param == "series_multilabel": return pd.Series([[1, 0], [0, 1]]) - elif request.param == 'series_continuous': + elif request.param == "series_continuous": return pd.Series([0.1, 0.6, 0.7]) - elif request.param == 'series_continuous-multioutput': + elif request.param == "series_continuous-multioutput": return pd.Series([[1.5, 2.0], [3.0, 1.6]]) - elif request.param == 'pandas_binary': + elif request.param == "pandas_binary": return pd.DataFrame([1, -1, -1, 1]) - elif request.param == 'pandas_multiclass': + elif request.param == "pandas_multiclass": return pd.DataFrame([1, 0, 2]) - elif request.param == 'pandas_multilabel': + elif request.param == "pandas_multilabel": return pd.DataFrame([[1, 0], [0, 1]]) - elif request.param == 'pandas_continuous': + elif request.param == "pandas_continuous": return pd.DataFrame([0.1, 0.6, 0.7]) - elif request.param == 'pandas_continuous-multioutput': + elif request.param == "pandas_continuous-multioutput": return pd.DataFrame([[1.5, 2.0], [3.0, 1.6]]) - elif request.param == 'numpy_binary': + elif request.param == "numpy_binary": return np.array([1, -1, -1, 1]) - elif request.param == 'numpy_multiclass': + elif request.param == "numpy_multiclass": return np.array([1, 0, 2]) - elif request.param == 'numpy_multilabel': + elif request.param == "numpy_multilabel": return np.array([[1, 0], [0, 1]]) - elif request.param == 'numpy_continuous': + elif request.param == "numpy_continuous": return np.array([0.1, 0.6, 0.7]) - elif request.param == 'numpy_continuous-multioutput': + elif request.param == "numpy_continuous-multioutput": return np.array([[1.5, 2.0], [3.0, 1.6]]) - elif request.param == 'list_binary': + elif request.param == "list_binary": return [1, -1, -1, 1] - elif request.param == 'list_multiclass': + elif request.param == "list_multiclass": return [1, 0, 2] - elif request.param == 'list_multilabel': + elif request.param == "list_multilabel": return [[0, 1], [1, 0]] - elif request.param == 'list_continuous': + elif request.param == "list_continuous": return [0.1, 0.6, 0.7] - elif request.param == 'list_continuous-multioutput': + elif request.param == "list_continuous-multioutput": return [[1.5, 2.0], [3.0, 1.6]] - elif 'openml' in request.param: - _, openml_id = request.param.split('_') - X, y = sklearn.datasets.fetch_openml(data_id=int(openml_id), - return_X_y=True, as_frame=True) - if len(y.shape) > 1 and y.shape[1] > 1 and np.any(y.eq('TRUE').any(1).to_numpy()): + elif "openml" in request.param: + _, openml_id = request.param.split("_") + X, y = sklearn.datasets.fetch_openml( + data_id=int(openml_id), return_X_y=True, as_frame=True + ) + if ( + len(y.shape) > 1 + and y.shape[1] > 1 + and np.any(y.eq("TRUE").any(1).to_numpy()) + ): # This 'if' is only asserted for multi-label data # Force the downloaded data to be interpreted as multilabel y = y.dropna() - y.replace('FALSE', 0, inplace=True) - y.replace('TRUE', 1, inplace=True) + y.replace("FALSE", 0, inplace=True) + y.replace("TRUE", 1, inplace=True) y = y.astype(int) return y - elif 'sparse' in request.param: + elif "sparse" in request.param: # We expect the names to be of the type sparse_csc_nonan - sparse_, type_, nan_ 
= request.param.split('_') - if 'nonan' in nan_: + sparse_, type_, nan_ = request.param.split("_") + if "nonan" in nan_: data = np.ones(3) else: data = np.array([1, 2, np.nan]) # Then the type of sparse - if 'csc' in type_: + if "csc" in type_: return sparse.csc_matrix(data) - elif 'csr' in type_: + elif "csr" in type_: return sparse.csr_matrix(data) - elif 'coo' in type_: + elif "coo" in type_: return sparse.coo_matrix(data) - elif 'bsr' in type_: + elif "bsr" in type_: return sparse.bsr_matrix(data) - elif 'lil' in type_: + elif "lil" in type_: return sparse.lil_matrix(data) - elif 'dok' in type_: + elif "dok" in type_: return sparse.dok_matrix(np.vstack((data, data, data))) - elif 'dia' in type_: + elif "dia" in type_: return sparse.dia_matrix(np.vstack((data, data, data))) else: ValueError("Unsupported indirect fixture {}".format(request.param)) @@ -100,29 +101,29 @@ def input_data_targettest(request): # Actual checks for the targets @pytest.mark.parametrize( - 'input_data_targettest', + "input_data_targettest", ( - 'series_binary', - 'series_multiclass', - 'series_continuous', - 'pandas_binary', - 'pandas_multiclass', - 'pandas_multilabel', - 'pandas_continuous', - 'pandas_continuous-multioutput', - 'numpy_binary', - 'numpy_multiclass', - 'numpy_multilabel', - 'numpy_continuous', - 'numpy_continuous-multioutput', - 'list_binary', - 'list_multiclass', - 'list_multilabel', - 'list_continuous', - 'list_continuous-multioutput', - 'openml_204', + "series_binary", + "series_multiclass", + "series_continuous", + "pandas_binary", + "pandas_multiclass", + "pandas_multilabel", + "pandas_continuous", + "pandas_continuous-multioutput", + "numpy_binary", + "numpy_multiclass", + "numpy_multilabel", + "numpy_continuous", + "numpy_continuous-multioutput", + "list_binary", + "list_multiclass", + "list_multilabel", + "list_continuous", + "list_continuous-multioutput", + "openml_204", ), - indirect=True + indirect=True, ) def test_targetvalidator_supported_types_noclassification(input_data_targettest): y = input_data_targettest @@ -146,19 +147,19 @@ def test_targetvalidator_supported_types_noclassification(input_data_targettest) @pytest.mark.parametrize( - 'input_data_targettest', + "input_data_targettest", ( - 'series_binary', - 'series_multiclass', - 'pandas_binary', - 'pandas_multiclass', - 'numpy_binary', - 'numpy_multiclass', - 'list_binary', - 'list_multiclass', - 'openml_2', + "series_binary", + "series_multiclass", + "pandas_binary", + "pandas_multiclass", + "numpy_binary", + "numpy_multiclass", + "list_binary", + "list_multiclass", + "openml_2", ), - indirect=True + indirect=True, ) def test_targetvalidator_supported_types_classification(input_data_targettest): y = input_data_targettest # Just to remove visual clutter @@ -177,10 +178,7 @@ def test_targetvalidator_supported_types_classification(input_data_targettest): assert isinstance(y_inverse, np.ndarray) # Assert that y_encoded is numeric and not boolean - assert ( - is_numeric_dtype(y_encoded.dtype) - and not is_bool_dtype(y_encoded.dtype) - ) + assert is_numeric_dtype(y_encoded.dtype) and not is_bool_dtype(y_encoded.dtype) # Assert dtype is presevered with y -> y_encoded -> y_inverse def dtype(arr): @@ -205,7 +203,7 @@ def dtype(arr): if len(shape) == 2 and shape[1] == 1: # For cases where y = [[1], [2], [3]], # we expect y_inverse, y_encodedd to have been flattened to [1,2,3] - expected_shape = (shape[0], ) + expected_shape = (shape[0],) else: expected_shape = shape @@ -221,7 +219,7 @@ def dtype(arr): # # As a result of this, we don't 
encode 'multilabel-indicator' labels and # there is nothing else to check here - if validator.type_of_target == 'multilabel-indicator': + if validator.type_of_target == "multilabel-indicator": assert validator.encoder is None else: @@ -242,112 +240,112 @@ def dtype(arr): @pytest.mark.parametrize( - 'input_data_targettest', + "input_data_targettest", ( - 'series_binary', - 'pandas_binary', - 'numpy_binary', - 'list_binary', - 'openml_1066', + "series_binary", + "pandas_binary", + "numpy_binary", + "list_binary", + "openml_1066", ), - indirect=True + indirect=True, ) def test_targetvalidator_binary(input_data_targettest): - assert type_of_target(input_data_targettest) == 'binary' + assert type_of_target(input_data_targettest) == "binary" validator = TargetValidator(is_classification=True) # Test the X_test also! validator.fit(input_data_targettest, input_data_targettest) transformed_y = validator.transform(input_data_targettest) - assert type_of_target(transformed_y) == 'binary' + assert type_of_target(transformed_y) == "binary" @pytest.mark.parametrize( - 'input_data_targettest', + "input_data_targettest", ( - 'series_multiclass', - 'pandas_multiclass', - 'numpy_multiclass', - 'list_multiclass', - 'openml_54', + "series_multiclass", + "pandas_multiclass", + "numpy_multiclass", + "list_multiclass", + "openml_54", ), - indirect=True + indirect=True, ) def test_targetvalidator_multiclass(input_data_targettest): - assert type_of_target(input_data_targettest) == 'multiclass' + assert type_of_target(input_data_targettest) == "multiclass" validator = TargetValidator(is_classification=True) # Test the X_test also! validator.fit(input_data_targettest, input_data_targettest) transformed_y = validator.transform(input_data_targettest) - assert type_of_target(transformed_y) == 'multiclass' + assert type_of_target(transformed_y) == "multiclass" @pytest.mark.parametrize( - 'input_data_targettest', + "input_data_targettest", ( - 'pandas_multilabel', - 'numpy_multilabel', - 'list_multilabel', - 'openml_40594', + "pandas_multilabel", + "numpy_multilabel", + "list_multilabel", + "openml_40594", ), - indirect=True + indirect=True, ) def test_targetvalidator_multilabel(input_data_targettest): - assert type_of_target(input_data_targettest) == 'multilabel-indicator' + assert type_of_target(input_data_targettest) == "multilabel-indicator" validator = TargetValidator(is_classification=True) # Test the X_test also! validator.fit(input_data_targettest, input_data_targettest) transformed_y = validator.transform(input_data_targettest) - assert type_of_target(transformed_y) == 'multilabel-indicator' + assert type_of_target(transformed_y) == "multilabel-indicator" @pytest.mark.parametrize( - 'input_data_targettest', + "input_data_targettest", ( - 'series_continuous', - 'pandas_continuous', - 'numpy_continuous', - 'list_continuous', - 'openml_531', + "series_continuous", + "pandas_continuous", + "numpy_continuous", + "list_continuous", + "openml_531", ), - indirect=True + indirect=True, ) def test_targetvalidator_continuous(input_data_targettest): - assert type_of_target(input_data_targettest) == 'continuous' + assert type_of_target(input_data_targettest) == "continuous" validator = TargetValidator(is_classification=False) # Test the X_test also! 
validator.fit(input_data_targettest, input_data_targettest) transformed_y = validator.transform(input_data_targettest) - assert type_of_target(transformed_y) == 'continuous' + assert type_of_target(transformed_y) == "continuous" @pytest.mark.parametrize( - 'input_data_targettest', + "input_data_targettest", ( - 'pandas_continuous-multioutput', - 'numpy_continuous-multioutput', - 'list_continuous-multioutput', - 'openml_41483', + "pandas_continuous-multioutput", + "numpy_continuous-multioutput", + "list_continuous-multioutput", + "openml_41483", ), - indirect=True + indirect=True, ) def test_targetvalidator_continuous_multioutput(input_data_targettest): - assert type_of_target(input_data_targettest) == 'continuous-multioutput' + assert type_of_target(input_data_targettest) == "continuous-multioutput" validator = TargetValidator(is_classification=False) # Test the X_test also! validator.fit(input_data_targettest, input_data_targettest) transformed_y = validator.transform(input_data_targettest) - assert type_of_target(transformed_y) == 'continuous-multioutput' + assert type_of_target(transformed_y) == "continuous-multioutput" @pytest.mark.parametrize( - 'input_data_targettest', + "input_data_targettest", ( - 'series_binary', - 'pandas_binary', - 'numpy_binary', - 'list_binary', + "series_binary", + "pandas_binary", + "numpy_binary", + "list_binary", ), - indirect=True + indirect=True, ) def test_targetvalidator_fitontypeA_transformtypeB(input_data_targettest): """ @@ -370,12 +368,12 @@ def test_targetvalidator_fitontypeA_transformtypeB(input_data_targettest): @pytest.mark.parametrize( - 'input_data_targettest', + "input_data_targettest", ( - 'series_multilabel', - 'series_continuous-multioutput', + "series_multilabel", + "series_continuous-multioutput", ), - indirect=True + indirect=True, ) def test_type_of_target_unsupported(input_data_targettest): """ @@ -393,43 +391,63 @@ def test_target_unsupported(): when providing not supported data input """ validator = TargetValidator(is_classification=True) - with pytest.raises(ValueError, match=r"The dimensionality of the train and test targets"): + with pytest.raises( + ValueError, match=r"The dimensionality of the train and test targets" + ): validator.fit( np.array([[0, 1, 0], [0, 1, 1]]), np.array([[0, 1, 0, 0], [0, 1, 1, 1]]), ) - with pytest.raises(ValueError, match=r"Train and test targets must both have the same dtypes"): + with pytest.raises( + ValueError, match=r"Train and test targets must both have the same dtypes" + ): validator.fit( - pd.DataFrame({'a': [1, 2, 3]}), - pd.DataFrame({'a': [True, False, False]}), + pd.DataFrame({"a": [1, 2, 3]}), + pd.DataFrame({"a": [True, False, False]}), ) with pytest.raises(ValueError, match=r"Provided targets are not supported.*"): validator.fit( np.array([[0, 1, 2], [0, 3, 4]]), np.array([[0, 1, 2, 5], [0, 3, 4, 6]]), ) - with pytest.raises(ValueError, match="Train and test targets must both have the same"): + with pytest.raises( + ValueError, match="Train and test targets must both have the same" + ): validator.fit( - pd.DataFrame({'string': ['foo']}), - pd.DataFrame({'int': [1]}), + pd.DataFrame({"string": ["foo"]}), + pd.DataFrame({"int": [1]}), ) - with pytest.raises(ValueError, match=r"Auto-sklearn only supports Numpy arrays, .*"): - validator.fit({'input1': 1, 'input2': 2}) - with pytest.raises(ValueError, match=r"arget values cannot contain missing/NaN values"): + with pytest.raises( + ValueError, match=r"Auto-sklearn only supports Numpy arrays, .*" + ): + validator.fit({"input1": 1, 
"input2": 2}) + with pytest.raises( + ValueError, match=r"arget values cannot contain missing/NaN values" + ): validator.fit(np.array([np.nan, 1, 2])) - with pytest.raises(ValueError, match=r"arget values cannot contain missing/NaN values"): + with pytest.raises( + ValueError, match=r"arget values cannot contain missing/NaN values" + ): validator.fit(sparse.csr_matrix(np.array([1, 2, np.nan]))) - with pytest.raises(ValueError, match=r"TargetValidator must have fit\(\) called first"): + with pytest.raises( + ValueError, match=r"TargetValidator must have fit\(\) called first" + ): validator.transform(np.array([1, 2, 3])) - with pytest.raises(ValueError, match=r"TargetValidator must have fit\(\) called first"): + with pytest.raises( + ValueError, match=r"TargetValidator must have fit\(\) called first" + ): validator.inverse_transform(np.array([1, 2, 3])) - with pytest.raises(ValueError, match=r"Multi-dimensional classification is not yet supported"): + with pytest.raises( + ValueError, match=r"Multi-dimensional classification is not yet supported" + ): validator._fit(np.array([[1, 2, 3], [1, 5, 6]])) # Dia/ DOK are not supported as type of target makes calls len on the array # which causes TypeError: len() of unsized object. Basically, sparse data as # multi-label is the only thing that makes sense in this format. - with pytest.raises(ValueError, match=r"The provided data could not be interpreted by Sklearn"): + with pytest.raises( + ValueError, match=r"The provided data could not be interpreted by Sklearn" + ): validator.fit(sparse.dia_matrix(np.array([1, 2, 3]))) validator.fit(np.array([[0, 1, 0], [0, 1, 1]])) @@ -443,22 +461,21 @@ def test_targetvalidator_inversetransform(): """ validator = TargetValidator(is_classification=True) validator.fit( - pd.DataFrame(data=['a', 'a', 'b', 'c', 'a'], dtype='category'), + pd.DataFrame(data=["a", "a", "b", "c", "a"], dtype="category"), ) y = validator.transform( - pd.DataFrame(data=['a', 'a', 'b', 'c', 'a'], dtype='category'), + pd.DataFrame(data=["a", "a", "b", "c", "a"], dtype="category"), ) np.testing.assert_array_almost_equal(np.array([0, 0, 1, 2, 0]), y) y_decoded = validator.inverse_transform(y) - assert ['a', 'a', 'b', 'c', 'a'] == y_decoded.tolist() + assert ["a", "a", "b", "c", "a"] == y_decoded.tolist() - assert validator.classes_.tolist() == ['a', 'b', 'c'] + assert validator.classes_.tolist() == ["a", "b", "c"] validator = TargetValidator(is_classification=True) multi_label = pd.DataFrame( - np.array([[1, 0, 0, 1], [0, 0, 1, 1], [0, 0, 0, 0]]), - dtype=bool + np.array([[1, 0, 0, 1], [0, 0, 1, 1], [0, 0, 0, 0]]), dtype=bool ) validator.fit(multi_label) y = validator.transform(multi_label) @@ -473,18 +490,18 @@ def test_targetvalidator_inversetransform(): # Actual checks for the targets @pytest.mark.parametrize( - 'input_data_targettest', + "input_data_targettest", ( - 'series_binary', - 'series_multiclass', - 'pandas_binary', - 'pandas_multiclass', - 'numpy_binary', - 'numpy_multiclass', - 'list_binary', - 'list_multiclass', + "series_binary", + "series_multiclass", + "pandas_binary", + "pandas_multiclass", + "numpy_binary", + "numpy_multiclass", + "list_binary", + "list_multiclass", ), - indirect=True + indirect=True, ) def test_unknown_categories_in_targets(input_data_targettest): validator = TargetValidator(is_classification=True) diff --git a/test/test_data/test_validation.py b/test/test_data/test_validation.py index 7bc2cb3dc5..4d09c65075 100644 --- a/test/test_data/test_validation.py +++ b/test/test_data/test_validation.py @@ -1,34 
+1,33 @@ import numpy as np - import pandas as pd - import pytest - -from scipy import sparse - import sklearn.datasets import sklearn.model_selection +from scipy import sparse from autosklearn.data.validation import InputValidator -@pytest.mark.parametrize('openmlid', [2, 40975, 40984]) -@pytest.mark.parametrize('as_frame', [True, False]) +@pytest.mark.parametrize("openmlid", [2, 40975, 40984]) +@pytest.mark.parametrize("as_frame", [True, False]) def test_data_validation_for_classification(openmlid, as_frame): - x, y = sklearn.datasets.fetch_openml(data_id=openmlid, return_X_y=True, as_frame=as_frame) + x, y = sklearn.datasets.fetch_openml( + data_id=openmlid, return_X_y=True, as_frame=as_frame + ) validator = InputValidator(is_classification=True) if as_frame: # NaN is not supported in categories, so # drop columns with them. nan_cols = [i for i in x.columns if x[i].isnull().any()] - cat_cols = [i for i in x.columns if x[i].dtype.name in ['category', 'bool']] + cat_cols = [i for i in x.columns if x[i].dtype.name in ["category", "bool"]] unsupported_columns = list(set(nan_cols) & set(cat_cols)) if len(unsupported_columns) > 0: x.drop(unsupported_columns, axis=1, inplace=True) X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( - x, y, test_size=0.33, random_state=0) + x, y, test_size=0.33, random_state=0 + ) validator.fit(X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test) @@ -49,23 +48,26 @@ def test_data_validation_for_classification(openmlid, as_frame): validator.feature_validator.feat_type is not None -@pytest.mark.parametrize('openmlid', [505, 546, 531]) -@pytest.mark.parametrize('as_frame', [True, False]) +@pytest.mark.parametrize("openmlid", [505, 546, 531]) +@pytest.mark.parametrize("as_frame", [True, False]) def test_data_validation_for_regression(openmlid, as_frame): - x, y = sklearn.datasets.fetch_openml(data_id=openmlid, return_X_y=True, as_frame=as_frame) + x, y = sklearn.datasets.fetch_openml( + data_id=openmlid, return_X_y=True, as_frame=as_frame + ) validator = InputValidator(is_classification=False) if as_frame: # NaN is not supported in categories, so # drop columns with them. 
nan_cols = [i for i in x.columns if x[i].isnull().any()] - cat_cols = [i for i in x.columns if x[i].dtype.name in ['category', 'bool']] + cat_cols = [i for i in x.columns if x[i].dtype.name in ["category", "bool"]] unsupported_columns = list(set(nan_cols) & set(cat_cols)) if len(unsupported_columns) > 0: x.drop(unsupported_columns, axis=1, inplace=True) X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( - x, y, test_size=0.33, random_state=0) + x, y, test_size=0.33, random_state=0 + ) validator.fit(X_train=X_train, y_train=y_train) @@ -83,7 +85,9 @@ def test_data_validation_for_regression(openmlid, as_frame): def test_sparse_data_validation_for_regression(): - X, y = sklearn.datasets.make_regression(n_samples=100, n_features=50, random_state=0) + X, y = sklearn.datasets.make_regression( + n_samples=100, n_features=50, random_state=0 + ) X_sp = sparse.coo_matrix(X) validator = InputValidator(is_classification=False) @@ -118,7 +122,9 @@ def test_validation_unsupported(): X_test=np.array([[0, 1, 0], [0, 1, 1]]), y_test=np.array([0, 1, 0, 0, 0, 0]), ) - with pytest.raises(ValueError, match=r"Cannot call transform on a validator .*fitted"): + with pytest.raises( + ValueError, match=r"Cannot call transform on a validator .*fitted" + ): validator.transform( X=np.array([[0, 1, 0], [0, 1, 1]]), y=np.array([0, 1]), diff --git a/test/test_ensemble_builder/__init__.py b/test/test_ensemble_builder/__init__.py index 51b8efdf22..b74c2a5ccb 100644 --- a/test/test_ensemble_builder/__init__.py +++ b/test/test_ensemble_builder/__init__.py @@ -1,2 +1,2 @@ # -*- encoding: utf-8 -*- -__author__ = 'mlindauer' +__author__ = "mlindauer" diff --git a/test/test_ensemble_builder/ensemble_utils.py b/test/test_ensemble_builder/ensemble_utils.py index b98021c7bd..fa0f22e9e7 100644 --- a/test/test_ensemble_builder/ensemble_utils.py +++ b/test/test_ensemble_builder/ensemble_utils.py @@ -5,47 +5,55 @@ import numpy as np -from autosklearn.automl_common.common.ensemble_building.abstract_ensemble import AbstractEnsemble - -from autosklearn.metrics import make_scorer +from autosklearn.automl_common.common.ensemble_building.abstract_ensemble import ( + AbstractEnsemble, +) from autosklearn.ensemble_builder import EnsembleBuilder +from autosklearn.metrics import make_scorer def scorer_function(a, b): return 0.9 -MockMetric = make_scorer('mock', scorer_function) +MockMetric = make_scorer("mock", scorer_function) class BackendMock(object): - def __init__(self, target_directory): - this_directory = os.path.abspath( - os.path.dirname(__file__) + this_directory = os.path.abspath(os.path.dirname(__file__)) + shutil.copytree( + os.path.join(this_directory, "data"), os.path.join(target_directory) ) - shutil.copytree(os.path.join(this_directory, 'data'), os.path.join(target_directory)) self.temporary_directory = target_directory - self.internals_directory = os.path.join(self.temporary_directory, '.auto-sklearn') + self.internals_directory = os.path.join( + self.temporary_directory, ".auto-sklearn" + ) def load_datamanager(self): manager = unittest.mock.Mock() manager.__reduce__ = lambda self: (unittest.mock.MagicMock, ()) - array = np.load(os.path.join( - self.temporary_directory, - '.auto-sklearn', - 'runs', '0_3_100.0', - 'predictions_test_0_3_100.0.npy' - )) + array = np.load( + os.path.join( + self.temporary_directory, + ".auto-sklearn", + "runs", + "0_3_100.0", + "predictions_test_0_3_100.0.npy", + ) + ) manager.data.get.return_value = array return manager def load_targets_ensemble(self): - with 
open(os.path.join( - self.temporary_directory, - ".auto-sklearn", - "predictions_ensemble_true.npy" - ), "rb") as fp: + with open( + os.path.join( + self.temporary_directory, + ".auto-sklearn", + "predictions_ensemble_true.npy", + ), + "rb", + ) as fp: y = np.load(fp, allow_pickle=True) return y @@ -56,13 +64,15 @@ def save_predictions_as_txt(self, predictions, subset, idx, prefix, precision): return def get_runs_directory(self) -> str: - return os.path.join(self.temporary_directory, '.auto-sklearn', 'runs') + return os.path.join(self.temporary_directory, ".auto-sklearn", "runs") def get_numrun_directory(self, seed: int, num_run: int, budget: float) -> str: - return os.path.join(self.get_runs_directory(), '%d_%d_%s' % (seed, num_run, budget)) + return os.path.join( + self.get_runs_directory(), "%d_%d_%s" % (seed, num_run, budget) + ) def get_model_filename(self, seed: int, idx: int, budget: float) -> str: - return '%s.%s.%s.model' % (seed, idx, budget) + return "%s.%s.%s.model" % (seed, idx, budget) def compare_read_preds(read_preds1, read_preds2): @@ -91,13 +101,15 @@ def compare_read_preds(read_preds1, read_preds2): class EnsembleBuilderMemMock(EnsembleBuilder): - def fit_ensemble(self, selected_keys): return True - def predict(self, set_: str, - ensemble: AbstractEnsemble, - selected_keys: list, - n_preds: int, - index_run: int): + def predict( + self, + set_: str, + ensemble: AbstractEnsemble, + selected_keys: list, + n_preds: int, + index_run: int, + ): np.ones([10000000, 1000000]) diff --git a/test/test_ensemble_builder/test_ensemble.py b/test/test_ensemble_builder/test_ensemble.py index 335c07eca2..3533da37cd 100644 --- a/test/test_ensemble_builder/test_ensemble.py +++ b/test/test_ensemble_builder/test_ensemble.py @@ -1,35 +1,40 @@ import os +import pickle +import shutil import sys import time import unittest.mock -import pickle -import pytest -import shutil import dask.distributed import numpy as np import pandas as pd -from smac.runhistory.runhistory import RunValue, RunKey, RunHistory +import pytest +from smac.runhistory.runhistory import RunHistory, RunKey, RunValue -from autosklearn.constants import MULTILABEL_CLASSIFICATION, BINARY_CLASSIFICATION -from autosklearn.metrics import roc_auc, accuracy, log_loss +from autosklearn.constants import BINARY_CLASSIFICATION, MULTILABEL_CLASSIFICATION from autosklearn.ensemble_builder import ( - EnsembleBuilder, - EnsembleBuilderManager, Y_ENSEMBLE, - Y_VALID, Y_TEST, + Y_VALID, + EnsembleBuilder, + EnsembleBuilderManager, ) from autosklearn.ensembles.singlebest_ensemble import SingleBest +from autosklearn.metrics import accuracy, log_loss, roc_auc this_directory = os.path.dirname(__file__) sys.path.append(this_directory) -from ensemble_utils import BackendMock, compare_read_preds, EnsembleBuilderMemMock, MockMetric # noqa (E402: module level import not at top of file) +from ensemble_utils import ( # noqa (E402: module level import not at top of file) + BackendMock, + EnsembleBuilderMemMock, + MockMetric, + compare_read_preds, +) @pytest.fixture(scope="function") def ensemble_backend(request): - test_id = '%s_%s' % (request.module.__name__, request.node.name) + test_id = "%s_%s" % (request.module.__name__, request.node.name) test_dir = os.path.join(this_directory, test_id) try: @@ -46,7 +51,9 @@ def session_run_at_end(): shutil.rmtree(test_dir) except: # noqa E722 pass + return session_run_at_end + request.addfinalizer(get_finalizer(backend)) return backend @@ -58,10 +65,7 @@ def ensemble_run_history(request): run_history = RunHistory() 
run_history._add( RunKey( - config_id=3, - instance_id='{"task_id": "breast_cancer"}', - seed=1, - budget=3.0 + config_id=3, instance_id='{"task_id": "breast_cancer"}', seed=1, budget=3.0 ), RunValue( cost=0.11347517730496459, @@ -70,30 +74,29 @@ def ensemble_run_history(request): starttime=time.time(), endtime=time.time(), additional_info={ - 'duration': 0.20323538780212402, - 'num_run': 3, - 'configuration_origin': 'Random Search'} + "duration": 0.20323538780212402, + "num_run": 3, + "configuration_origin": "Random Search", + }, ), status=None, origin=None, ) run_history._add( RunKey( - config_id=6, - instance_id='{"task_id": "breast_cancer"}', - seed=1, - budget=6.0 + config_id=6, instance_id='{"task_id": "breast_cancer"}', seed=1, budget=6.0 ), RunValue( - cost=2*0.11347517730496459, - time=2*0.21858787536621094, + cost=2 * 0.11347517730496459, + time=2 * 0.21858787536621094, status=None, starttime=time.time(), endtime=time.time(), additional_info={ - 'duration': 0.20323538780212402, - 'num_run': 6, - 'configuration_origin': 'Random Search'} + "duration": 0.20323538780212402, + "num_run": 6, + "configuration_origin": "Random Search", + }, ), status=None, origin=None, @@ -118,13 +121,13 @@ def testRead(ensemble_backend): filename = os.path.join( ensemble_backend.temporary_directory, - ".auto-sklearn/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy" + ".auto-sklearn/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy", ) assert ensbuilder.read_losses[filename]["ens_loss"] == 0.5 filename = os.path.join( ensemble_backend.temporary_directory, - ".auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy" + ".auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy", ) assert ensbuilder.read_losses[filename]["ens_loss"] == 0.0 @@ -132,13 +135,13 @@ def testRead(ensemble_backend): @pytest.mark.parametrize( "ensemble_nbest,max_models_on_disc,exp", ( - (1, None, 1), - (1.0, None, 2), - (0.1, None, 1), - (0.9, None, 1), - (1, 2, 1), - (2, 1, 1), - ) + (1, None, 1), + (1.0, None, 2), + (0.1, None, 1), + (0.9, None, 1), + (1, 2, 1), + (2, 1, 1), + ), ) def testNBest(ensemble_backend, ensemble_nbest, max_models_on_disc, exp): ensbuilder = EnsembleBuilder( @@ -158,26 +161,29 @@ def testNBest(ensemble_backend, ensemble_nbest, max_models_on_disc, exp): fixture = os.path.join( ensemble_backend.temporary_directory, - ".auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy" + ".auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy", ) assert sel_keys[0] == fixture -@pytest.mark.parametrize("test_case,exp", [ - # If None, no reduction - (None, 2), - # If Int, limit only on exceed - (4, 2), - (1, 1), - # If Float, translate float to # models. - # below, mock of each file is 100 Mb and 4 files .model and .npy (test/val/pred) exist - # per run (except for run3, there they are 5). Now, it takes 500MB for run 3 and - # another 500 MB of slack because we keep as much space as the largest model - # available as slack - (1499.0, 1), - (1500.0, 2), - (9999.0, 2), -]) +@pytest.mark.parametrize( + "test_case,exp", + [ + # If None, no reduction + (None, 2), + # If Int, limit only on exceed + (4, 2), + (1, 1), + # If Float, translate float to # models. + # below, mock of each file is 100 Mb and 4 files .model and .npy (test/val/pred) + # per run (except for run3, there they are 5). 
Now, it takes 500MB for run 3 and + # another 500 MB of slack because we keep as much space as the largest model + # available as slack + (1499.0, 1), + (1500.0, 2), + (9999.0, 2), + ], +) def testMaxModelsOnDisc(ensemble_backend, test_case, exp): ensemble_nbest = 4 ensbuilder = EnsembleBuilder( @@ -190,8 +196,8 @@ def testMaxModelsOnDisc(ensemble_backend, test_case, exp): max_models_on_disc=test_case, ) - with unittest.mock.patch('os.path.getsize') as mock: - mock.return_value = 100*1024*1024 + with unittest.mock.patch("os.path.getsize") as mock: + mock.return_value = 100 * 1024 * 1024 ensbuilder.compute_loss_per_model() sel_keys = ensbuilder.get_n_best_preds() assert len(sel_keys) == exp, test_case @@ -211,26 +217,26 @@ def testMaxModelsOnDisc2(ensemble_backend): ) ensbuilder.read_preds = {} for i in range(50): - ensbuilder.read_losses['pred'+str(i)] = { - 'ens_loss': -i*10, - 'num_run': i, - 'loaded': 1, + ensbuilder.read_losses["pred" + str(i)] = { + "ens_loss": -i * 10, + "num_run": i, + "loaded": 1, "seed": 1, - "disc_space_cost_mb": 50*i, + "disc_space_cost_mb": 50 * i, } - ensbuilder.read_preds['pred'+str(i)] = {Y_ENSEMBLE: True} + ensbuilder.read_preds["pred" + str(i)] = {Y_ENSEMBLE: True} sel_keys = ensbuilder.get_n_best_preds() - assert ['pred49', 'pred48', 'pred47'] == sel_keys + assert ["pred49", "pred48", "pred47"] == sel_keys # Make sure at least one model is kept alive ensbuilder.max_models_on_disc = 0.0 sel_keys = ensbuilder.get_n_best_preds() - assert ['pred49'] == sel_keys + assert ["pred49"] == sel_keys @pytest.mark.parametrize( "performance_range_threshold,exp", - ((0.0, 4), (0.1, 4), (0.3, 3), (0.5, 2), (0.6, 2), (0.8, 1), (1.0, 1), (1, 1)) + ((0.0, 4), (0.1, 4), (0.3, 3), (0.5, 2), (0.6, 2), (0.8, 1), (1.0, 1), (1, 1)), ) def testPerformanceRangeThreshold(ensemble_backend, performance_range_threshold, exp): ensbuilder = EnsembleBuilder( @@ -240,14 +246,14 @@ def testPerformanceRangeThreshold(ensemble_backend, performance_range_threshold, metric=roc_auc, seed=0, # important to find the test files ensemble_nbest=100, - performance_range_threshold=performance_range_threshold + performance_range_threshold=performance_range_threshold, ) ensbuilder.read_losses = { - 'A': {'ens_loss': -1, 'num_run': 1, 'loaded': -1, "seed": 1}, - 'B': {'ens_loss': -2, 'num_run': 2, 'loaded': -1, "seed": 1}, - 'C': {'ens_loss': -3, 'num_run': 3, 'loaded': -1, "seed": 1}, - 'D': {'ens_loss': -4, 'num_run': 4, 'loaded': -1, "seed": 1}, - 'E': {'ens_loss': -5, 'num_run': 5, 'loaded': -1, "seed": 1}, + "A": {"ens_loss": -1, "num_run": 1, "loaded": -1, "seed": 1}, + "B": {"ens_loss": -2, "num_run": 2, "loaded": -1, "seed": 1}, + "C": {"ens_loss": -3, "num_run": 3, "loaded": -1, "seed": 1}, + "D": {"ens_loss": -4, "num_run": 4, "loaded": -1, "seed": 1}, + "E": {"ens_loss": -5, "num_run": 5, "loaded": -1, "seed": 1}, } ensbuilder.read_preds = { key: {key_2: True for key_2 in (Y_ENSEMBLE, Y_VALID, Y_TEST)} @@ -261,12 +267,19 @@ def testPerformanceRangeThreshold(ensemble_backend, performance_range_threshold, @pytest.mark.parametrize( "performance_range_threshold,ensemble_nbest,exp", ( - (0.0, 1, 1), (0.0, 1.0, 4), (0.1, 2, 2), (0.3, 4, 3), - (0.5, 1, 1), (0.6, 10, 2), (0.8, 0.5, 1), (1, 1.0, 1) - ) + (0.0, 1, 1), + (0.0, 1.0, 4), + (0.1, 2, 2), + (0.3, 4, 3), + (0.5, 1, 1), + (0.6, 10, 2), + (0.8, 0.5, 1), + (1, 1.0, 1), + ), ) -def testPerformanceRangeThresholdMaxBest(ensemble_backend, performance_range_threshold, - ensemble_nbest, exp): +def testPerformanceRangeThresholdMaxBest( + ensemble_backend, 
performance_range_threshold, ensemble_nbest, exp +): ensbuilder = EnsembleBuilder( backend=ensemble_backend, dataset_name="TEST", @@ -278,11 +291,11 @@ def testPerformanceRangeThresholdMaxBest(ensemble_backend, performance_range_thr max_models_on_disc=None, ) ensbuilder.read_losses = { - 'A': {'ens_loss': -1, 'num_run': 1, 'loaded': -1, "seed": 1}, - 'B': {'ens_loss': -2, 'num_run': 2, 'loaded': -1, "seed": 1}, - 'C': {'ens_loss': -3, 'num_run': 3, 'loaded': -1, "seed": 1}, - 'D': {'ens_loss': -4, 'num_run': 4, 'loaded': -1, "seed": 1}, - 'E': {'ens_loss': -5, 'num_run': 5, 'loaded': -1, "seed": 1}, + "A": {"ens_loss": -1, "num_run": 1, "loaded": -1, "seed": 1}, + "B": {"ens_loss": -2, "num_run": 2, "loaded": -1, "seed": 1}, + "C": {"ens_loss": -3, "num_run": 3, "loaded": -1, "seed": 1}, + "D": {"ens_loss": -4, "num_run": 4, "loaded": -1, "seed": 1}, + "E": {"ens_loss": -5, "num_run": 5, "loaded": -1, "seed": 1}, } ensbuilder.read_preds = { key: {key_2: True for key_2 in (Y_ENSEMBLE, Y_VALID, Y_TEST)} @@ -295,13 +308,14 @@ def testPerformanceRangeThresholdMaxBest(ensemble_backend, performance_range_thr def testFallBackNBest(ensemble_backend): - ensbuilder = EnsembleBuilder(backend=ensemble_backend, - dataset_name="TEST", - task_type=BINARY_CLASSIFICATION, - metric=roc_auc, - seed=0, # important to find the test files - ensemble_nbest=1 - ) + ensbuilder = EnsembleBuilder( + backend=ensemble_backend, + dataset_name="TEST", + task_type=BINARY_CLASSIFICATION, + metric=roc_auc, + seed=0, # important to find the test files + ensemble_nbest=1, + ) ensbuilder.compute_loss_per_model() print() @@ -311,19 +325,19 @@ def testFallBackNBest(ensemble_backend): filename = os.path.join( ensemble_backend.temporary_directory, - ".auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy" + ".auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy", ) ensbuilder.read_losses[filename]["ens_loss"] = -1 filename = os.path.join( ensemble_backend.temporary_directory, - ".auto-sklearn/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy" + ".auto-sklearn/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy", ) ensbuilder.read_losses[filename]["ens_loss"] = -1 filename = os.path.join( ensemble_backend.temporary_directory, - ".auto-sklearn/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy" + ".auto-sklearn/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy", ) ensbuilder.read_losses[filename]["ens_loss"] = -1 @@ -331,7 +345,7 @@ def testFallBackNBest(ensemble_backend): fixture = os.path.join( ensemble_backend.temporary_directory, - ".auto-sklearn/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy" + ".auto-sklearn/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy", ) assert len(sel_keys) == 1 assert sel_keys[0] == fixture @@ -339,13 +353,14 @@ def testFallBackNBest(ensemble_backend): def testGetValidTestPreds(ensemble_backend): - ensbuilder = EnsembleBuilder(backend=ensemble_backend, - dataset_name="TEST", - task_type=BINARY_CLASSIFICATION, - metric=roc_auc, - seed=0, # important to find the test files - ensemble_nbest=1 - ) + ensbuilder = EnsembleBuilder( + backend=ensemble_backend, + dataset_name="TEST", + task_type=BINARY_CLASSIFICATION, + metric=roc_auc, + seed=0, # important to find the test files + ensemble_nbest=1, + ) ensbuilder.compute_loss_per_model() @@ -353,15 +368,15 @@ def testGetValidTestPreds(ensemble_backend): # different name. 
num_run=2 is selected when doing sorted() d1 = os.path.join( ensemble_backend.temporary_directory, - ".auto-sklearn/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy" + ".auto-sklearn/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy", ) d2 = os.path.join( ensemble_backend.temporary_directory, - ".auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy" + ".auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy", ) d3 = os.path.join( ensemble_backend.temporary_directory, - ".auto-sklearn/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy" + ".auto-sklearn/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy", ) sel_keys = ensbuilder.get_n_best_preds() @@ -371,10 +386,13 @@ def testGetValidTestPreds(ensemble_backend): # Number of read files should be three and # predictions_ensemble_0_4_0.0.npy must not be in there assert len(ensbuilder.read_preds) == 3 - assert os.path.join( + assert ( + os.path.join( ensemble_backend.temporary_directory, - ".auto-sklearn/runs/0_4_0.0/predictions_ensemble_0_4_0.0.npy" - ) not in ensbuilder.read_preds + ".auto-sklearn/runs/0_4_0.0/predictions_ensemble_0_4_0.0.npy", + ) + not in ensbuilder.read_preds + ) # not selected --> should still be None assert ensbuilder.read_preds[d1][Y_VALID] is None @@ -403,7 +421,7 @@ def testEntireEnsembleBuilder(ensemble_backend): d2 = os.path.join( ensemble_backend.temporary_directory, - ".auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy" + ".auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy", ) sel_keys = ensbuilder.get_n_best_preds() @@ -454,11 +472,13 @@ def test_main(ensemble_backend): seed=0, # important to find the test files ensemble_nbest=2, max_models_on_disc=None, - ) + ) ensbuilder.SAVE2DISC = False run_history, ensemble_nbest, _, _, _ = ensbuilder.main( - time_left=np.inf, iteration=1, return_predictions=False, + time_left=np.inf, + iteration=1, + return_predictions=False, ) assert len(ensbuilder.read_preds) == 3 @@ -473,26 +493,26 @@ def test_main(ensemble_backend): # As the data loader loads the same val/train/test # we expect 1.0 as score and all keys available expected_performance = { - 'ensemble_val_score': 1.0, - 'ensemble_test_score': 1.0, - 'ensemble_optimization_score': 1.0, + "ensemble_val_score": 1.0, + "ensemble_test_score": 1.0, + "ensemble_optimization_score": 1.0, } # Make sure that expected performance is a subset of the run history assert all(item in run_history[0].items() for item in expected_performance.items()) - assert 'Timestamp' in run_history[0] - assert isinstance(run_history[0]['Timestamp'], pd.Timestamp) + assert "Timestamp" in run_history[0] + assert isinstance(run_history[0]["Timestamp"], pd.Timestamp) assert os.path.exists( - os.path.join(ensemble_backend.internals_directory, 'ensemble_read_preds.pkl') + os.path.join(ensemble_backend.internals_directory, "ensemble_read_preds.pkl") ), os.listdir(ensemble_backend.internals_directory) assert os.path.exists( - os.path.join(ensemble_backend.internals_directory, 'ensemble_read_losses.pkl') + os.path.join(ensemble_backend.internals_directory, "ensemble_read_losses.pkl") ), os.listdir(ensemble_backend.internals_directory) def test_run_end_at(ensemble_backend): - with unittest.mock.patch('pynisher.enforce_limits') as pynisher_mock: + with unittest.mock.patch("pynisher.enforce_limits") as pynisher_mock: ensbuilder = EnsembleBuilder( backend=ensemble_backend, dataset_name="TEST", @@ -501,69 +521,74 @@ def test_run_end_at(ensemble_backend): seed=0, # important to find the test files ensemble_nbest=2, max_models_on_disc=None, - ) + ) 
ensbuilder.SAVE2DISC = False current_time = time.time() - ensbuilder.run(end_at=current_time + 10, iteration=1, pynisher_context='forkserver') - # 4 seconds left because: 10 seconds - 5 seconds overhead - very little overhead, + ensbuilder.run( + end_at=current_time + 10, iteration=1, pynisher_context="forkserver" + ) + # 4 seconds left because: 10 seconds - 5 seconds overhead - little overhead # but then rounded to an integer assert pynisher_mock.call_args_list[0][1]["wall_time_in_s"], 4 def testLimit(ensemble_backend): - ensbuilder = EnsembleBuilderMemMock(backend=ensemble_backend, - dataset_name="TEST", - task_type=BINARY_CLASSIFICATION, - metric=roc_auc, - seed=0, # important to find the test files - ensemble_nbest=10, - # small to trigger MemoryException - memory_limit=100, - ) + ensbuilder = EnsembleBuilderMemMock( + backend=ensemble_backend, + dataset_name="TEST", + task_type=BINARY_CLASSIFICATION, + metric=roc_auc, + seed=0, # important to find the test files + ensemble_nbest=10, + # small to trigger MemoryException + memory_limit=100, + ) ensbuilder.SAVE2DISC = False read_losses_file = os.path.join( - ensemble_backend.internals_directory, - 'ensemble_read_losses.pkl' + ensemble_backend.internals_directory, "ensemble_read_losses.pkl" ) read_preds_file = os.path.join( - ensemble_backend.internals_directory, - 'ensemble_read_preds.pkl' + ensemble_backend.internals_directory, "ensemble_read_preds.pkl" ) def mtime_mock(filename): mtimes = { - 'predictions_ensemble_0_1_0.0.npy': 0, - 'predictions_valid_0_1_0.0.npy': 0.1, - 'predictions_test_0_1_0.0.npy': 0.2, - 'predictions_ensemble_0_2_0.0.npy': 1, - 'predictions_valid_0_2_0.0.npy': 1.1, - 'predictions_test_0_2_0.0.npy': 1.2, - 'predictions_ensemble_0_3_100.0.npy': 2, - 'predictions_valid_0_3_100.0.npy': 2.1, - 'predictions_test_0_3_100.0.npy': 2.2, + "predictions_ensemble_0_1_0.0.npy": 0, + "predictions_valid_0_1_0.0.npy": 0.1, + "predictions_test_0_1_0.0.npy": 0.2, + "predictions_ensemble_0_2_0.0.npy": 1, + "predictions_valid_0_2_0.0.npy": 1.1, + "predictions_test_0_2_0.0.npy": 1.2, + "predictions_ensemble_0_3_100.0.npy": 2, + "predictions_valid_0_3_100.0.npy": 2.1, + "predictions_test_0_3_100.0.npy": 2.2, } return mtimes[os.path.split(filename)[1]] - with unittest.mock.patch('logging.getLogger') as get_logger_mock, \ - unittest.mock.patch('logging.config.dictConfig') as _, \ - unittest.mock.patch('os.path.getmtime') as mtime: + with unittest.mock.patch( + "logging.getLogger" + ) as get_logger_mock, unittest.mock.patch( + "logging.config.dictConfig" + ) as _, unittest.mock.patch( + "os.path.getmtime" + ) as mtime: logger_mock = unittest.mock.Mock() logger_mock.handlers = [] get_logger_mock.return_value = logger_mock mtime.side_effect = mtime_mock - ensbuilder.run(time_left=1000, iteration=0, pynisher_context='fork') + ensbuilder.run(time_left=1000, iteration=0, pynisher_context="fork") assert os.path.exists(read_losses_file) assert not os.path.exists(read_preds_file) assert logger_mock.warning.call_count == 1 - ensbuilder.run(time_left=1000, iteration=0, pynisher_context='fork') + ensbuilder.run(time_left=1000, iteration=0, pynisher_context="fork") assert os.path.exists(read_losses_file) assert not os.path.exists(read_preds_file) assert logger_mock.warning.call_count == 2 - ensbuilder.run(time_left=1000, iteration=0, pynisher_context='fork') + ensbuilder.run(time_left=1000, iteration=0, pynisher_context="fork") assert os.path.exists(read_losses_file) assert not os.path.exists(read_preds_file) assert logger_mock.warning.call_count == 
3 @@ -571,7 +596,7 @@ def mtime_mock(filename): # it should try to reduce ensemble_nbest until it also failed at 2 assert ensbuilder.ensemble_nbest == 1 - ensbuilder.run(time_left=1000, iteration=0, pynisher_context='fork') + ensbuilder.run(time_left=1000, iteration=0, pynisher_context="fork") assert os.path.exists(read_losses_file) assert not os.path.exists(read_preds_file) assert logger_mock.warning.call_count == 4 @@ -579,9 +604,9 @@ def mtime_mock(filename): # it should next reduce the number of models to read at most assert ensbuilder.read_at_most == 1 - # And then it still runs, but basically won't do anything any more except for raising error - # messages via the logger - ensbuilder.run(time_left=1000, iteration=0, pynisher_context='fork') + # And then it still runs, but basically won't do anything any more except for + # raising error messages via the logger + ensbuilder.run(time_left=1000, iteration=0, pynisher_context="fork") assert os.path.exists(read_losses_file) assert not os.path.exists(read_preds_file) assert logger_mock.warning.call_count == 4 @@ -592,8 +617,9 @@ def mtime_mock(filename): logger_mock.error.call_args_list ) for i in range(len(logger_mock.error.call_args_list)): - assert 'Memory Exception -- Unable to further reduce' in str( - logger_mock.error.call_args_list[i]) + assert "Memory Exception -- Unable to further reduce" in str( + logger_mock.error.call_args_list[i] + ) def test_read_pickle_read_preds(ensemble_backend): @@ -610,15 +636,14 @@ def test_read_pickle_read_preds(ensemble_backend): seed=0, # important to find the test files ensemble_nbest=2, max_models_on_disc=None, - ) + ) ensbuilder.SAVE2DISC = False ensbuilder.main(time_left=np.inf, iteration=1, return_predictions=False) # Check that the memory was created ensemble_memory_file = os.path.join( - ensemble_backend.internals_directory, - 'ensemble_read_preds.pkl' + ensemble_backend.internals_directory, "ensemble_read_preds.pkl" ) assert os.path.exists(ensemble_memory_file) @@ -630,8 +655,7 @@ def test_read_pickle_read_preds(ensemble_backend): assert last_hash == ensbuilder.last_hash ensemble_memory_file = os.path.join( - ensemble_backend.internals_directory, - 'ensemble_read_losses.pkl' + ensemble_backend.internals_directory, "ensemble_read_losses.pkl" ) assert os.path.exists(ensemble_memory_file) @@ -650,21 +674,23 @@ def test_read_pickle_read_preds(ensemble_backend): seed=0, # important to find the test files ensemble_nbest=2, max_models_on_disc=None, - ) + ) compare_read_preds(ensbuilder2.read_preds, ensbuilder.read_preds) compare_read_preds(ensbuilder2.read_losses, ensbuilder.read_losses) assert ensbuilder2.last_hash == ensbuilder.last_hash @pytest.mark.parametrize("metric", [log_loss, accuracy]) -@unittest.mock.patch('os.path.exists') -def test_get_identifiers_from_run_history(exists, metric, ensemble_run_history, ensemble_backend): +@unittest.mock.patch("os.path.exists") +def test_get_identifiers_from_run_history( + exists, metric, ensemble_run_history, ensemble_backend +): exists.return_value = True ensemble = SingleBest( - metric=log_loss, - seed=1, - run_history=ensemble_run_history, - backend=ensemble_backend, + metric=log_loss, + seed=1, + run_history=ensemble_run_history, + backend=ensemble_backend, ) # Just one model @@ -682,7 +708,7 @@ def test_ensemble_builder_process_realrun(dask_client_single_worker, ensemble_ba start_time=time.time(), time_left_for_ensembles=1000, backend=ensemble_backend, - dataset_name='Test', + dataset_name="Test", task=BINARY_CLASSIFICATION, metric=MockMetric, 
ensemble_size=50, @@ -701,12 +727,12 @@ def test_ensemble_builder_process_realrun(dask_client_single_worker, ensemble_ba result = future.result() history, _, _, _, _ = result - assert 'ensemble_optimization_score' in history[0] - assert history[0]['ensemble_optimization_score'] == 0.9 - assert 'ensemble_val_score' in history[0] - assert history[0]['ensemble_val_score'] == 0.9 - assert 'ensemble_test_score' in history[0] - assert history[0]['ensemble_test_score'] == 0.9 + assert "ensemble_optimization_score" in history[0] + assert history[0]["ensemble_optimization_score"] == 0.9 + assert "ensemble_val_score" in history[0] + assert history[0]["ensemble_val_score"] == 0.9 + assert "ensemble_test_score" in history[0] + assert history[0]["ensemble_test_score"] == 0.9 def test_ensemble_builder_nbest_remembered( @@ -722,7 +748,7 @@ def test_ensemble_builder_nbest_remembered( start_time=time.time(), time_left_for_ensembles=1000, backend=ensemble_backend, - dataset_name='Test', + dataset_name="Test", task=MULTILABEL_CLASSIFICATION, metric=roc_auc, ensemble_size=50, @@ -740,7 +766,9 @@ def test_ensemble_builder_nbest_remembered( future = manager.futures[0] dask.distributed.wait([future]) # wait for the ensemble process to finish assert future.result() == ([], 5, None, None, None) - file_path = os.path.join(ensemble_backend.internals_directory, 'ensemble_read_preds.pkl') + file_path = os.path.join( + ensemble_backend.internals_directory, "ensemble_read_preds.pkl" + ) assert not os.path.exists(file_path) manager.build_ensemble(dask_client_single_worker, unit_test=True) diff --git a/test/test_ensemble_builder/test_ensemble_selection.py b/test/test_ensemble_builder/test_ensemble_selection.py index c03060c037..44e00229fb 100644 --- a/test/test_ensemble_builder/test_ensemble_selection.py +++ b/test/test_ensemble_builder/test_ensemble_selection.py @@ -1,5 +1,4 @@ import numpy as np - import pytest from autosklearn.constants import BINARY_CLASSIFICATION, REGRESSION @@ -12,10 +11,12 @@ def testEnsembleSelection(): Makes sure ensemble selection fit method creates an ensemble correctly """ - ensemble = EnsembleSelection(ensemble_size=10, - task_type=REGRESSION, - random_state=0, - metric=root_mean_squared_error) + ensemble = EnsembleSelection( + ensemble_size=10, + task_type=REGRESSION, + random_state=0, + metric=root_mean_squared_error, + ) # We create a problem such that we encourage the addition of members to the ensemble # Fundamentally, the average of 10 sequential number is 5.5 @@ -23,24 +24,57 @@ def testEnsembleSelection(): predictions = [] for i in range(1, 20): pred = np.full((100), i, dtype=np.float32) - pred[i*5:5*(i+1)] = 5.5 * i + pred[i * 5 : 5 * (i + 1)] = 5.5 * i predictions.append(pred) ensemble.fit(predictions, y_true, identifiers=[(i, i, i) for i in range(20)]) - np.testing.assert_array_equal(ensemble.weights_, - np.array([0.1, 0.2, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, - 0., 0., 0., 0., 0., 0., 0., 0., - 0., 0., 0.])) + np.testing.assert_array_equal( + ensemble.weights_, + np.array( + [ + 0.1, + 0.2, + 0.2, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + ] + ), + ) assert ensemble.identifiers_ == [(i, i, i) for i in range(20)] - np.testing.assert_array_almost_equal(np.array(ensemble.trajectory_), - np.array([3.462296925452813, 2.679202306657711, - 2.2748626436960375, 2.065717187806695, - 1.7874562615598728, 1.6983448128441783, - 1.559451106330085, 1.5316326052614575, - 1.3801950121782542, 1.3554980575295374])) + 
np.testing.assert_array_almost_equal( + np.array(ensemble.trajectory_), + np.array( + [ + 3.462296925452813, + 2.679202306657711, + 2.2748626436960375, + 2.065717187806695, + 1.7874562615598728, + 1.6983448128441783, + 1.559451106330085, + 1.5316326052614575, + 1.3801950121782542, + 1.3554980575295374, + ] + ), + ) def testPredict(): @@ -54,52 +88,38 @@ def testPredict(): # we first exclude all occurrences of zero in self.weights_, and then # apply the weights. # If none of the above is the case, predict() raises Error. - ensemble = EnsembleSelection(ensemble_size=3, - task_type=BINARY_CLASSIFICATION, - random_state=0, - metric=accuracy, - ) + ensemble = EnsembleSelection( + ensemble_size=3, + task_type=BINARY_CLASSIFICATION, + random_state=0, + metric=accuracy, + ) # Test for case 1. Create (3, 2, 2) predictions. - per_model_pred = np.array([ - [[0.9, 0.1], - [0.4, 0.6]], - [[0.8, 0.2], - [0.3, 0.7]], - [[1.0, 0.0], - [0.1, 0.9]] - ]) + per_model_pred = np.array( + [[[0.9, 0.1], [0.4, 0.6]], [[0.8, 0.2], [0.3, 0.7]], [[1.0, 0.0], [0.1, 0.9]]] + ) # Weights of 3 hypothetical models ensemble.weights_ = [0.7, 0.2, 0.1] pred = ensemble.predict(per_model_pred) - truth = np.array([[0.89, 0.11], # This should be the true prediction. - [0.35, 0.65]]) + truth = np.array( + [[0.89, 0.11], [0.35, 0.65]] # This should be the true prediction. + ) assert np.allclose(pred, truth) # Test for case 2. - per_model_pred = np.array([ - [[0.9, 0.1], - [0.4, 0.6]], - [[0.8, 0.2], - [0.3, 0.7]], - [[1.0, 0.0], - [0.1, 0.9]] - ]) + per_model_pred = np.array( + [[[0.9, 0.1], [0.4, 0.6]], [[0.8, 0.2], [0.3, 0.7]], [[1.0, 0.0], [0.1, 0.9]]] + ) # The third model now has weight of zero. ensemble.weights_ = [0.7, 0.2, 0.0, 0.1] pred = ensemble.predict(per_model_pred) - truth = np.array([[0.89, 0.11], - [0.35, 0.65]]) + truth = np.array([[0.89, 0.11], [0.35, 0.65]]) assert np.allclose(pred, truth) # Test for error case. - per_model_pred = np.array([ - [[0.9, 0.1], - [0.4, 0.6]], - [[0.8, 0.2], - [0.3, 0.7]], - [[1.0, 0.0], - [0.1, 0.9]] - ]) + per_model_pred = np.array( + [[[0.9, 0.1], [0.4, 0.6]], [[0.8, 0.2], [0.3, 0.7]], [[1.0, 0.0], [0.1, 0.9]]] + ) # Now the weights have 2 zero weights and 2 non-zero weights, # which is incompatible. 
ensemble.weights_ = [0.6, 0.0, 0.0, 0.4] diff --git a/test/test_evaluation/__init__.py b/test/test_evaluation/__init__.py index cc3cd7becd..e298f0f075 100644 --- a/test/test_evaluation/__init__.py +++ b/test/test_evaluation/__init__.py @@ -1,2 +1,2 @@ # -*- encoding: utf-8 -*- -__author__ = 'feurerm' +__author__ = "feurerm" diff --git a/test/test_evaluation/evaluation_util.py b/test/test_evaluation/evaluation_util.py index e8ba4edf07..d8bf017c35 100644 --- a/test/test_evaluation/evaluation_util.py +++ b/test/test_evaluation/evaluation_util.py @@ -1,28 +1,53 @@ import functools -import traceback import tempfile +import traceback import unittest import numpy as np -from numpy.linalg import LinAlgError import sklearn.datasets -from sklearn import preprocessing import sklearn.model_selection +from numpy.linalg import LinAlgError +from sklearn import preprocessing from autosklearn.automl_common.common.utils.backend import Backend - -from autosklearn.constants import \ - MULTICLASS_CLASSIFICATION, MULTILABEL_CLASSIFICATION, BINARY_CLASSIFICATION, REGRESSION -from autosklearn.util.data import convert_to_bin +from autosklearn.constants import ( + BINARY_CLASSIFICATION, + MULTICLASS_CLASSIFICATION, + MULTILABEL_CLASSIFICATION, + REGRESSION, +) from autosklearn.data.xy_data_manager import XYDataManager +from autosklearn.metrics import ( + accuracy, + balanced_accuracy, + f1_macro, + f1_micro, + f1_weighted, + log_loss, + precision_macro, + precision_micro, + precision_weighted, + recall_macro, + recall_micro, + recall_weighted, +) from autosklearn.pipeline.util import get_dataset -from autosklearn.metrics import accuracy, balanced_accuracy, f1_macro, f1_micro, f1_weighted, \ - log_loss, precision_macro, precision_micro, precision_weighted, recall_macro, \ - recall_micro, recall_weighted +from autosklearn.util.data import convert_to_bin -SCORER_LIST = [accuracy, balanced_accuracy, f1_macro, f1_micro, f1_weighted, log_loss, - precision_macro, precision_micro, precision_weighted, recall_macro, - recall_micro, recall_weighted] +SCORER_LIST = [ + accuracy, + balanced_accuracy, + f1_macro, + f1_micro, + f1_weighted, + log_loss, + precision_macro, + precision_micro, + precision_weighted, + recall_macro, + recall_micro, + recall_weighted, +] N_TEST_RUNS = 5 @@ -32,14 +57,16 @@ def get_evaluation_backend(): backend_mock.temporary_directory = tempfile.gettempdir() # Assign a default data - backend_mock.load_datamanager.return_value = get_multiclass_classification_datamanager() + backend_mock.load_datamanager.return_value = ( + get_multiclass_classification_datamanager() + ) return backend_mock class Dummy(object): def __init__(self): - self.name = 'Dummy' + self.name = "Dummy" class BaseEvaluatorTest(unittest.TestCase): @@ -61,82 +88,85 @@ def __fit(self, function_handle): function_handle() return True except KeyError as e: - if 'Floating-point under-/overflow occurred at epoch' in \ - e.args[0] or \ - 'removed all features' in e.args[0] or \ - 'failed to create intent' in e.args[0]: + if ( + "Floating-point under-/overflow occurred at epoch" in e.args[0] + or "removed all features" in e.args[0] + or "failed to create intent" in e.args[0] + ): pass else: traceback.print_exc() raise e except ValueError as e: - if 'Floating-point under-/overflow occurred at epoch' in e.args[ - 0] or \ - 'removed all features' in e.args[0] or \ - 'failed to create intent' in e.args[0]: + if ( + "Floating-point under-/overflow occurred at epoch" in e.args[0] + or "removed all features" in e.args[0] + or "failed to create intent" 
in e.args[0] + ): pass else: raise e except LinAlgError as e: - if 'not positive definite, even with jitter' in e.args[0]: + if "not positive definite, even with jitter" in e.args[0]: pass else: raise e except RuntimeWarning as e: - if 'invalid value encountered in sqrt' in e.args[0]: + if "invalid value encountered in sqrt" in e.args[0]: pass - elif 'divide by zero encountered in divide' in e.args[0]: + elif "divide by zero encountered in divide" in e.args[0]: pass else: raise e except UserWarning as e: - if 'FastICA did not converge' in e.args[0]: + if "FastICA did not converge" in e.args[0]: pass else: raise e def get_multiclass_classification_datamanager(): - X_train, Y_train, X_test, Y_test = get_dataset('iris') + X_train, Y_train, X_test, Y_test = get_dataset("iris") indices = list(range(X_train.shape[0])) np.random.seed(1) np.random.shuffle(indices) X_train = X_train[indices] Y_train = Y_train[indices] - X_valid = X_test[:25, ] - Y_valid = Y_test[:25, ] - X_test = X_test[25:, ] - Y_test = Y_test[25:, ] + X_valid = X_test[ + :25, + ] + Y_valid = Y_test[ + :25, + ] + X_test = X_test[ + 25:, + ] + Y_test = Y_test[ + 25:, + ] D = Dummy() - D.info = { - 'task': MULTICLASS_CLASSIFICATION, - 'is_sparse': False, - 'label_num': 3 - } + D.info = {"task": MULTICLASS_CLASSIFICATION, "is_sparse": False, "label_num": 3} D.data = { - 'X_train': X_train, - 'Y_train': Y_train, - 'X_valid': X_valid, - 'Y_valid': Y_valid, - 'X_test': X_test, - 'Y_test': Y_test + "X_train": X_train, + "Y_train": Y_train, + "X_valid": X_valid, + "Y_valid": Y_valid, + "X_test": X_test, + "Y_test": Y_test, } - D.feat_type = {0: 'numerical', - 1: 'Numerical', - 2: 'numerical', - 3: 'numerical'} + D.feat_type = {0: "numerical", 1: "Numerical", 2: "numerical", 3: "numerical"} return D def get_abalone_datamanager(): # https://www.openml.org/d/183 - dataset_name = 'abalone' + dataset_name = "abalone" data = sklearn.datasets.fetch_openml(data_id=183, as_frame=True) feat_type = { - i: 'Categorical' if x.name == 'category' else 'Numerical' - for i, x in enumerate(data['data'].dtypes) + i: "Categorical" if x.name == "category" else "Numerical" + for i, x in enumerate(data["data"].dtypes) } X, y = sklearn.datasets.fetch_openml(data_id=183, return_X_y=True, as_frame=False) y = preprocessing.LabelEncoder().fit_transform(y) @@ -145,17 +175,19 @@ def get_abalone_datamanager(): ) D = XYDataManager( - X_train, y_train, - X_test, y_test, + X_train, + y_train, + X_test, + y_test, MULTICLASS_CLASSIFICATION, feat_type, - dataset_name + dataset_name, ) return D def get_multilabel_classification_datamanager(): - X_train, Y_train, X_test, Y_test = get_dataset('iris') + X_train, Y_train, X_test, Y_test = get_dataset("iris") indices = list(range(X_train.shape[0])) np.random.seed(1) np.random.shuffle(indices) @@ -171,34 +203,35 @@ def get_multilabel_classification_datamanager(): # Y_test_[:, Y_test[i]] = 1 # Y_test = Y_test_ - X_valid = X_test[:25, ] - Y_valid = Y_test[:25, ] - X_test = X_test[25:, ] - Y_test = Y_test[25:, ] + X_valid = X_test[ + :25, + ] + Y_valid = Y_test[ + :25, + ] + X_test = X_test[ + 25:, + ] + Y_test = Y_test[ + 25:, + ] D = Dummy() - D.info = { - 'task': MULTILABEL_CLASSIFICATION, - 'is_sparse': False, - 'label_num': 3 - } + D.info = {"task": MULTILABEL_CLASSIFICATION, "is_sparse": False, "label_num": 3} D.data = { - 'X_train': X_train, - 'Y_train': Y_train, - 'X_valid': X_valid, - 'Y_valid': Y_valid, - 'X_test': X_test, - 'Y_test': Y_test + "X_train": X_train, + "Y_train": Y_train, + "X_valid": X_valid, + "Y_valid": 
Y_valid, + "X_test": X_test, + "Y_test": Y_test, } - D.feat_type = {0: 'numerical', - 1: 'Numerical', - 2: 'numerical', - 3: 'numerical'} + D.feat_type = {0: "numerical", 1: "Numerical", 2: "numerical", 3: "numerical"} return D def get_binary_classification_datamanager(): - X_train, Y_train, X_test, Y_test = get_dataset('iris') + X_train, Y_train, X_test, Y_test = get_dataset("iris") indices = list(range(X_train.shape[0])) np.random.seed(1) np.random.shuffle(indices) @@ -213,99 +246,108 @@ def get_binary_classification_datamanager(): X_test = X_test[eliminate_class_two] Y_test = Y_test[eliminate_class_two] - X_valid = X_test[:25, ] - Y_valid = Y_test[:25, ] - X_test = X_test[25:, ] - Y_test = Y_test[25:, ] + X_valid = X_test[ + :25, + ] + Y_valid = Y_test[ + :25, + ] + X_test = X_test[ + 25:, + ] + Y_test = Y_test[ + 25:, + ] D = Dummy() - D.info = { - 'task': BINARY_CLASSIFICATION, - 'is_sparse': False, - 'label_num': 2 - } + D.info = {"task": BINARY_CLASSIFICATION, "is_sparse": False, "label_num": 2} D.data = { - 'X_train': X_train, - 'Y_train': Y_train.reshape((-1, 1)), - 'X_valid': X_valid, - 'Y_valid': Y_valid.reshape((-1, 1)), - 'X_test': X_test, - 'Y_test': Y_test.reshape((-1, 1)) + "X_train": X_train, + "Y_train": Y_train.reshape((-1, 1)), + "X_valid": X_valid, + "Y_valid": Y_valid.reshape((-1, 1)), + "X_test": X_test, + "Y_test": Y_test.reshape((-1, 1)), } - D.feat_type = {0: 'numerical', - 1: 'Numerical', - 2: 'numerical', - 3: 'numerical'} + D.feat_type = {0: "numerical", 1: "Numerical", 2: "numerical", 3: "numerical"} return D def get_regression_datamanager(): - X_train, Y_train, X_test, Y_test = get_dataset('boston') + X_train, Y_train, X_test, Y_test = get_dataset("boston") indices = list(range(X_train.shape[0])) np.random.seed(1) np.random.shuffle(indices) X_train = X_train[indices] Y_train = Y_train[indices] - X_valid = X_test[:200, ] - Y_valid = Y_test[:200, ] - X_test = X_test[200:, ] - Y_test = Y_test[200:, ] + X_valid = X_test[ + :200, + ] + Y_valid = Y_test[ + :200, + ] + X_test = X_test[ + 200:, + ] + Y_test = Y_test[ + 200:, + ] D = Dummy() - D.info = { - 'task': REGRESSION, - 'is_sparse': False, - 'label_num': 1 - } + D.info = {"task": REGRESSION, "is_sparse": False, "label_num": 1} D.data = { - 'X_train': X_train, - 'Y_train': Y_train.reshape((-1, 1)), - 'X_valid': X_valid, - 'Y_valid': Y_valid.reshape((-1, 1)), - 'X_test': X_test, - 'Y_test': Y_test.reshape((-1, 1)) + "X_train": X_train, + "Y_train": Y_train.reshape((-1, 1)), + "X_valid": X_valid, + "Y_valid": Y_valid.reshape((-1, 1)), + "X_test": X_test, + "Y_test": Y_test.reshape((-1, 1)), } - D.feat_type = {i: 'numerical' for i in range(X_train.shape[1])} + D.feat_type = {i: "numerical" for i in range(X_train.shape[1])} return D def get_500_classes_datamanager(): weights = ([0.002] * 475) + ([0.001] * 25) - X, Y = sklearn.datasets.make_classification(n_samples=1000, - n_features=20, - n_classes=500, - n_clusters_per_class=1, - n_informative=15, - n_redundant=5, - n_repeated=0, - weights=weights, - flip_y=0, - class_sep=1.0, - hypercube=True, - shift=None, - scale=1.0, - shuffle=True, - random_state=1) + X, Y = sklearn.datasets.make_classification( + n_samples=1000, + n_features=20, + n_classes=500, + n_clusters_per_class=1, + n_informative=15, + n_redundant=5, + n_repeated=0, + weights=weights, + flip_y=0, + class_sep=1.0, + hypercube=True, + shift=None, + scale=1.0, + shuffle=True, + random_state=1, + ) D = Dummy() - D.info = { - 'task': MULTICLASS_CLASSIFICATION, - 'is_sparse': False, - 'label_num': 500 + 
D.info = {"task": MULTICLASS_CLASSIFICATION, "is_sparse": False, "label_num": 500} + D.data = { + "X_train": X[:700], + "Y_train": Y[:700], + "X_valid": X[700:710], + "Y_valid": Y[700:710], + "X_test": X[710:], + "Y_test": Y[710:], } - D.data = {'X_train': X[:700], 'Y_train': Y[:700], - 'X_valid': X[700:710], 'Y_valid': Y[700:710], - 'X_test': X[710:], 'Y_test': Y[710:] - } - D.feat_type = {i: 'numerical' for i in range(20)} + D.feat_type = {i: "numerical" for i in range(20)} return D def get_dataset_getters(): - return [get_binary_classification_datamanager, - get_multiclass_classification_datamanager, - get_multilabel_classification_datamanager, - get_500_classes_datamanager, - get_abalone_datamanager, - get_regression_datamanager] + return [ + get_binary_classification_datamanager, + get_multiclass_classification_datamanager, + get_multilabel_classification_datamanager, + get_500_classes_datamanager, + get_abalone_datamanager, + get_regression_datamanager, + ] diff --git a/test/test_evaluation/test_abstract_evaluator.py b/test/test_evaluation/test_abstract_evaluator.py index f51820221b..c668a82ffd 100644 --- a/test/test_evaluation/test_abstract_evaluator.py +++ b/test/test_evaluation/test_abstract_evaluator.py @@ -3,19 +3,18 @@ import os import shutil import sys +import tempfile import unittest import unittest.mock -import tempfile import numpy as np import sklearn.dummy +from smac.tae import StatusType from autosklearn.automl_common.common.utils.backend import Backend, BackendContext - from autosklearn.evaluation.abstract_evaluator import AbstractEvaluator -from autosklearn.pipeline.components.base import _addons from autosklearn.metrics import accuracy -from smac.tae import StatusType +from autosklearn.pipeline.components.base import _addons this_directory = os.path.dirname(__file__) sys.path.append(this_directory) @@ -29,7 +28,7 @@ def setUp(self): """ Creates a backend mock """ - self.ev_path = os.path.join(this_directory, '.tmp_evaluations') + self.ev_path = os.path.join(this_directory, ".tmp_evaluations") if not os.path.exists(self.ev_path): os.mkdir(self.ev_path) dummy_model_files = [os.path.join(self.ev_path, str(n)) for n in range(100)] @@ -46,7 +45,7 @@ def setUp(self): self.port = logging.handlers.DEFAULT_TCP_LOGGING_PORT - self.working_directory = os.path.join(this_directory, '.tmp_%s' % self.id()) + self.working_directory = os.path.join(this_directory, ".tmp_%s" % self.id()) def tearDown(self): if os.path.exists(self.ev_path): @@ -56,16 +55,18 @@ def tearDown(self): pass def test_finish_up_model_predicts_NaN(self): - '''Tests by handing in predictions which contain NaNs''' + """Tests by handing in predictions which contain NaNs""" rs = np.random.RandomState(1) queue_mock = unittest.mock.Mock() - ae = AbstractEvaluator(backend=self.backend_mock, - port=self.port, - output_y_hat_optimization=False, - queue=queue_mock, metric=accuracy, - additional_components=dict(), - ) + ae = AbstractEvaluator( + backend=self.backend_mock, + port=self.port, + output_y_hat_optimization=False, + queue=queue_mock, + metric=accuracy, + additional_components=dict(), + ) ae.Y_optimization = rs.rand(33, 3) predictions_ensemble = rs.rand(33, 3) predictions_test = rs.rand(25, 3) @@ -85,9 +86,10 @@ def test_finish_up_model_predicts_NaN(self): status=StatusType.SUCCESS, ) self.assertEqual(loss, 1.0) - self.assertEqual(additional_run_info, - {'error': 'Model predictions for optimization set ' - 'contains NaNs.'}) + self.assertEqual( + additional_run_info, + {"error": "Model predictions for optimization 
set " "contains NaNs."}, + ) # NaNs in prediction validation predictions_ensemble[5, 2] = 0.5 @@ -104,9 +106,10 @@ def test_finish_up_model_predicts_NaN(self): status=StatusType.SUCCESS, ) self.assertEqual(loss, 1.0) - self.assertEqual(additional_run_info, - {'error': 'Model predictions for validation set ' - 'contains NaNs.'}) + self.assertEqual( + additional_run_info, + {"error": "Model predictions for validation set " "contains NaNs."}, + ) # NaNs in prediction test predictions_valid[5, 2] = 0.5 @@ -123,9 +126,10 @@ def test_finish_up_model_predicts_NaN(self): status=StatusType.SUCCESS, ) self.assertEqual(loss, 1.0) - self.assertEqual(additional_run_info, - {'error': 'Model predictions for test set contains ' - 'NaNs.'}) + self.assertEqual( + additional_run_info, + {"error": "Model predictions for test set contains " "NaNs."}, + ) self.assertEqual(self.backend_mock.save_predictions_as_npy.call_count, 0) @@ -147,12 +151,10 @@ def test_disable_file_output(self): predictions_test = rs.rand(25, 3) predictions_valid = rs.rand(25, 3) - loss_, additional_run_info_ = ( - ae.file_output( - predictions_ensemble, - predictions_valid, - predictions_test, - ) + loss_, additional_run_info_ = ae.file_output( + predictions_ensemble, + predictions_valid, + predictions_test, ) self.assertIsNone(loss_) @@ -160,7 +162,7 @@ def test_disable_file_output(self): # This function is never called as there is a return before self.assertEqual(self.backend_mock.save_numrun_to_dir.call_count, 0) - for call_count, disable in enumerate(['model', 'cv_model'], start=1): + for call_count, disable in enumerate(["model", "cv_model"], start=1): ae = AbstractEvaluator( backend=self.backend_mock, output_y_hat_optimization=False, @@ -174,38 +176,49 @@ def test_disable_file_output(self): ae.model = unittest.mock.Mock() ae.models = [unittest.mock.Mock()] - loss_, additional_run_info_ = ( - ae.file_output( - predictions_ensemble, - predictions_valid, - predictions_test, - ) + loss_, additional_run_info_ = ae.file_output( + predictions_ensemble, + predictions_valid, + predictions_test, ) self.assertIsNone(loss_) self.assertEqual(additional_run_info_, {}) - self.assertEqual(self.backend_mock.save_numrun_to_dir.call_count, call_count) - if disable == 'model': + self.assertEqual( + self.backend_mock.save_numrun_to_dir.call_count, call_count + ) + if disable == "model": self.assertIsNone( - self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]['model']) + self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]["model"] + ) self.assertIsNotNone( - self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]['cv_model']) + self.backend_mock.save_numrun_to_dir.call_args_list[-1][1][ + "cv_model" + ] + ) else: self.assertIsNotNone( - self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]['model']) + self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]["model"] + ) self.assertIsNone( - self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]['cv_model']) + self.backend_mock.save_numrun_to_dir.call_args_list[-1][1][ + "cv_model" + ] + ) self.assertIsNotNone( self.backend_mock.save_numrun_to_dir.call_args_list[-1][1][ - 'ensemble_predictions'] + "ensemble_predictions" + ] ) self.assertIsNotNone( self.backend_mock.save_numrun_to_dir.call_args_list[-1][1][ - 'valid_predictions'] + "valid_predictions" + ] ) self.assertIsNotNone( self.backend_mock.save_numrun_to_dir.call_args_list[-1][1][ - 'test_predictions'] + "test_predictions" + ] ) ae = AbstractEvaluator( @@ -213,20 +226,18 @@ def test_disable_file_output(self): 
output_y_hat_optimization=False, queue=queue_mock, metric=accuracy, - disable_file_output=['y_optimization'], + disable_file_output=["y_optimization"], port=self.port, additional_components=dict(), ) ae.Y_optimization = predictions_ensemble - ae.model = 'model' + ae.model = "model" ae.models = [unittest.mock.Mock()] - loss_, additional_run_info_ = ( - ae.file_output( - predictions_ensemble, - predictions_valid, - predictions_test, - ) + loss_, additional_run_info_ = ae.file_output( + predictions_ensemble, + predictions_valid, + predictions_test, ) self.assertIsNone(loss_) @@ -234,15 +245,18 @@ def test_disable_file_output(self): self.assertIsNone( self.backend_mock.save_numrun_to_dir.call_args_list[-1][1][ - 'ensemble_predictions'] + "ensemble_predictions" + ] ) self.assertIsNotNone( self.backend_mock.save_numrun_to_dir.call_args_list[-1][1][ - 'valid_predictions'] + "valid_predictions" + ] ) self.assertIsNotNone( self.backend_mock.save_numrun_to_dir.call_args_list[-1][1][ - 'test_predictions'] + "test_predictions" + ] ) def test_file_output(self): @@ -252,14 +266,18 @@ def test_file_output(self): queue_mock = unittest.mock.Mock() context = BackendContext( - temporary_directory=os.path.join(self.working_directory, 'tmp'), - output_directory=os.path.join(self.working_directory, 'tmp_output'), + temporary_directory=os.path.join(self.working_directory, "tmp"), + output_directory=os.path.join(self.working_directory, "tmp_output"), delete_tmp_folder_after_terminate=True, delete_output_folder_after_terminate=True, - prefix="auto-sklearn" + prefix="auto-sklearn", ) - with unittest.mock.patch.object(Backend, 'load_datamanager') as load_datamanager_mock: - load_datamanager_mock.return_value = get_multiclass_classification_datamanager() + with unittest.mock.patch.object( + Backend, "load_datamanager" + ) as load_datamanager_mock: + load_datamanager_mock.return_value = ( + get_multiclass_classification_datamanager() + ) backend = Backend(context, prefix="auto-sklearn") @@ -285,8 +303,17 @@ def test_file_output(self): Y_test_pred=predictions_test, ) - self.assertTrue(os.path.exists(os.path.join(self.working_directory, 'tmp', - '.auto-sklearn', 'runs', '1_0_None'))) + self.assertTrue( + os.path.exists( + os.path.join( + self.working_directory, + "tmp", + ".auto-sklearn", + "runs", + "1_0_None", + ) + ) + ) shutil.rmtree(self.working_directory, ignore_errors=True) @@ -297,26 +324,34 @@ def test_add_additional_components(self): queue_mock = unittest.mock.Mock() context = BackendContext( - temporary_directory=os.path.join(self.working_directory, 'tmp'), - output_directory=os.path.join(self.working_directory, 'tmp_output'), + temporary_directory=os.path.join(self.working_directory, "tmp"), + output_directory=os.path.join(self.working_directory, "tmp_output"), delete_tmp_folder_after_terminate=True, delete_output_folder_after_terminate=True, - prefix="auto-sklearn" + prefix="auto-sklearn", ) - with unittest.mock.patch.object(Backend, 'load_datamanager') as load_datamanager_mock: - load_datamanager_mock.return_value = get_multiclass_classification_datamanager() + with unittest.mock.patch.object( + Backend, "load_datamanager" + ) as load_datamanager_mock: + load_datamanager_mock.return_value = ( + get_multiclass_classification_datamanager() + ) backend = Backend(context, prefix="auto-sklearn") - with unittest.mock.patch.object(_addons['classification'], 'add_component') as _: + with unittest.mock.patch.object( + _addons["classification"], "add_component" + ) as _: - # If the components in the argument 
`additional_components` are an empty dict - # there is no call to `add_component`, if there's something in it, `add_component - # is called (2nd case) - for fixture, case in ((0, dict()), (1, dict(abc='def'))): + # If the components in the argument `additional_components` are an + # empty dict there is no call to `add_component`, + # if there's something in it, `add_component is called (2nd case) + for fixture, case in ((0, dict()), (1, dict(abc="def"))): thirdparty_components_patch = unittest.mock.Mock() thirdparty_components_patch.components = case - additional_components = dict(classification=thirdparty_components_patch) + additional_components = dict( + classification=thirdparty_components_patch + ) AbstractEvaluator( backend=backend, output_y_hat_optimization=False, @@ -325,4 +360,6 @@ def test_add_additional_components(self): port=self.port, additional_components=additional_components, ) - self.assertEqual(_addons['classification'].add_component.call_count, fixture) + self.assertEqual( + _addons["classification"].add_component.call_count, fixture + ) diff --git a/test/test_evaluation/test_custom_splitters.py b/test/test_evaluation/test_custom_splitters.py index 4922442228..64f9dc2f18 100644 --- a/test/test_evaluation/test_custom_splitters.py +++ b/test/test_evaluation/test_custom_splitters.py @@ -1,37 +1,44 @@ -import pytest - import numpy as np +import pytest -from autosklearn.evaluation.splitter import CustomStratifiedShuffleSplit from autosklearn.constants import ( - BINARY_CLASSIFICATION, MULTICLASS_CLASSIFICATION, MULTILABEL_CLASSIFICATION + BINARY_CLASSIFICATION, + MULTICLASS_CLASSIFICATION, + MULTILABEL_CLASSIFICATION, ) +from autosklearn.evaluation.splitter import CustomStratifiedShuffleSplit -@pytest.mark.parametrize("task, X, y", [ - ( - BINARY_CLASSIFICATION, - np.asarray(10000 * [[1, 1, 1, 1, 1]]), - np.asarray(9999 * [0] + 1 * [1]) - ), - ( - MULTICLASS_CLASSIFICATION, - np.asarray(10000 * [[1, 1, 1, 1, 1]]), - np.asarray(4999 * [1] + 4999 * [2] + 1 * [3] + 1 * [4])), - ( - MULTILABEL_CLASSIFICATION, - np.asarray(10000 * [[1, 1, 1, 1, 1]]), - np.asarray(4999 * [[0, 1, 1]] + 4999 * [[1, 1, 0]] + 1 * [[1, 0, 1]] + 1 * [[0, 0, 0]]) - ) -]) -@pytest.mark.parametrize('train_size', [100, 0.5, 200, 0.75]) +@pytest.mark.parametrize( + "task, X, y", + [ + ( + BINARY_CLASSIFICATION, + np.asarray(10000 * [[1, 1, 1, 1, 1]]), + np.asarray(9999 * [0] + 1 * [1]), + ), + ( + MULTICLASS_CLASSIFICATION, + np.asarray(10000 * [[1, 1, 1, 1, 1]]), + np.asarray(4999 * [1] + 4999 * [2] + 1 * [3] + 1 * [4]), + ), + ( + MULTILABEL_CLASSIFICATION, + np.asarray(10000 * [[1, 1, 1, 1, 1]]), + np.asarray( + 4999 * [[0, 1, 1]] + + 4999 * [[1, 1, 0]] + + 1 * [[1, 0, 1]] + + 1 * [[0, 0, 0]] + ), + ), + ], +) +@pytest.mark.parametrize("train_size", [100, 0.5, 200, 0.75]) def test_custom_stratified_shuffle_split_returns_unique_labels_and_maintains_size( task, X, y, train_size ): - splitter = CustomStratifiedShuffleSplit( - train_size=train_size, - random_state=1 - ) + splitter = CustomStratifiedShuffleSplit(train_size=train_size, random_state=1) left_idxs, _ = next(splitter.split(X=X, y=y)) y_sampled = y[left_idxs] X_sampled = X[left_idxs] @@ -46,5 +53,6 @@ def test_custom_stratified_shuffle_split_returns_unique_labels_and_maintains_siz assert len(X_sampled) == n_samples # Assert all the unique labels are present in the training set - assert all(label in np.unique(y_sampled) for label in np.unique(y)), \ - f"{task} failed, {np.unique(y)} != {np.unique(y_sampled)}" + assert all( + label in 
np.unique(y_sampled) for label in np.unique(y) + ), f"{task} failed, {np.unique(y)} != {np.unique(y_sampled)}" diff --git a/test/test_evaluation/test_dummy_pipelines.py b/test/test_evaluation/test_dummy_pipelines.py index ed7c499711..3d5f1d0f59 100644 --- a/test/test_evaluation/test_dummy_pipelines.py +++ b/test/test_evaluation/test_dummy_pipelines.py @@ -1,20 +1,21 @@ import numpy as np - import pytest - from sklearn.base import clone from sklearn.datasets import make_classification, make_regression from sklearn.utils.validation import check_is_fitted -from autosklearn.evaluation.abstract_evaluator import MyDummyClassifier, MyDummyRegressor +from autosklearn.evaluation.abstract_evaluator import ( + MyDummyClassifier, + MyDummyRegressor, +) -@pytest.mark.parametrize("task_type", ['classification', 'regression']) +@pytest.mark.parametrize("task_type", ["classification", "regression"]) def test_dummy_pipeline(task_type): - if task_type == 'classification': + if task_type == "classification": estimator_class = MyDummyClassifier data_maker = make_classification - elif task_type == 'regression': + elif task_type == "regression": estimator_class = MyDummyRegressor data_maker = make_regression else: diff --git a/test/test_evaluation/test_evaluation.py b/test/test_evaluation/test_evaluation.py index 77f6e5c4bf..67d9e0ca8b 100644 --- a/test/test_evaluation/test_evaluation.py +++ b/test/test_evaluation/test_evaluation.py @@ -1,6 +1,6 @@ -import os import logging import logging.handlers +import os import shutil import sys import time @@ -19,20 +19,21 @@ this_directory = os.path.dirname(__file__) sys.path.append(this_directory) -from evaluation_util import get_multiclass_classification_datamanager, get_evaluation_backend # noqa E402 +from evaluation_util import ( # noqa E402 + get_evaluation_backend, + get_multiclass_classification_datamanager, +) def safe_eval_success_mock(*args, **kwargs): - queue = kwargs['queue'] - queue.put({'status': StatusType.SUCCESS, - 'loss': 0.5, - 'additional_run_info': ''}) + queue = kwargs["queue"] + queue.put({"status": StatusType.SUCCESS, "loss": 0.5, "additional_run_info": ""}) class EvaluationTest(unittest.TestCase): def setUp(self): self.datamanager = get_multiclass_classification_datamanager() - self.tmp = os.path.join(os.getcwd(), '.test_evaluation') + self.tmp = os.path.join(os.getcwd(), ".test_evaluation") self.logger = logging.getLogger() scenario_mock = unittest.mock.Mock() scenario_mock.wallclock_limit = 10 @@ -72,142 +73,211 @@ def test_pynisher_timeout(self): def run_over_time(): time.sleep(2) - safe_eval = pynisher.enforce_limits(wall_time_in_s=1, - grace_period_in_s=0)(run_over_time) + safe_eval = pynisher.enforce_limits(wall_time_in_s=1, grace_period_in_s=0)( + run_over_time + ) safe_eval() self.assertEqual(safe_eval.exit_status, pynisher.TimeoutException) ############################################################################ # Test ExecuteTaFuncWithQueue.run_wrapper() - @unittest.mock.patch('autosklearn.evaluation.train_evaluator.eval_holdout') + @unittest.mock.patch("autosklearn.evaluation.train_evaluator.eval_holdout") def test_eval_with_limits_holdout(self, pynisher_mock): pynisher_mock.side_effect = safe_eval_success_mock config = unittest.mock.Mock() config.config_id = 198 - ta = ExecuteTaFuncWithQueue(backend=self.backend, autosklearn_seed=1, - port=self.logger_port, - resampling_strategy='holdout', - stats=self.stats, - memory_limit=3072, - metric=accuracy, - cost_for_crash=get_cost_of_crash(accuracy), - abort_on_first_run_crash=False, - 
pynisher_context='fork', - ) - info = ta.run_wrapper(RunInfo(config=config, cutoff=30, instance=None, - instance_specific=None, seed=1, capped=False)) + ta = ExecuteTaFuncWithQueue( + backend=self.backend, + autosklearn_seed=1, + port=self.logger_port, + resampling_strategy="holdout", + stats=self.stats, + memory_limit=3072, + metric=accuracy, + cost_for_crash=get_cost_of_crash(accuracy), + abort_on_first_run_crash=False, + pynisher_context="fork", + ) + info = ta.run_wrapper( + RunInfo( + config=config, + cutoff=30, + instance=None, + instance_specific=None, + seed=1, + capped=False, + ) + ) self.assertEqual(info[0].config.config_id, 198) self.assertEqual(info[1].status, StatusType.SUCCESS) self.assertEqual(info[1].cost, 0.5) self.assertIsInstance(info[1].time, float) - @unittest.mock.patch('pynisher.enforce_limits') + @unittest.mock.patch("pynisher.enforce_limits") def test_zero_or_negative_cutoff(self, pynisher_mock): config = unittest.mock.Mock() config.config_id = 198 - ta = ExecuteTaFuncWithQueue(backend=self.backend, autosklearn_seed=1, - port=self.logger_port, - resampling_strategy='holdout', - stats=self.stats, - metric=accuracy, - cost_for_crash=get_cost_of_crash(accuracy), - abort_on_first_run_crash=False, - pynisher_context='forkserver', - ) + ta = ExecuteTaFuncWithQueue( + backend=self.backend, + autosklearn_seed=1, + port=self.logger_port, + resampling_strategy="holdout", + stats=self.stats, + metric=accuracy, + cost_for_crash=get_cost_of_crash(accuracy), + abort_on_first_run_crash=False, + pynisher_context="forkserver", + ) self.scenario.wallclock_limit = 5 self.stats.submitted_ta_runs += 1 - run_info, run_value = ta.run_wrapper(RunInfo(config=config, cutoff=9, instance=None, - instance_specific=None, seed=1, capped=False)) + run_info, run_value = ta.run_wrapper( + RunInfo( + config=config, + cutoff=9, + instance=None, + instance_specific=None, + seed=1, + capped=False, + ) + ) self.assertEqual(run_value.status, StatusType.STOP) - @unittest.mock.patch('pynisher.enforce_limits') + @unittest.mock.patch("pynisher.enforce_limits") def test_cutoff_lower_than_remaining_time(self, pynisher_mock): config = unittest.mock.Mock() config.config_id = 198 - ta = ExecuteTaFuncWithQueue(backend=self.backend, autosklearn_seed=1, - port=self.logger_port, - resampling_strategy='holdout', - stats=self.stats, - metric=accuracy, - cost_for_crash=get_cost_of_crash(accuracy), - abort_on_first_run_crash=False, - pynisher_context='forkserver', - ) + ta = ExecuteTaFuncWithQueue( + backend=self.backend, + autosklearn_seed=1, + port=self.logger_port, + resampling_strategy="holdout", + stats=self.stats, + metric=accuracy, + cost_for_crash=get_cost_of_crash(accuracy), + abort_on_first_run_crash=False, + pynisher_context="forkserver", + ) self.stats.ta_runs = 1 - ta.run_wrapper(RunInfo(config=config, cutoff=30, instance=None, instance_specific=None, - seed=1, capped=False)) - self.assertEqual(pynisher_mock.call_args[1]['wall_time_in_s'], 4) - self.assertIsInstance(pynisher_mock.call_args[1]['wall_time_in_s'], int) + ta.run_wrapper( + RunInfo( + config=config, + cutoff=30, + instance=None, + instance_specific=None, + seed=1, + capped=False, + ) + ) + self.assertEqual(pynisher_mock.call_args[1]["wall_time_in_s"], 4) + self.assertIsInstance(pynisher_mock.call_args[1]["wall_time_in_s"], int) - @unittest.mock.patch('autosklearn.evaluation.train_evaluator.eval_holdout') + @unittest.mock.patch("autosklearn.evaluation.train_evaluator.eval_holdout") def test_eval_with_limits_holdout_fail_silent(self, 
pynisher_mock): pynisher_mock.return_value = None config = unittest.mock.Mock() - config.origin = 'MOCK' + config.origin = "MOCK" config.config_id = 198 - ta = ExecuteTaFuncWithQueue(backend=self.backend, autosklearn_seed=1, - port=self.logger_port, - resampling_strategy='holdout', - stats=self.stats, - memory_limit=3072, - metric=accuracy, - cost_for_crash=get_cost_of_crash(accuracy), - abort_on_first_run_crash=False, - pynisher_context='fork', - ) + ta = ExecuteTaFuncWithQueue( + backend=self.backend, + autosklearn_seed=1, + port=self.logger_port, + resampling_strategy="holdout", + stats=self.stats, + memory_limit=3072, + metric=accuracy, + cost_for_crash=get_cost_of_crash(accuracy), + abort_on_first_run_crash=False, + pynisher_context="fork", + ) # The following should not fail because abort on first config crashed is false - info = ta.run_wrapper(RunInfo(config=config, cutoff=60, instance=None, - instance_specific=None, seed=1, capped=False)) + info = ta.run_wrapper( + RunInfo( + config=config, + cutoff=60, + instance=None, + instance_specific=None, + seed=1, + capped=False, + ) + ) self.assertEqual(info[1].status, StatusType.CRASHED) self.assertEqual(info[1].cost, 1.0) self.assertIsInstance(info[1].time, float) - self.assertEqual(info[1].additional_info, {'configuration_origin': 'MOCK', - 'error': "Result queue is empty", - 'exit_status': 0, - 'exitcode': 0, - 'subprocess_stdout': '', - 'subprocess_stderr': ''}) + self.assertEqual( + info[1].additional_info, + { + "configuration_origin": "MOCK", + "error": "Result queue is empty", + "exit_status": 0, + "exitcode": 0, + "subprocess_stdout": "", + "subprocess_stderr": "", + }, + ) self.stats.submitted_ta_runs += 1 - info = ta.run_wrapper(RunInfo(config=config, cutoff=30, instance=None, - instance_specific=None, seed=1, capped=False)) + info = ta.run_wrapper( + RunInfo( + config=config, + cutoff=30, + instance=None, + instance_specific=None, + seed=1, + capped=False, + ) + ) self.assertEqual(info[1].status, StatusType.CRASHED) self.assertEqual(info[1].cost, 1.0) self.assertIsInstance(info[1].time, float) - self.assertEqual(info[1].additional_info, {'configuration_origin': 'MOCK', - 'error': "Result queue is empty", - 'exit_status': 0, - 'exitcode': 0, - 'subprocess_stdout': '', - 'subprocess_stderr': ''}) - - @unittest.mock.patch('autosklearn.evaluation.train_evaluator.eval_holdout') + self.assertEqual( + info[1].additional_info, + { + "configuration_origin": "MOCK", + "error": "Result queue is empty", + "exit_status": 0, + "exitcode": 0, + "subprocess_stdout": "", + "subprocess_stderr": "", + }, + ) + + @unittest.mock.patch("autosklearn.evaluation.train_evaluator.eval_holdout") def test_eval_with_limits_holdout_fail_memory_error(self, pynisher_mock): pynisher_mock.side_effect = MemoryError config = unittest.mock.Mock() config.config_id = 198 - ta = ExecuteTaFuncWithQueue(backend=self.backend, autosklearn_seed=1, - port=self.logger_port, - resampling_strategy='holdout', - stats=self.stats, - memory_limit=3072, - metric=log_loss, - cost_for_crash=get_cost_of_crash(log_loss), - abort_on_first_run_crash=False, - pynisher_context='fork', - ) - info = ta.run_wrapper(RunInfo(config=config, cutoff=30, instance=None, - instance_specific=None, seed=1, capped=False)) + ta = ExecuteTaFuncWithQueue( + backend=self.backend, + autosklearn_seed=1, + port=self.logger_port, + resampling_strategy="holdout", + stats=self.stats, + memory_limit=3072, + metric=log_loss, + cost_for_crash=get_cost_of_crash(log_loss), + abort_on_first_run_crash=False, + 
pynisher_context="fork", + ) + info = ta.run_wrapper( + RunInfo( + config=config, + cutoff=30, + instance=None, + instance_specific=None, + seed=1, + capped=False, + ) + ) self.assertEqual(info[1].status, StatusType.MEMOUT) # For logloss, worst possible result is MAXINT worst_possible_result = MAXINT self.assertEqual(info[1].cost, worst_possible_result) self.assertIsInstance(info[1].time, float) - self.assertNotIn('exitcode', info[1].additional_info) + self.assertNotIn("exitcode", info[1].additional_info) - @unittest.mock.patch('pynisher.enforce_limits') + @unittest.mock.patch("pynisher.enforce_limits") def test_eval_with_limits_holdout_fail_timeout(self, pynisher_mock): config = unittest.mock.Mock() config.config_id = 198 @@ -218,33 +288,46 @@ def test_eval_with_limits_holdout_fail_timeout(self, pynisher_mock): pynisher_mock.return_value = m1 m2.exit_status = pynisher.TimeoutException m2.wall_clock_time = 30 - ta = ExecuteTaFuncWithQueue(backend=self.backend, autosklearn_seed=1, - port=self.logger_port, - resampling_strategy='holdout', - stats=self.stats, - memory_limit=3072, - metric=accuracy, - cost_for_crash=get_cost_of_crash(accuracy), - abort_on_first_run_crash=False, - pynisher_context='forkserver', - ) - info = ta.run_wrapper(RunInfo(config=config, cutoff=30, instance=None, - instance_specific=None, seed=1, capped=False)) + ta = ExecuteTaFuncWithQueue( + backend=self.backend, + autosklearn_seed=1, + port=self.logger_port, + resampling_strategy="holdout", + stats=self.stats, + memory_limit=3072, + metric=accuracy, + cost_for_crash=get_cost_of_crash(accuracy), + abort_on_first_run_crash=False, + pynisher_context="forkserver", + ) + info = ta.run_wrapper( + RunInfo( + config=config, + cutoff=30, + instance=None, + instance_specific=None, + seed=1, + capped=False, + ) + ) self.assertEqual(info[1].status, StatusType.TIMEOUT) self.assertEqual(info[1].cost, 1.0) self.assertIsInstance(info[1].time, float) - self.assertNotIn('exitcode', info[1].additional_info) + self.assertNotIn("exitcode", info[1].additional_info) - @unittest.mock.patch('pynisher.enforce_limits') - def test_eval_with_limits_holdout_timeout_with_results_in_queue(self, pynisher_mock): + @unittest.mock.patch("pynisher.enforce_limits") + def test_eval_with_limits_holdout_timeout_with_results_in_queue( + self, pynisher_mock + ): config = unittest.mock.Mock() config.config_id = 198 def side_effect(**kwargs): - queue = kwargs['queue'] - queue.put({'status': StatusType.SUCCESS, - 'loss': 0.5, - 'additional_run_info': {}}) + queue = kwargs["queue"] + queue.put( + {"status": StatusType.SUCCESS, "loss": 0.5, "additional_run_info": {}} + ) + m1 = unittest.mock.Mock() m2 = unittest.mock.Mock() m1.return_value = m2 @@ -254,137 +337,194 @@ def side_effect(**kwargs): m2.wall_clock_time = 30 # Test for a succesful run - ta = ExecuteTaFuncWithQueue(backend=self.backend, autosklearn_seed=1, - port=self.logger_port, - resampling_strategy='holdout', - stats=self.stats, - memory_limit=3072, - metric=accuracy, - cost_for_crash=get_cost_of_crash(accuracy), - abort_on_first_run_crash=False, - pynisher_context='forkserver', - ) - info = ta.run_wrapper(RunInfo(config=config, cutoff=30, instance=None, - instance_specific=None, seed=1, capped=False)) + ta = ExecuteTaFuncWithQueue( + backend=self.backend, + autosklearn_seed=1, + port=self.logger_port, + resampling_strategy="holdout", + stats=self.stats, + memory_limit=3072, + metric=accuracy, + cost_for_crash=get_cost_of_crash(accuracy), + abort_on_first_run_crash=False, + 
pynisher_context="forkserver", + ) + info = ta.run_wrapper( + RunInfo( + config=config, + cutoff=30, + instance=None, + instance_specific=None, + seed=1, + capped=False, + ) + ) self.assertEqual(info[1].status, StatusType.SUCCESS) self.assertEqual(info[1].cost, 0.5) self.assertIsInstance(info[1].time, float) - self.assertNotIn('exitcode', info[1].additional_info) + self.assertNotIn("exitcode", info[1].additional_info) # And a crashed run which is in the queue def side_effect(**kwargs): - queue = kwargs['queue'] - queue.put({'status': StatusType.CRASHED, - 'loss': 2.0, - 'additional_run_info': {}}) + queue = kwargs["queue"] + queue.put( + {"status": StatusType.CRASHED, "loss": 2.0, "additional_run_info": {}} + ) + m2.side_effect = side_effect - ta = ExecuteTaFuncWithQueue(backend=self.backend, autosklearn_seed=1, - port=self.logger_port, - resampling_strategy='holdout', - stats=self.stats, - memory_limit=3072, - metric=accuracy, - cost_for_crash=get_cost_of_crash(accuracy), - abort_on_first_run_crash=False, - pynisher_context='forkserver', - ) - info = ta.run_wrapper(RunInfo(config=config, cutoff=30, instance=None, - instance_specific=None, seed=1, capped=False)) + ta = ExecuteTaFuncWithQueue( + backend=self.backend, + autosklearn_seed=1, + port=self.logger_port, + resampling_strategy="holdout", + stats=self.stats, + memory_limit=3072, + metric=accuracy, + cost_for_crash=get_cost_of_crash(accuracy), + abort_on_first_run_crash=False, + pynisher_context="forkserver", + ) + info = ta.run_wrapper( + RunInfo( + config=config, + cutoff=30, + instance=None, + instance_specific=None, + seed=1, + capped=False, + ) + ) self.assertEqual(info[1].status, StatusType.CRASHED) self.assertEqual(info[1].cost, 1.0) self.assertIsInstance(info[1].time, float) - self.assertNotIn('exitcode', info[1].additional_info) + self.assertNotIn("exitcode", info[1].additional_info) - @unittest.mock.patch('autosklearn.evaluation.train_evaluator.eval_holdout') + @unittest.mock.patch("autosklearn.evaluation.train_evaluator.eval_holdout") def test_eval_with_limits_holdout_2(self, eval_houldout_mock): config = unittest.mock.Mock() config.config_id = 198 def side_effect(*args, **kwargs): - queue = kwargs['queue'] - queue.put({'status': StatusType.SUCCESS, - 'loss': 0.5, - 'additional_run_info': kwargs['instance']}) + queue = kwargs["queue"] + queue.put( + { + "status": StatusType.SUCCESS, + "loss": 0.5, + "additional_run_info": kwargs["instance"], + } + ) + eval_houldout_mock.side_effect = side_effect - ta = ExecuteTaFuncWithQueue(backend=self.backend, autosklearn_seed=1, - port=self.logger_port, - resampling_strategy='holdout', - stats=self.stats, - memory_limit=3072, - metric=accuracy, - cost_for_crash=get_cost_of_crash(accuracy), - abort_on_first_run_crash=False, - pynisher_context='fork', - ) + ta = ExecuteTaFuncWithQueue( + backend=self.backend, + autosklearn_seed=1, + port=self.logger_port, + resampling_strategy="holdout", + stats=self.stats, + memory_limit=3072, + metric=accuracy, + cost_for_crash=get_cost_of_crash(accuracy), + abort_on_first_run_crash=False, + pynisher_context="fork", + ) self.scenario.wallclock_limit = 180 instance = "{'subsample': 30}" - info = ta.run_wrapper(RunInfo(config=config, cutoff=30, instance=instance, - instance_specific=None, seed=1, capped=False)) + info = ta.run_wrapper( + RunInfo( + config=config, + cutoff=30, + instance=instance, + instance_specific=None, + seed=1, + capped=False, + ) + ) self.assertEqual(info[1].status, StatusType.SUCCESS) self.assertEqual(len(info[1].additional_info), 
2) - self.assertIn('configuration_origin', info[1].additional_info) - self.assertEqual(info[1].additional_info['message'], "{'subsample': 30}") + self.assertIn("configuration_origin", info[1].additional_info) + self.assertEqual(info[1].additional_info["message"], "{'subsample': 30}") - @unittest.mock.patch('autosklearn.evaluation.train_evaluator.eval_holdout') + @unittest.mock.patch("autosklearn.evaluation.train_evaluator.eval_holdout") def test_exception_in_target_function(self, eval_holdout_mock): config = unittest.mock.Mock() config.config_id = 198 eval_holdout_mock.side_effect = ValueError - ta = ExecuteTaFuncWithQueue(backend=self.backend, autosklearn_seed=1, - port=self.logger_port, - resampling_strategy='holdout', - stats=self.stats, - memory_limit=3072, - metric=accuracy, - cost_for_crash=get_cost_of_crash(accuracy), - abort_on_first_run_crash=False, - pynisher_context='fork', - ) + ta = ExecuteTaFuncWithQueue( + backend=self.backend, + autosklearn_seed=1, + port=self.logger_port, + resampling_strategy="holdout", + stats=self.stats, + memory_limit=3072, + metric=accuracy, + cost_for_crash=get_cost_of_crash(accuracy), + abort_on_first_run_crash=False, + pynisher_context="fork", + ) self.stats.submitted_ta_runs += 1 - info = ta.run_wrapper(RunInfo(config=config, cutoff=30, instance=None, - instance_specific=None, seed=1, capped=False)) + info = ta.run_wrapper( + RunInfo( + config=config, + cutoff=30, + instance=None, + instance_specific=None, + seed=1, + capped=False, + ) + ) self.assertEqual(info[1].status, StatusType.CRASHED) self.assertEqual(info[1].cost, 1.0) self.assertIsInstance(info[1].time, float) - self.assertEqual(info[1].additional_info['error'], 'ValueError()') - self.assertIn('traceback', info[1].additional_info) - self.assertNotIn('exitcode', info[1].additional_info) + self.assertEqual(info[1].additional_info["error"], "ValueError()") + self.assertIn("traceback", info[1].additional_info) + self.assertNotIn("exitcode", info[1].additional_info) def test_silent_exception_in_target_function(self): config = unittest.mock.Mock() config.config_id = 198 - delattr(self.backend, 'save_targets_ensemble') - ta = ExecuteTaFuncWithQueue(backend=self.backend, - port=self.logger_port, - autosklearn_seed=1, - resampling_strategy='holdout', - stats=self.stats, - memory_limit=3072, - metric=accuracy, - cost_for_crash=get_cost_of_crash(accuracy), - abort_on_first_run_crash=False, - iterative=False, - pynisher_context='fork', - ) + delattr(self.backend, "save_targets_ensemble") + ta = ExecuteTaFuncWithQueue( + backend=self.backend, + port=self.logger_port, + autosklearn_seed=1, + resampling_strategy="holdout", + stats=self.stats, + memory_limit=3072, + metric=accuracy, + cost_for_crash=get_cost_of_crash(accuracy), + abort_on_first_run_crash=False, + iterative=False, + pynisher_context="fork", + ) ta.pynisher_logger = unittest.mock.Mock() self.stats.submitted_ta_runs += 1 - info = ta.run_wrapper(RunInfo(config=config, cutoff=3000, instance=None, - instance_specific=None, seed=1, capped=False)) - self.assertEqual(info[1].status, StatusType.CRASHED, msg=str(info[1].additional_info)) + info = ta.run_wrapper( + RunInfo( + config=config, + cutoff=3000, + instance=None, + instance_specific=None, + seed=1, + capped=False, + ) + ) + self.assertEqual( + info[1].status, StatusType.CRASHED, msg=str(info[1].additional_info) + ) self.assertEqual(info[1].cost, 1.0) self.assertIsInstance(info[1].time, float) self.assertIn( - info[1].additional_info['error'], + info[1].additional_info["error"], ( 
"""AttributeError("'BackendMock' object has no attribute """ """'save_targets_ensemble'",)""", """AttributeError("'BackendMock' object has no attribute """ """'save_targets_ensemble'")""", - """AttributeError('save_targets_ensemble')""" - ) + """AttributeError('save_targets_ensemble')""", + ), ) - self.assertNotIn('exitcode', info[1].additional_info) - self.assertNotIn('exit_status', info[1].additional_info) - self.assertNotIn('traceback', info[1]) + self.assertNotIn("exitcode", info[1].additional_info) + self.assertNotIn("exit_status", info[1].additional_info) + self.assertNotIn("traceback", info[1]) diff --git a/test/test_evaluation/test_test_evaluator.py b/test/test_evaluation/test_test_evaluator.py index 93ea0c2265..0a1b67faa9 100644 --- a/test/test_evaluation/test_test_evaluator.py +++ b/test/test_evaluation/test_test_evaluator.py @@ -14,24 +14,26 @@ from smac.tae import StatusType from autosklearn.automl_common.common.utils.backend import Backend - -from autosklearn.constants import MULTILABEL_CLASSIFICATION, BINARY_CLASSIFICATION, \ - MULTICLASS_CLASSIFICATION, REGRESSION +from autosklearn.constants import ( + BINARY_CLASSIFICATION, + MULTICLASS_CLASSIFICATION, + MULTILABEL_CLASSIFICATION, + REGRESSION, +) from autosklearn.evaluation.test_evaluator import TestEvaluator, eval_t from autosklearn.evaluation.util import read_queue +from autosklearn.metrics import accuracy, f1_macro, r2 from autosklearn.util.pipeline import get_configuration_space -from autosklearn.metrics import accuracy, r2, f1_macro this_directory = os.path.dirname(__file__) sys.path.append(this_directory) from evaluation_util import ( # noqa (E402: module level import not at top of file) - get_evaluation_backend, - get_dataset_getters, + SCORER_LIST, BaseEvaluatorTest, + get_dataset_getters, + get_evaluation_backend, get_multiclass_classification_datamanager, - SCORER_LIST -) # noqa (E402: module level import not at top of file) - +) N_TEST_RUNS = 3 @@ -45,28 +47,31 @@ class TestEvaluator_Test(BaseEvaluatorTest, unittest.TestCase): def test_datasets(self): for getter in get_dataset_getters(): - testname = '%s_%s' % (os.path.basename(__file__). 
- replace('.pyc', '').replace('.py', ''), - getter.__name__) + testname = "%s_%s" % ( + os.path.basename(__file__).replace(".pyc", "").replace(".py", ""), + getter.__name__, + ) with self.subTest(testname): backend_mock = get_evaluation_backend() D = getter() D_ = copy.deepcopy(D) - y = D.data['Y_train'] + y = D.data["Y_train"] if len(y.shape) == 2 and y.shape[1] == 1: - D_.data['Y_train'] = y.flatten() + D_.data["Y_train"] = y.flatten() backend_mock.load_datamanager.return_value = D_ - metric_lookup = {MULTILABEL_CLASSIFICATION: f1_macro, - BINARY_CLASSIFICATION: accuracy, - MULTICLASS_CLASSIFICATION: accuracy, - REGRESSION: r2} + metric_lookup = { + MULTILABEL_CLASSIFICATION: f1_macro, + BINARY_CLASSIFICATION: accuracy, + MULTICLASS_CLASSIFICATION: accuracy, + REGRESSION: r2, + } queue_ = multiprocessing.Queue() evaluator = TestEvaluator( backend_mock, queue_, - metric=metric_lookup[D.info['task']], + metric=metric_lookup[D.info["task"]], port=logging.handlers.DEFAULT_TCP_LOGGING_PORT, additional_components=dict(), ) @@ -75,22 +80,21 @@ def test_datasets(self): rval = read_queue(evaluator.queue) self.assertEqual(len(rval), 1) self.assertEqual(len(rval[0]), 3) - self.assertTrue(np.isfinite(rval[0]['loss'])) + self.assertTrue(np.isfinite(rval[0]["loss"])) class FunctionsTest(unittest.TestCase): def setUp(self): self.queue = multiprocessing.Queue() self.configuration = get_configuration_space( - {'task': MULTICLASS_CLASSIFICATION, - 'is_sparse': False}).get_default_configuration() + {"task": MULTICLASS_CLASSIFICATION, "is_sparse": False} + ).get_default_configuration() self.data = get_multiclass_classification_datamanager() - self.tmp_dir = os.path.join(os.path.dirname(__file__), - '.test_cv_functions') + self.tmp_dir = os.path.join(os.path.dirname(__file__), ".test_cv_functions") self.backend = unittest.mock.Mock(spec=Backend) self.backend.temporary_directory = tempfile.gettempdir() self.backend.load_datamanager.return_value = self.data - self.dataset_name = json.dumps({'task_id': 'test'}) + self.dataset_name = json.dumps({"task_id": "test"}) self.port = logging.handlers.DEFAULT_TCP_LOGGING_PORT @@ -106,7 +110,8 @@ def test_eval_test(self): backend=self.backend, config=self.configuration, metric=accuracy, - seed=1, num_run=1, + seed=1, + num_run=1, scoring_functions=None, output_y_hat_optimization=False, include=None, @@ -118,9 +123,9 @@ def test_eval_test(self): ) rval = read_queue(self.queue) self.assertEqual(len(rval), 1) - self.assertAlmostEqual(rval[0]['loss'], 0.040000000000000036) - self.assertEqual(rval[0]['status'], StatusType.SUCCESS) - self.assertNotIn('bac_metric', rval[0]['additional_run_info']) + self.assertAlmostEqual(rval[0]["loss"], 0.040000000000000036) + self.assertEqual(rval[0]["status"], StatusType.SUCCESS) + self.assertNotIn("bac_metric", rval[0]["additional_run_info"]) def test_eval_test_all_loss_functions(self): eval_t( @@ -128,7 +133,8 @@ def test_eval_test_all_loss_functions(self): backend=self.backend, config=self.configuration, metric=accuracy, - seed=1, num_run=1, + seed=1, + num_run=1, scoring_functions=SCORER_LIST, output_y_hat_optimization=False, include=None, @@ -142,25 +148,30 @@ def test_eval_test_all_loss_functions(self): self.assertEqual(len(rval), 1) # Note: All metric here should be minimized - fixture = {'accuracy': 0.040000000000000036, - 'balanced_accuracy': 0.02777777777777779, - 'f1_macro': 0.0341005967604433, - 'f1_micro': 0.040000000000000036, - 'f1_weighted': 0.039693094629155934, - 'log_loss': 0.13966929787769913, - 'precision_macro': 
0.03703703703703709, - 'precision_micro': 0.040000000000000036, - 'precision_weighted': 0.03555555555555556, - 'recall_macro': 0.02777777777777779, - 'recall_micro': 0.040000000000000036, - 'recall_weighted': 0.040000000000000036, - 'num_run': -1} - - additional_run_info = rval[0]['additional_run_info'] + fixture = { + "accuracy": 0.040000000000000036, + "balanced_accuracy": 0.02777777777777779, + "f1_macro": 0.0341005967604433, + "f1_micro": 0.040000000000000036, + "f1_weighted": 0.039693094629155934, + "log_loss": 0.13966929787769913, + "precision_macro": 0.03703703703703709, + "precision_micro": 0.040000000000000036, + "precision_weighted": 0.03555555555555556, + "recall_macro": 0.02777777777777779, + "recall_micro": 0.040000000000000036, + "recall_weighted": 0.040000000000000036, + "num_run": -1, + } + + additional_run_info = rval[0]["additional_run_info"] for key, value in fixture.items(): self.assertAlmostEqual(additional_run_info[key], fixture[key], msg=key) - self.assertEqual(len(additional_run_info), len(fixture) + 1, - msg=sorted(additional_run_info.items())) - self.assertIn('duration', additional_run_info) - self.assertAlmostEqual(rval[0]['loss'], 0.040000000000000036) - self.assertEqual(rval[0]['status'], StatusType.SUCCESS) + self.assertEqual( + len(additional_run_info), + len(fixture) + 1, + msg=sorted(additional_run_info.items()), + ) + self.assertIn("duration", additional_run_info) + self.assertAlmostEqual(rval[0]["loss"], 0.040000000000000036) + self.assertEqual(rval[0]["status"], StatusType.SUCCESS) diff --git a/test/test_evaluation/test_train_evaluator.py b/test/test_evaluation/test_train_evaluator.py index 28bddcdb09..92e3cfcc10 100644 --- a/test/test_evaluation/test_train_evaluator.py +++ b/test/test_evaluation/test_train_evaluator.py @@ -1,49 +1,73 @@ import copy import json import logging.handlers -import queue import multiprocessing import os -import tempfile +import queue import shutil import sys +import tempfile import unittest import unittest.mock -from ConfigSpace import Configuration import numpy as np -from sklearn.model_selection import GroupKFold, GroupShuffleSplit, \ - KFold, LeaveOneGroupOut, LeavePGroupsOut, LeaveOneOut, LeavePOut, \ - PredefinedSplit, RepeatedKFold, RepeatedStratifiedKFold, ShuffleSplit, \ - StratifiedKFold, StratifiedShuffleSplit, TimeSeriesSplit import sklearn.model_selection +from ConfigSpace import Configuration +from sklearn.model_selection import ( + GroupKFold, + GroupShuffleSplit, + KFold, + LeaveOneGroupOut, + LeaveOneOut, + LeavePGroupsOut, + LeavePOut, + PredefinedSplit, + RepeatedKFold, + RepeatedStratifiedKFold, + ShuffleSplit, + StratifiedKFold, + StratifiedShuffleSplit, + TimeSeriesSplit, +) from smac.tae import StatusType, TAEAbortException -from autosklearn.automl_common.common.utils import backend - import autosklearn.evaluation.splitter +from autosklearn.automl_common.common.utils import backend +from autosklearn.constants import ( + BINARY_CLASSIFICATION, + MULTICLASS_CLASSIFICATION, + MULTILABEL_CLASSIFICATION, + MULTIOUTPUT_REGRESSION, + REGRESSION, +) from autosklearn.data.abstract_data_manager import AbstractDataManager +from autosklearn.evaluation.train_evaluator import ( + TrainEvaluator, + eval_cv, + eval_holdout, + eval_iterative_holdout, + eval_partial_cv, + subsample_indices, +) from autosklearn.evaluation.util import read_queue -from autosklearn.evaluation.train_evaluator import TrainEvaluator, \ - eval_holdout, eval_iterative_holdout, eval_cv, eval_partial_cv, subsample_indices +from autosklearn.metrics 
import accuracy, f1_macro, r2 from autosklearn.util.pipeline import get_configuration_space -from autosklearn.constants import BINARY_CLASSIFICATION, \ - MULTILABEL_CLASSIFICATION,\ - MULTICLASS_CLASSIFICATION,\ - REGRESSION,\ - MULTIOUTPUT_REGRESSION -from autosklearn.metrics import accuracy, r2, f1_macro this_directory = os.path.dirname(__file__) sys.path.append(this_directory) -from evaluation_util import get_regression_datamanager, BaseEvaluatorTest, \ - get_binary_classification_datamanager, get_dataset_getters, \ - get_multiclass_classification_datamanager, SCORER_LIST # noqa (E402: module level import not at top of file) +from evaluation_util import ( # noqa (E402: module level import not at top of file) + SCORER_LIST, + BaseEvaluatorTest, + get_binary_classification_datamanager, + get_dataset_getters, + get_multiclass_classification_datamanager, + get_regression_datamanager, +) class Dummy(object): def __init__(self): - self.name = 'dummy' + self.name = "dummy" class TestTrainEvaluator(BaseEvaluatorTest, unittest.TestCase): @@ -54,13 +78,15 @@ def setUp(self): Creates a backend mock """ tmp_dir_name = self.id() - self.ev_path = os.path.join(this_directory, '.tmp_evaluations', tmp_dir_name) + self.ev_path = os.path.join(this_directory, ".tmp_evaluations", tmp_dir_name) if os.path.exists(self.ev_path): shutil.rmtree(self.ev_path) os.makedirs(self.ev_path, exist_ok=False) dummy_model_files = [os.path.join(self.ev_path, str(n)) for n in range(100)] dummy_pred_files = [os.path.join(self.ev_path, str(n)) for n in range(100, 200)] - dummy_cv_model_files = [os.path.join(self.ev_path, str(n)) for n in range(200, 300)] + dummy_cv_model_files = [ + os.path.join(self.ev_path, str(n)) for n in range(200, 300) + ] backend_mock = unittest.mock.Mock() backend_mock.temporary_directory = tempfile.gettempdir() backend_mock.get_model_dir.return_value = self.ev_path @@ -70,7 +96,7 @@ def setUp(self): backend_mock.get_prediction_output_path.side_effect = dummy_pred_files self.backend_mock = backend_mock - self.tmp_dir = os.path.join(self.ev_path, 'tmp_dir') + self.tmp_dir = os.path.join(self.ev_path, "tmp_dir") self.port = logging.handlers.DEFAULT_TCP_LOGGING_PORT @@ -78,15 +104,18 @@ def tearDown(self): if os.path.exists(self.ev_path): shutil.rmtree(self.ev_path) - @unittest.mock.patch('autosklearn.pipeline.classification.SimpleClassificationPipeline') + @unittest.mock.patch( + "autosklearn.pipeline.classification.SimpleClassificationPipeline" + ) def test_holdout(self, pipeline_mock): # Binary iris, contains 69 train samples, 25 validation samples, # 6 test samples D = get_binary_classification_datamanager() - D.name = 'test' + D.name = "test" - pipeline_mock.predict_proba.side_effect = \ - lambda X, batch_size=None: np.tile([0.6, 0.4], (len(X), 1)) + pipeline_mock.predict_proba.side_effect = lambda X, batch_size=None: np.tile( + [0.6, 0.4], (len(X), 1) + ) pipeline_mock.side_effect = lambda **kwargs: pipeline_mock pipeline_mock.get_additional_run_info.return_value = None pipeline_mock.get_max_iter.return_value = 1 @@ -96,21 +125,23 @@ def test_holdout(self, pipeline_mock): backend_api = backend.create( temporary_directory=self.tmp_dir, output_directory=None, - prefix="auto-sklearn" + prefix="auto-sklearn", ) backend_api.load_datamanager = lambda: D queue_ = multiprocessing.Queue() - evaluator = TrainEvaluator(backend_api, queue_, - configuration=configuration, - resampling_strategy='holdout', - resampling_strategy_args={'train_size': 0.66}, - scoring_functions=None, - output_y_hat_optimization=True, 
- metric=accuracy, - port=self.port, - additional_components=dict(), - ) + evaluator = TrainEvaluator( + backend_api, + queue_, + configuration=configuration, + resampling_strategy="holdout", + resampling_strategy_args={"train_size": 0.66}, + scoring_functions=None, + output_y_hat_optimization=True, + metric=accuracy, + port=self.port, + additional_components=dict(), + ) evaluator.file_output = unittest.mock.Mock(spec=evaluator.file_output) evaluator.file_output.return_value = (None, {}) @@ -118,7 +149,7 @@ def test_holdout(self, pipeline_mock): rval = read_queue(evaluator.queue) self.assertEqual(len(rval), 1) - result = rval[0]['loss'] + result = rval[0]["loss"] self.assertEqual(len(rval[0]), 3) self.assertRaises(queue.Empty, evaluator.queue.get, timeout=1) @@ -129,17 +160,21 @@ def test_holdout(self, pipeline_mock): self.assertEqual(pipeline_mock.predict_proba.call_count, 4) self.assertEqual(evaluator.file_output.call_count, 1) self.assertEqual(evaluator.file_output.call_args[0][0].shape[0], 24) - self.assertEqual(evaluator.file_output.call_args[0][1].shape[0], - D.data['Y_valid'].shape[0]) - self.assertEqual(evaluator.file_output.call_args[0][2].shape[0], - D.data['Y_test'].shape[0]) + self.assertEqual( + evaluator.file_output.call_args[0][1].shape[0], D.data["Y_valid"].shape[0] + ) + self.assertEqual( + evaluator.file_output.call_args[0][2].shape[0], D.data["Y_test"].shape[0] + ) self.assertEqual(evaluator.model.fit.call_count, 1) - @unittest.mock.patch('autosklearn.pipeline.classification.SimpleClassificationPipeline') + @unittest.mock.patch( + "autosklearn.pipeline.classification.SimpleClassificationPipeline" + ) def test_iterative_holdout(self, pipeline_mock): # Regular fitting D = get_binary_classification_datamanager() - D.name = 'test' + D.name = "test" class SideEffect(object): def __init__(self): @@ -152,55 +187,100 @@ def configuration_fully_fitted(self): # final call to iterative fit return self.fully_fitted_call_count > 18 - Xt_fixture = 'Xt_fixture' + Xt_fixture = "Xt_fixture" pipeline_mock.estimator_supports_iterative_fit.return_value = True - pipeline_mock.configuration_fully_fitted.side_effect = \ + pipeline_mock.configuration_fully_fitted.side_effect = ( SideEffect().configuration_fully_fitted + ) pipeline_mock.fit_transformer.return_value = Xt_fixture, {} - pipeline_mock.predict_proba.side_effect = \ - lambda X, batch_size=None: np.tile([0.6, 0.4], (len(X), 1)) + pipeline_mock.predict_proba.side_effect = lambda X, batch_size=None: np.tile( + [0.6, 0.4], (len(X), 1) + ) pipeline_mock.get_additional_run_info.return_value = None pipeline_mock.side_effect = lambda **kwargs: pipeline_mock pipeline_mock.get_max_iter.return_value = 512 - pipeline_mock.get_current_iter.side_effect = (2, 4, 8, 16, 32, 64, 128, 256, 512) + pipeline_mock.get_current_iter.side_effect = ( + 2, + 4, + 8, + 16, + 32, + 64, + 128, + 256, + 512, + ) configuration = unittest.mock.Mock(spec=Configuration) backend_api = backend.create( temporary_directory=self.tmp_dir, output_directory=None, - prefix="auto-sklearn" + prefix="auto-sklearn", ) backend_api.load_datamanager = lambda: D queue_ = multiprocessing.Queue() - evaluator = TrainEvaluator(backend_api, queue_, - port=self.port, - configuration=configuration, - resampling_strategy='holdout', - scoring_functions=None, - output_y_hat_optimization=True, - metric=accuracy, - budget=0.0, - additional_components=dict(),) + evaluator = TrainEvaluator( + backend_api, + queue_, + port=self.port, + configuration=configuration, + resampling_strategy="holdout", + 
scoring_functions=None, + output_y_hat_optimization=True, + metric=accuracy, + budget=0.0, + additional_components=dict(), + ) evaluator.file_output = unittest.mock.Mock(spec=evaluator.file_output) evaluator.file_output.return_value = (None, {}) class LossSideEffect(object): def __init__(self): - self.losses = [1.0, 1.0, 1.0, 1.0, - 0.9, 0.9, 0.9, 0.9, - 0.8, 0.8, 0.8, 0.8, - 0.7, 0.7, 0.7, 0.7, - 0.6, 0.6, 0.6, 0.6, - 0.5, 0.5, 0.5, 0.5, - 0.4, 0.4, 0.4, 0.4, - 0.3, 0.3, 0.3, 0.3, - 0.2, 0.2, 0.2, 0.2] + self.losses = [ + 1.0, + 1.0, + 1.0, + 1.0, + 0.9, + 0.9, + 0.9, + 0.9, + 0.8, + 0.8, + 0.8, + 0.8, + 0.7, + 0.7, + 0.7, + 0.7, + 0.6, + 0.6, + 0.6, + 0.6, + 0.5, + 0.5, + 0.5, + 0.5, + 0.4, + 0.4, + 0.4, + 0.4, + 0.3, + 0.3, + 0.3, + 0.3, + 0.2, + 0.2, + 0.2, + 0.2, + ] self.iteration = 0 def side_effect(self, *args, **kwargs): self.iteration += 1 return self.losses[self.iteration - 1] + evaluator._loss = unittest.mock.Mock() evaluator._loss.side_effect = LossSideEffect().side_effect @@ -209,38 +289,42 @@ def side_effect(self, *args, **kwargs): for i in range(1, 10): rval = evaluator.queue.get(timeout=1) - result = rval['loss'] + result = rval["loss"] self.assertAlmostEqual(result, 1.0 - (0.1 * (i - 1))) if i < 9: - self.assertEqual(rval['status'], StatusType.DONOTADVANCE) + self.assertEqual(rval["status"], StatusType.DONOTADVANCE) self.assertEqual(len(rval), 3) else: - self.assertEqual(rval['status'], StatusType.SUCCESS) + self.assertEqual(rval["status"], StatusType.SUCCESS) self.assertEqual(len(rval), 4) self.assertRaises(queue.Empty, evaluator.queue.get, timeout=1) self.assertEqual(pipeline_mock.iterative_fit.call_count, 9) self.assertEqual( - [cal[1]['n_iter'] for cal in pipeline_mock.iterative_fit.call_args_list], - [2, 2, 4, 8, 16, 32, 64, 128, 256] + [cal[1]["n_iter"] for cal in pipeline_mock.iterative_fit.call_args_list], + [2, 2, 4, 8, 16, 32, 64, 128, 256], ) # 20 calls because of train, holdout, validation and test set # and a total of five calls because of five iterations of fitting self.assertEqual(evaluator.model.predict_proba.call_count, 36) # 1/3 of 69 self.assertEqual(evaluator.file_output.call_args[0][0].shape[0], 23) - self.assertEqual(evaluator.file_output.call_args[0][1].shape[0], - D.data['Y_valid'].shape[0]) - self.assertEqual(evaluator.file_output.call_args[0][2].shape[0], - D.data['Y_test'].shape[0]) + self.assertEqual( + evaluator.file_output.call_args[0][1].shape[0], D.data["Y_valid"].shape[0] + ) + self.assertEqual( + evaluator.file_output.call_args[0][2].shape[0], D.data["Y_test"].shape[0] + ) self.assertEqual(evaluator.file_output.call_count, 9) self.assertEqual(evaluator.model.fit.call_count, 0) - @unittest.mock.patch('autosklearn.pipeline.classification.SimpleClassificationPipeline') + @unittest.mock.patch( + "autosklearn.pipeline.classification.SimpleClassificationPipeline" + ) def test_iterative_holdout_interuption(self, pipeline_mock): # Regular fitting D = get_binary_classification_datamanager() - D.name = 'test' + D.name = "test" class SideEffect(object): def __init__(self): @@ -252,61 +336,93 @@ def configuration_fully_fitted(self): # if we need to add a special indicator to show that this is the # final call to iterative fit if self.fully_fitted_call_count == 5: - raise ValueError('fixture') + raise ValueError("fixture") return self.fully_fitted_call_count > 10 - Xt_fixture = 'Xt_fixture' + Xt_fixture = "Xt_fixture" pipeline_mock.estimator_supports_iterative_fit.return_value = True - pipeline_mock.configuration_fully_fitted.side_effect = \ + 
pipeline_mock.configuration_fully_fitted.side_effect = ( SideEffect().configuration_fully_fitted + ) pipeline_mock.fit_transformer.return_value = Xt_fixture, {} - pipeline_mock.predict_proba.side_effect = \ - lambda X, batch_size=None: np.tile([0.6, 0.4], (len(X), 1)) + pipeline_mock.predict_proba.side_effect = lambda X, batch_size=None: np.tile( + [0.6, 0.4], (len(X), 1) + ) pipeline_mock.side_effect = lambda **kwargs: pipeline_mock pipeline_mock.get_additional_run_info.return_value = None pipeline_mock.get_max_iter.return_value = 512 - pipeline_mock.get_current_iter.side_effect = (2, 4, 8, 16, 32, 64, 128, 256, 512) + pipeline_mock.get_current_iter.side_effect = ( + 2, + 4, + 8, + 16, + 32, + 64, + 128, + 256, + 512, + ) configuration = unittest.mock.Mock(spec=Configuration) backend_api = backend.create( temporary_directory=self.tmp_dir, output_directory=None, - prefix="auto-sklearn" + prefix="auto-sklearn", ) backend_api.load_datamanager = lambda: D queue_ = multiprocessing.Queue() - evaluator = TrainEvaluator(backend_api, queue_, - port=self.port, - configuration=configuration, - resampling_strategy='holdout-iterative-fit', - scoring_functions=None, - output_y_hat_optimization=True, - metric=accuracy, - budget=0.0, - additional_components=dict(), - ) + evaluator = TrainEvaluator( + backend_api, + queue_, + port=self.port, + configuration=configuration, + resampling_strategy="holdout-iterative-fit", + scoring_functions=None, + output_y_hat_optimization=True, + metric=accuracy, + budget=0.0, + additional_components=dict(), + ) evaluator.file_output = unittest.mock.Mock(spec=evaluator.file_output) evaluator.file_output.return_value = (None, {}) class LossSideEffect(object): def __init__(self): - self.losses = [0.8, 0.8, 0.8, 0.8, - 0.6, 0.6, 0.6, 0.6, - 0.4, 0.4, 0.4, 0.4, - 0.2, 0.2, 0.2, 0.2, - 0.0, 0.0, 0.0, 0.0] + self.losses = [ + 0.8, + 0.8, + 0.8, + 0.8, + 0.6, + 0.6, + 0.6, + 0.6, + 0.4, + 0.4, + 0.4, + 0.4, + 0.2, + 0.2, + 0.2, + 0.2, + 0.0, + 0.0, + 0.0, + 0.0, + ] self.iteration = 0 def side_effect(self, *args, **kwargs): self.iteration += 1 return self.losses[self.iteration - 1] + evaluator._loss = unittest.mock.Mock() evaluator._loss.side_effect = LossSideEffect().side_effect self.assertRaisesRegex( ValueError, - 'fixture', + "fixture", evaluator.fit_predict_and_loss, iterative=True, ) @@ -314,7 +430,7 @@ def side_effect(self, *args, **kwargs): for i in range(1, 3): rval = evaluator.queue.get(timeout=1) - self.assertAlmostEqual(rval['loss'], 1.0 - (0.2 * i)) + self.assertAlmostEqual(rval["loss"], 1.0 - (0.2 * i)) self.assertRaises(queue.Empty, evaluator.queue.get, timeout=1) self.assertEqual(pipeline_mock.iterative_fit.call_count, 2) @@ -322,24 +438,29 @@ def side_effect(self, *args, **kwargs): # and a total of two calls each because of two iterations of fitting self.assertEqual(evaluator.model.predict_proba.call_count, 8) self.assertEqual(evaluator.file_output.call_args[0][0].shape[0], 23) - self.assertEqual(evaluator.file_output.call_args[0][1].shape[0], - D.data['Y_valid'].shape[0]) - self.assertEqual(evaluator.file_output.call_args[0][2].shape[0], - D.data['Y_test'].shape[0]) + self.assertEqual( + evaluator.file_output.call_args[0][1].shape[0], D.data["Y_valid"].shape[0] + ) + self.assertEqual( + evaluator.file_output.call_args[0][2].shape[0], D.data["Y_test"].shape[0] + ) self.assertEqual(evaluator.file_output.call_count, 2) self.assertEqual(evaluator.model.fit.call_count, 0) - @unittest.mock.patch('autosklearn.pipeline.classification.SimpleClassificationPipeline') + 
@unittest.mock.patch( + "autosklearn.pipeline.classification.SimpleClassificationPipeline" + ) def test_iterative_holdout_not_iterative(self, pipeline_mock): # Regular fitting D = get_binary_classification_datamanager() - D.name = 'test' + D.name = "test" - Xt_fixture = 'Xt_fixture' + Xt_fixture = "Xt_fixture" pipeline_mock.estimator_supports_iterative_fit.return_value = False pipeline_mock.fit_transformer.return_value = Xt_fixture, {} - pipeline_mock.predict_proba.side_effect = \ - lambda X, batch_size=None: np.tile([0.6, 0.4], (len(X), 1)) + pipeline_mock.predict_proba.side_effect = lambda X, batch_size=None: np.tile( + [0.6, 0.4], (len(X), 1) + ) pipeline_mock.side_effect = lambda **kwargs: pipeline_mock pipeline_mock.get_additional_run_info.return_value = None @@ -347,20 +468,22 @@ def test_iterative_holdout_not_iterative(self, pipeline_mock): backend_api = backend.create( temporary_directory=self.tmp_dir, output_directory=None, - prefix="auto-sklearn" + prefix="auto-sklearn", ) backend_api.load_datamanager = lambda: D queue_ = multiprocessing.Queue() - evaluator = TrainEvaluator(backend_api, queue_, - port=self.port, - configuration=configuration, - resampling_strategy='holdout-iterative-fit', - scoring_functions=None, - output_y_hat_optimization=True, - metric=accuracy, - additional_components=dict(), - ) + evaluator = TrainEvaluator( + backend_api, + queue_, + port=self.port, + configuration=configuration, + resampling_strategy="holdout-iterative-fit", + scoring_functions=None, + output_y_hat_optimization=True, + metric=accuracy, + additional_components=dict(), + ) evaluator.file_output = unittest.mock.Mock(spec=evaluator.file_output) evaluator.file_output.return_value = (None, {}) @@ -368,26 +491,31 @@ def test_iterative_holdout_not_iterative(self, pipeline_mock): self.assertEqual(evaluator.file_output.call_count, 1) rval = evaluator.queue.get(timeout=1) - self.assertAlmostEqual(rval['loss'], 0.47826086956521741) + self.assertAlmostEqual(rval["loss"], 0.47826086956521741) self.assertRaises(queue.Empty, evaluator.queue.get, timeout=1) self.assertEqual(pipeline_mock.iterative_fit.call_count, 0) # four calls for train, opt, valid and test self.assertEqual(evaluator.model.predict_proba.call_count, 4) self.assertEqual(evaluator.file_output.call_args[0][0].shape[0], 23) - self.assertEqual(evaluator.file_output.call_args[0][1].shape[0], - D.data['Y_valid'].shape[0]) - self.assertEqual(evaluator.file_output.call_args[0][2].shape[0], - D.data['Y_test'].shape[0]) + self.assertEqual( + evaluator.file_output.call_args[0][1].shape[0], D.data["Y_valid"].shape[0] + ) + self.assertEqual( + evaluator.file_output.call_args[0][2].shape[0], D.data["Y_test"].shape[0] + ) self.assertEqual(evaluator.file_output.call_count, 1) self.assertEqual(evaluator.model.fit.call_count, 1) - @unittest.mock.patch('autosklearn.pipeline.classification.SimpleClassificationPipeline') + @unittest.mock.patch( + "autosklearn.pipeline.classification.SimpleClassificationPipeline" + ) def test_cv(self, pipeline_mock): D = get_binary_classification_datamanager() - pipeline_mock.predict_proba.side_effect = \ - lambda X, batch_size=None: np.tile([0.6, 0.4], (len(X), 1)) + pipeline_mock.predict_proba.side_effect = lambda X, batch_size=None: np.tile( + [0.6, 0.4], (len(X), 1) + ) pipeline_mock.side_effect = lambda **kwargs: pipeline_mock pipeline_mock.get_additional_run_info.return_value = None @@ -395,21 +523,23 @@ def test_cv(self, pipeline_mock): backend_api = backend.create( temporary_directory=self.tmp_dir, 
output_directory=None, - prefix="auto-sklearn" + prefix="auto-sklearn", ) backend_api.load_datamanager = lambda: D queue_ = multiprocessing.Queue() - evaluator = TrainEvaluator(backend_api, queue_, - port=self.port, - configuration=configuration, - resampling_strategy='cv', - resampling_strategy_args={'folds': 5}, - scoring_functions=None, - output_y_hat_optimization=True, - metric=accuracy, - additional_components=dict(), - ) + evaluator = TrainEvaluator( + backend_api, + queue_, + port=self.port, + configuration=configuration, + resampling_strategy="cv", + resampling_strategy_args={"folds": 5}, + scoring_functions=None, + output_y_hat_optimization=True, + metric=accuracy, + additional_components=dict(), + ) evaluator.file_output = unittest.mock.Mock(spec=evaluator.file_output) evaluator.file_output.return_value = (None, {}) @@ -417,7 +547,7 @@ def test_cv(self, pipeline_mock): rval = read_queue(evaluator.queue) self.assertEqual(len(rval), 1) - result = rval[0]['loss'] + result = rval[0]["loss"] self.assertEqual(len(rval[0]), 3) self.assertRaises(queue.Empty, evaluator.queue.get, timeout=1) @@ -427,49 +557,57 @@ def test_cv(self, pipeline_mock): # Fifteen calls because of the training, holdout, validation and # test set (4 sets x 5 folds = 20) self.assertEqual(pipeline_mock.predict_proba.call_count, 20) - self.assertEqual(evaluator.file_output.call_args[0][0].shape[0], - D.data['Y_train'].shape[0]) - self.assertEqual(evaluator.file_output.call_args[0][1].shape[0], - D.data['Y_valid'].shape[0]) - self.assertEqual(evaluator.file_output.call_args[0][2].shape[0], - D.data['Y_test'].shape[0]) + self.assertEqual( + evaluator.file_output.call_args[0][0].shape[0], D.data["Y_train"].shape[0] + ) + self.assertEqual( + evaluator.file_output.call_args[0][1].shape[0], D.data["Y_valid"].shape[0] + ) + self.assertEqual( + evaluator.file_output.call_args[0][2].shape[0], D.data["Y_test"].shape[0] + ) # The model prior to fitting is saved, this cannot be directly tested # because of the way the mock module is used. 
Instead, we test whether # the if block in which model assignment is done is accessed self.assertTrue(evaluator._added_empty_model) - @unittest.mock.patch('autosklearn.pipeline.classification.SimpleClassificationPipeline') + @unittest.mock.patch( + "autosklearn.pipeline.classification.SimpleClassificationPipeline" + ) def test_partial_cv(self, pipeline_mock): D = get_binary_classification_datamanager() - pipeline_mock.predict_proba.side_effect = \ - lambda X, batch_size=None: np.tile([0.6, 0.4], (len(X), 1)) + pipeline_mock.predict_proba.side_effect = lambda X, batch_size=None: np.tile( + [0.6, 0.4], (len(X), 1) + ) pipeline_mock.side_effect = lambda **kwargs: pipeline_mock pipeline_mock.get_additional_run_info.return_value = None pipeline_mock.get_max_iter.return_value = 1 pipeline_mock.get_current_iter.return_value = 1 D = get_binary_classification_datamanager() - D.name = 'test' + D.name = "test" configuration = unittest.mock.Mock(spec=Configuration) backend_api = backend.create( temporary_directory=self.tmp_dir, output_directory=None, - prefix="auto-sklearn" + prefix="auto-sklearn", ) backend_api.load_datamanager = lambda: D queue_ = multiprocessing.Queue() - evaluator = TrainEvaluator(backend_api, queue_, - port=self.port, - configuration=configuration, - resampling_strategy='partial-cv', - resampling_strategy_args={'folds': 5}, - scoring_functions=None, - output_y_hat_optimization=True, - metric=accuracy, - additional_components=dict(), - ) + evaluator = TrainEvaluator( + backend_api, + queue_, + port=self.port, + configuration=configuration, + resampling_strategy="partial-cv", + resampling_strategy_args={"folds": 5}, + scoring_functions=None, + output_y_hat_optimization=True, + metric=accuracy, + additional_components=dict(), + ) evaluator.file_output = unittest.mock.Mock(spec=evaluator.file_output) evaluator.file_output.return_value = (None, {}) @@ -480,19 +618,21 @@ def test_partial_cv(self, pipeline_mock): self.assertRaises(queue.Empty, evaluator.queue.get, timeout=1) self.assertEqual(evaluator.file_output.call_count, 0) - self.assertEqual(rval['loss'], 0.5) + self.assertEqual(rval["loss"], 0.5) self.assertEqual(pipeline_mock.fit.call_count, 1) self.assertEqual(pipeline_mock.predict_proba.call_count, 4) # The model prior to fitting is saved, this cannot be directly tested # because of the way the mock module is used. 
Instead, we test whether # the if block in which model assignment is done is accessed - self.assertTrue(hasattr(evaluator, 'model')) + self.assertTrue(hasattr(evaluator, "model")) - @unittest.mock.patch('autosklearn.pipeline.classification.SimpleClassificationPipeline') + @unittest.mock.patch( + "autosklearn.pipeline.classification.SimpleClassificationPipeline" + ) def test_iterative_partial_cv(self, pipeline_mock): # Regular fitting D = get_binary_classification_datamanager() - D.name = 'test' + D.name = "test" class SideEffect(object): def __init__(self): @@ -505,57 +645,101 @@ def configuration_fully_fitted(self): # final call to iterative fit return self.fully_fitted_call_count > 18 - Xt_fixture = 'Xt_fixture' + Xt_fixture = "Xt_fixture" pipeline_mock.estimator_supports_iterative_fit.return_value = True - pipeline_mock.configuration_fully_fitted.side_effect = \ + pipeline_mock.configuration_fully_fitted.side_effect = ( SideEffect().configuration_fully_fitted + ) pipeline_mock.fit_transformer.return_value = Xt_fixture, {} - pipeline_mock.predict_proba.side_effect = \ - lambda X, batch_size=None: np.tile([0.6, 0.4], (len(X), 1)) + pipeline_mock.predict_proba.side_effect = lambda X, batch_size=None: np.tile( + [0.6, 0.4], (len(X), 1) + ) pipeline_mock.get_additional_run_info.return_value = None pipeline_mock.side_effect = lambda **kwargs: pipeline_mock pipeline_mock.get_max_iter.return_value = 512 - pipeline_mock.get_current_iter.side_effect = (2, 4, 8, 16, 32, 64, 128, 256, 512) + pipeline_mock.get_current_iter.side_effect = ( + 2, + 4, + 8, + 16, + 32, + 64, + 128, + 256, + 512, + ) configuration = unittest.mock.Mock(spec=Configuration) backend_api = backend.create( temporary_directory=self.tmp_dir, output_directory=None, - prefix="auto-sklearn" + prefix="auto-sklearn", ) backend_api.load_datamanager = lambda: D queue_ = multiprocessing.Queue() - evaluator = TrainEvaluator(backend_api, queue_, - port=self.port, - configuration=configuration, - resampling_strategy='partial-cv-iterative-fit', - resampling_strategy_args={'folds': 5}, - scoring_functions=None, - output_y_hat_optimization=True, - metric=accuracy, - budget=0.0, - additional_components=dict(), - ) + evaluator = TrainEvaluator( + backend_api, + queue_, + port=self.port, + configuration=configuration, + resampling_strategy="partial-cv-iterative-fit", + resampling_strategy_args={"folds": 5}, + scoring_functions=None, + output_y_hat_optimization=True, + metric=accuracy, + budget=0.0, + additional_components=dict(), + ) evaluator.file_output = unittest.mock.Mock(spec=evaluator.file_output) evaluator.file_output.return_value = (None, {}) class LossSideEffect(object): def __init__(self): - self.losses = [1.0, 1.0, 1.0, 1.0, - 0.9, 0.9, 0.9, 0.9, - 0.8, 0.8, 0.8, 0.8, - 0.7, 0.7, 0.7, 0.7, - 0.6, 0.6, 0.6, 0.6, - 0.5, 0.5, 0.5, 0.5, - 0.4, 0.4, 0.4, 0.4, - 0.3, 0.3, 0.3, 0.3, - 0.2, 0.2, 0.2, 0.2] + self.losses = [ + 1.0, + 1.0, + 1.0, + 1.0, + 0.9, + 0.9, + 0.9, + 0.9, + 0.8, + 0.8, + 0.8, + 0.8, + 0.7, + 0.7, + 0.7, + 0.7, + 0.6, + 0.6, + 0.6, + 0.6, + 0.5, + 0.5, + 0.5, + 0.5, + 0.4, + 0.4, + 0.4, + 0.4, + 0.3, + 0.3, + 0.3, + 0.3, + 0.2, + 0.2, + 0.2, + 0.2, + ] self.iteration = 0 def side_effect(self, *args, **kwargs): self.iteration += 1 return self.losses[self.iteration - 1] + evaluator._loss = unittest.mock.Mock() evaluator._loss.side_effect = LossSideEffect().side_effect @@ -565,118 +749,145 @@ def side_effect(self, *args, **kwargs): for i in range(1, 10): rval = evaluator.queue.get(timeout=1) - 
self.assertAlmostEqual(rval['loss'], 1.0 - (0.1 * (i - 1))) + self.assertAlmostEqual(rval["loss"], 1.0 - (0.1 * (i - 1))) if i < 9: - self.assertEqual(rval['status'], StatusType.DONOTADVANCE) + self.assertEqual(rval["status"], StatusType.DONOTADVANCE) else: - self.assertEqual(rval['status'], StatusType.SUCCESS) + self.assertEqual(rval["status"], StatusType.SUCCESS) self.assertRaises(queue.Empty, evaluator.queue.get, timeout=1) self.assertEqual(pipeline_mock.iterative_fit.call_count, 9) self.assertEqual( - [cal[1]['n_iter'] for cal in pipeline_mock.iterative_fit.call_args_list], - [2, 2, 4, 8, 16, 32, 64, 128, 256] + [cal[1]["n_iter"] for cal in pipeline_mock.iterative_fit.call_args_list], + [2, 2, 4, 8, 16, 32, 64, 128, 256], ) # fifteen calls because of the holdout, the validation and the test set # and a total of five calls because of five iterations of fitting - self.assertTrue(hasattr(evaluator, 'model')) + self.assertTrue(hasattr(evaluator, "model")) self.assertEqual(pipeline_mock.iterative_fit.call_count, 9) # 20 calls because of train, holdout, the validation and the test set # and a total of five calls because of five iterations of fitting self.assertEqual(pipeline_mock.predict_proba.call_count, 36) - @unittest.mock.patch.object(TrainEvaluator, '_loss') - @unittest.mock.patch.object(TrainEvaluator, '_get_model') + @unittest.mock.patch.object(TrainEvaluator, "_loss") + @unittest.mock.patch.object(TrainEvaluator, "_get_model") def test_file_output(self, loss_mock, model_mock): D = get_regression_datamanager() - D.name = 'test' + D.name = "test" self.backend_mock.load_datamanager.return_value = D configuration = unittest.mock.Mock(spec=Configuration) queue_ = multiprocessing.Queue() loss_mock.return_value = None model_mock.return_value = None - evaluator = TrainEvaluator(self.backend_mock, queue=queue_, - port=self.port, - configuration=configuration, - resampling_strategy='cv', - resampling_strategy_args={'folds': 5}, - scoring_functions=SCORER_LIST, - output_y_hat_optimization=True, - metric=accuracy, - additional_components=dict(),) + evaluator = TrainEvaluator( + self.backend_mock, + queue=queue_, + port=self.port, + configuration=configuration, + resampling_strategy="cv", + resampling_strategy_args={"folds": 5}, + scoring_functions=SCORER_LIST, + output_y_hat_optimization=True, + metric=accuracy, + additional_components=dict(), + ) self.backend_mock.get_model_dir.return_value = True - evaluator.model = 'model' - evaluator.Y_optimization = D.data['Y_train'] + evaluator.model = "model" + evaluator.Y_optimization = D.data["Y_train"] rval = evaluator.file_output( - D.data['Y_train'], - D.data['Y_valid'], - D.data['Y_test'], + D.data["Y_train"], + D.data["Y_valid"], + D.data["Y_test"], ) self.assertEqual(rval, (None, {})) self.assertEqual(self.backend_mock.save_targets_ensemble.call_count, 1) self.assertEqual(self.backend_mock.save_numrun_to_dir.call_count, 1) - self.assertEqual(self.backend_mock.save_numrun_to_dir.call_args_list[-1][1].keys(), - {'seed', 'idx', 'budget', 'model', 'cv_model', - 'ensemble_predictions', 'valid_predictions', 'test_predictions'}) - self.assertIsNotNone(self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]['model']) - self.assertIsNone(self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]['cv_model']) + self.assertEqual( + self.backend_mock.save_numrun_to_dir.call_args_list[-1][1].keys(), + { + "seed", + "idx", + "budget", + "model", + "cv_model", + "ensemble_predictions", + "valid_predictions", + "test_predictions", + }, + ) + 
self.assertIsNotNone( + self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]["model"] + ) + self.assertIsNone( + self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]["cv_model"] + ) - evaluator.models = ['model2', 'model2'] + evaluator.models = ["model2", "model2"] rval = evaluator.file_output( - D.data['Y_train'], - D.data['Y_valid'], - D.data['Y_test'], + D.data["Y_train"], + D.data["Y_valid"], + D.data["Y_test"], ) self.assertEqual(rval, (None, {})) self.assertEqual(self.backend_mock.save_targets_ensemble.call_count, 2) self.assertEqual(self.backend_mock.save_numrun_to_dir.call_count, 2) - self.assertEqual(self.backend_mock.save_numrun_to_dir.call_args_list[-1][1].keys(), - {'seed', 'idx', 'budget', 'model', 'cv_model', - 'ensemble_predictions', 'valid_predictions', 'test_predictions'}) - self.assertIsNotNone(self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]['model']) - self.assertIsNotNone(self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]['cv_model']) + self.assertEqual( + self.backend_mock.save_numrun_to_dir.call_args_list[-1][1].keys(), + { + "seed", + "idx", + "budget", + "model", + "cv_model", + "ensemble_predictions", + "valid_predictions", + "test_predictions", + }, + ) + self.assertIsNotNone( + self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]["model"] + ) + self.assertIsNotNone( + self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]["cv_model"] + ) # Check for not containing NaNs - that the models don't predict nonsense # for unseen data - D.data['Y_valid'][0] = np.NaN + D.data["Y_valid"][0] = np.NaN rval = evaluator.file_output( - D.data['Y_train'], - D.data['Y_valid'], - D.data['Y_test'], + D.data["Y_train"], + D.data["Y_valid"], + D.data["Y_test"], ) self.assertEqual( rval, ( 1.0, - { - 'error': - 'Model predictions for validation set contains NaNs.' - }, - ) + {"error": "Model predictions for validation set contains NaNs."}, + ), ) - D.data['Y_train'][0] = np.NaN + D.data["Y_train"][0] = np.NaN rval = evaluator.file_output( - D.data['Y_train'], - D.data['Y_valid'], - D.data['Y_test'], + D.data["Y_train"], + D.data["Y_valid"], + D.data["Y_test"], ) self.assertEqual( rval, ( 1.0, - { - 'error': - 'Model predictions for optimization set contains NaNs.' 
- }, - ) + {"error": "Model predictions for optimization set contains NaNs."}, + ), ) - @unittest.mock.patch('autosklearn.automl_common.common.utils.backend.Backend') - @unittest.mock.patch('autosklearn.pipeline.classification.SimpleClassificationPipeline') + @unittest.mock.patch("autosklearn.automl_common.common.utils.backend.Backend") + @unittest.mock.patch( + "autosklearn.pipeline.classification.SimpleClassificationPipeline" + ) def test_subsample_indices_classification(self, mock, backend_mock): configuration = unittest.mock.Mock(spec=Configuration) @@ -684,26 +895,32 @@ def test_subsample_indices_classification(self, mock, backend_mock): D = get_binary_classification_datamanager() backend_mock.load_datamanager.return_value = D backend_mock.temporary_directory = tempfile.gettempdir() - evaluator = TrainEvaluator(backend_mock, queue_, - port=self.port, - configuration=configuration, - resampling_strategy='cv', - resampling_strategy_args={'folds': 10}, - metric=accuracy, - additional_components=dict(), - ) + evaluator = TrainEvaluator( + backend_mock, + queue_, + port=self.port, + configuration=configuration, + resampling_strategy="cv", + resampling_strategy_args={"folds": 10}, + metric=accuracy, + additional_components=dict(), + ) train_indices = np.arange(69, dtype=int) train_indices1 = subsample_indices( - train_indices, 0.1449, evaluator.task_type, evaluator.Y_train) + train_indices, 0.1449, evaluator.task_type, evaluator.Y_train + ) evaluator.subsample = 20 train_indices2 = subsample_indices( - train_indices, 0.2898, evaluator.task_type, evaluator.Y_train) + train_indices, 0.2898, evaluator.task_type, evaluator.Y_train + ) evaluator.subsample = 30 train_indices3 = subsample_indices( - train_indices, 0.4347, evaluator.task_type, evaluator.Y_train) + train_indices, 0.4347, evaluator.task_type, evaluator.Y_train + ) evaluator.subsample = 67 train_indices4 = subsample_indices( - train_indices, 0.971, evaluator.task_type, evaluator.Y_train) + train_indices, 0.971, evaluator.task_type, evaluator.Y_train + ) # Common cases for ti in train_indices1: self.assertIn(ti, train_indices2) @@ -714,62 +931,98 @@ def test_subsample_indices_classification(self, mock, backend_mock): # Corner cases self.assertRaisesRegex( - ValueError, 'train_size=0.0 should be either positive and smaller than the ' - r'number of samples 69 or a float in the \(0, 1\) range', - subsample_indices, train_indices, 0.0, evaluator.task_type, evaluator.Y_train) + ValueError, + "train_size=0.0 should be either positive and smaller than the " + r"number of samples 69 or a float in the \(0, 1\) range", + subsample_indices, + train_indices, + 0.0, + evaluator.task_type, + evaluator.Y_train, + ) # With equal or greater it should return a non-shuffled array of indices train_indices5 = subsample_indices( - train_indices, 1.0, evaluator.task_type, evaluator.Y_train) + train_indices, 1.0, evaluator.task_type, evaluator.Y_train + ) self.assertTrue(np.all(train_indices5 == train_indices)) evaluator.subsample = 68 self.assertRaisesRegex( - ValueError, 'The test_size = 1 should be greater or equal to the number of ' - 'classes = 2', subsample_indices, train_indices, 0.9999, evaluator.task_type, - evaluator.Y_train) + ValueError, + "The test_size = 1 should be greater or equal to the number of " + "classes = 2", + subsample_indices, + train_indices, + 0.9999, + evaluator.task_type, + evaluator.Y_train, + ) - @unittest.mock.patch('autosklearn.automl_common.common.utils.backend.Backend') - 
@unittest.mock.patch('autosklearn.pipeline.classification.SimpleClassificationPipeline') + @unittest.mock.patch("autosklearn.automl_common.common.utils.backend.Backend") + @unittest.mock.patch( + "autosklearn.pipeline.classification.SimpleClassificationPipeline" + ) def test_subsample_indices_regression(self, mock, backend_mock): configuration = unittest.mock.Mock(spec=Configuration) queue_ = multiprocessing.Queue() backend_mock.temporary_directory = tempfile.gettempdir() - evaluator = TrainEvaluator(backend_mock, queue_, - port=self.port, - configuration=configuration, - resampling_strategy='cv', - resampling_strategy_args={'folds': 10}, - metric=accuracy, - additional_components=dict(), - ) + evaluator = TrainEvaluator( + backend_mock, + queue_, + port=self.port, + configuration=configuration, + resampling_strategy="cv", + resampling_strategy_args={"folds": 10}, + metric=accuracy, + additional_components=dict(), + ) train_indices = np.arange(69, dtype=int) - train_indices3 = subsample_indices(train_indices, subsample=0.4347, - task_type=evaluator.task_type, - Y_train=evaluator.Y_train) + train_indices3 = subsample_indices( + train_indices, + subsample=0.4347, + task_type=evaluator.task_type, + Y_train=evaluator.Y_train, + ) evaluator.subsample = 67 - train_indices4 = subsample_indices(train_indices, subsample=0.4347, - task_type=evaluator.task_type, - Y_train=evaluator.Y_train) + train_indices4 = subsample_indices( + train_indices, + subsample=0.4347, + task_type=evaluator.task_type, + Y_train=evaluator.Y_train, + ) # Common cases for ti in train_indices3: self.assertIn(ti, train_indices4) # Corner cases self.assertRaisesRegex( - ValueError, 'train_size=0.0 should be either positive and smaller than the ' - r'number of samples 69 or a float in the \(0, 1\) range', - subsample_indices, train_indices, 0.0, - evaluator.task_type, evaluator.Y_train) + ValueError, + "train_size=0.0 should be either positive and smaller than the " + r"number of samples 69 or a float in the \(0, 1\) range", + subsample_indices, + train_indices, + 0.0, + evaluator.task_type, + evaluator.Y_train, + ) self.assertRaisesRegex( - ValueError, 'Subsample must not be larger than 1, but is 1.000100', - subsample_indices, train_indices, 1.0001, - evaluator.task_type, evaluator.Y_train) + ValueError, + "Subsample must not be larger than 1, but is 1.000100", + subsample_indices, + train_indices, + 1.0001, + evaluator.task_type, + evaluator.Y_train, + ) # With equal or greater it should return a non-shuffled array of indices - train_indices6 = subsample_indices(train_indices, 1.0, evaluator.task_type, - evaluator.Y_train) + train_indices6 = subsample_indices( + train_indices, 1.0, evaluator.task_type, evaluator.Y_train + ) np.testing.assert_allclose(train_indices6, train_indices) - @unittest.mock.patch('autosklearn.pipeline.classification.SimpleClassificationPipeline') + @unittest.mock.patch( + "autosklearn.pipeline.classification.SimpleClassificationPipeline" + ) def test_predict_proba_binary_classification(self, mock): D = get_binary_classification_datamanager() self.backend_mock.load_datamanager.return_value = D @@ -781,30 +1034,38 @@ def test_predict_proba_binary_classification(self, mock): configuration = unittest.mock.Mock(spec=Configuration) queue_ = multiprocessing.Queue() - evaluator = TrainEvaluator(self.backend_mock, queue_, - port=self.port, - configuration=configuration, - resampling_strategy='cv', - resampling_strategy_args={'folds': 10}, - output_y_hat_optimization=False, - metric=accuracy, - 
additional_components=dict(), - ) + evaluator = TrainEvaluator( + self.backend_mock, + queue_, + port=self.port, + configuration=configuration, + resampling_strategy="cv", + resampling_strategy_args={"folds": 10}, + output_y_hat_optimization=False, + metric=accuracy, + additional_components=dict(), + ) evaluator.fit_predict_and_loss() Y_optimization_pred = self.backend_mock.save_numrun_to_dir.call_args_list[0][1][ - 'ensemble_predictions'] + "ensemble_predictions" + ] for i in range(7): self.assertEqual(0.9, Y_optimization_pred[i][1]) - @unittest.mock.patch.object(TrainEvaluator, 'file_output') - @unittest.mock.patch.object(TrainEvaluator, '_partial_fit_and_predict_standard') - @unittest.mock.patch('autosklearn.automl_common.common.utils.backend.Backend') - @unittest.mock.patch('autosklearn.pipeline.classification.SimpleClassificationPipeline') + @unittest.mock.patch.object(TrainEvaluator, "file_output") + @unittest.mock.patch.object(TrainEvaluator, "_partial_fit_and_predict_standard") + @unittest.mock.patch("autosklearn.automl_common.common.utils.backend.Backend") + @unittest.mock.patch( + "autosklearn.pipeline.classification.SimpleClassificationPipeline" + ) def test_fit_predict_and_loss_standard_additional_run_info( - self, mock, backend_mock, _partial_fit_and_predict_mock, - file_output_mock, + self, + mock, + backend_mock, + _partial_fit_and_predict_mock, + file_output_mock, ): D = get_binary_classification_datamanager() backend_mock.load_datamanager.return_value = D @@ -815,7 +1076,7 @@ def test_fit_predict_and_loss_standard_additional_run_info( np.array([[0.1, 0.9]] * 23), np.array([[0.1, 0.9]] * 25), np.array([[0.1, 0.9]] * 6), - {'a': 5}, + {"a": 5}, ) file_output_mock.return_value = (None, {}) @@ -823,10 +1084,11 @@ def test_fit_predict_and_loss_standard_additional_run_info( queue_ = multiprocessing.Queue() evaluator = TrainEvaluator( - backend_mock, queue_, + backend_mock, + queue_, port=self.port, configuration=configuration, - resampling_strategy='holdout', + resampling_strategy="holdout", output_y_hat_optimization=False, metric=accuracy, additional_components=dict(), @@ -840,8 +1102,8 @@ def test_fit_predict_and_loss_standard_additional_run_info( rval = evaluator.fit_predict_and_loss(iterative=False) self.assertIsNone(rval) element = queue_.get() - self.assertEqual(element['status'], StatusType.SUCCESS) - self.assertEqual(element['additional_run_info']['a'], 5) + self.assertEqual(element["status"], StatusType.SUCCESS) + self.assertEqual(element["additional_run_info"]["a"], 5) self.assertEqual(_partial_fit_and_predict_mock.call_count, 1) class SideEffect(object): @@ -856,7 +1118,7 @@ def __call__(self, *args, **kwargs): np.array([[0.1, 0.9]] * 35), np.array([[0.1, 0.9]] * 25), np.array([[0.1, 0.9]] * 6), - {'a': 5} + {"a": 5}, ) else: return ( @@ -864,15 +1126,17 @@ def __call__(self, *args, **kwargs): np.array([[0.1, 0.9]] * 34), np.array([[0.1, 0.9]] * 25), np.array([[0.1, 0.9]] * 6), - {'a': 5} + {"a": 5}, ) + _partial_fit_and_predict_mock.side_effect = SideEffect() evaluator = TrainEvaluator( - backend_mock, queue_, + backend_mock, + queue_, port=self.port, configuration=configuration, - resampling_strategy='cv', - resampling_strategy_args={'folds': 2}, + resampling_strategy="cv", + resampling_strategy_args={"folds": 2}, output_y_hat_optimization=False, metric=accuracy, additional_components=dict(), @@ -885,28 +1149,34 @@ def __call__(self, *args, **kwargs): self.assertRaisesRegex( TAEAbortException, - 'Found additional run info "{\'a\': 5}" in fold 1, ' - 'but cannot 
handle additional run info if fold >= 1.', + "Found additional run info \"{'a': 5}\" in fold 1, " + "but cannot handle additional run info if fold >= 1.", evaluator.fit_predict_and_loss, - iterative=False + iterative=False, ) - @unittest.mock.patch.object(TrainEvaluator, '_loss') - @unittest.mock.patch.object(TrainEvaluator, 'finish_up') - @unittest.mock.patch('autosklearn.automl_common.common.utils.backend.Backend') - @unittest.mock.patch('autosklearn.pipeline.classification.SimpleClassificationPipeline') + @unittest.mock.patch.object(TrainEvaluator, "_loss") + @unittest.mock.patch.object(TrainEvaluator, "finish_up") + @unittest.mock.patch("autosklearn.automl_common.common.utils.backend.Backend") + @unittest.mock.patch( + "autosklearn.pipeline.classification.SimpleClassificationPipeline" + ) def test_fit_predict_and_loss_iterative_additional_run_info( - self, mock, backend_mock, finish_up_mock, loss_mock, + self, + mock, + backend_mock, + finish_up_mock, + loss_mock, ): - class Counter: counter = 0 def __call__(self): self.counter += 1 return False if self.counter <= 1 else True + mock.estimator_supports_iterative_fit.return_value = True - mock.fit_transformer.return_value = ('Xt', {}) + mock.fit_transformer.return_value = ("Xt", {}) mock.configuration_fully_fitted.side_effect = Counter() mock.get_current_iter.side_effect = Counter() mock.get_max_iter.return_value = 1 @@ -922,10 +1192,11 @@ def __call__(self): queue_ = multiprocessing.Queue() evaluator = TrainEvaluator( - backend_mock, queue_, + backend_mock, + queue_, port=self.port, configuration=configuration, - resampling_strategy='holdout', + resampling_strategy="holdout", output_y_hat_optimization=False, metric=accuracy, budget=0.0, @@ -938,17 +1209,23 @@ def __call__(self): rval = evaluator.fit_predict_and_loss(iterative=True) self.assertIsNone(rval) self.assertEqual(finish_up_mock.call_count, 1) - self.assertEqual(finish_up_mock.call_args[1]['additional_run_info'], 14678) - - @unittest.mock.patch.object(TrainEvaluator, '_loss') - @unittest.mock.patch.object(TrainEvaluator, 'finish_up') - @unittest.mock.patch('autosklearn.automl_common.common.utils.backend.Backend') - @unittest.mock.patch('autosklearn.pipeline.classification.SimpleClassificationPipeline') + self.assertEqual(finish_up_mock.call_args[1]["additional_run_info"], 14678) + + @unittest.mock.patch.object(TrainEvaluator, "_loss") + @unittest.mock.patch.object(TrainEvaluator, "finish_up") + @unittest.mock.patch("autosklearn.automl_common.common.utils.backend.Backend") + @unittest.mock.patch( + "autosklearn.pipeline.classification.SimpleClassificationPipeline" + ) def test_fit_predict_and_loss_iterative_noniterativemodel_additional_run_info( - self, mock, backend_mock, finish_up_mock, loss_mock, + self, + mock, + backend_mock, + finish_up_mock, + loss_mock, ): mock.estimator_supports_iterative_fit.return_value = False - mock.fit_transformer.return_value = ('Xt', {}) + mock.fit_transformer.return_value = ("Xt", {}) mock.get_additional_run_info.return_value = 14678 loss_mock.return_value = 0.5 @@ -961,10 +1238,11 @@ def test_fit_predict_and_loss_iterative_noniterativemodel_additional_run_info( queue_ = multiprocessing.Queue() evaluator = TrainEvaluator( - backend_mock, queue_, + backend_mock, + queue_, port=self.port, configuration=configuration, - resampling_strategy='holdout', + resampling_strategy="holdout", output_y_hat_optimization=False, metric=accuracy, additional_components=dict(), @@ -977,14 +1255,20 @@ def 
test_fit_predict_and_loss_iterative_noniterativemodel_additional_run_info( rval = evaluator.fit_predict_and_loss(iterative=True) self.assertIsNone(rval) self.assertEqual(finish_up_mock.call_count, 1) - self.assertEqual(finish_up_mock.call_args[1]['additional_run_info'], 14678) - - @unittest.mock.patch.object(TrainEvaluator, '_loss') - @unittest.mock.patch.object(TrainEvaluator, 'finish_up') - @unittest.mock.patch('autosklearn.automl_common.common.utils.backend.Backend') - @unittest.mock.patch('autosklearn.pipeline.classification.SimpleClassificationPipeline') + self.assertEqual(finish_up_mock.call_args[1]["additional_run_info"], 14678) + + @unittest.mock.patch.object(TrainEvaluator, "_loss") + @unittest.mock.patch.object(TrainEvaluator, "finish_up") + @unittest.mock.patch("autosklearn.automl_common.common.utils.backend.Backend") + @unittest.mock.patch( + "autosklearn.pipeline.classification.SimpleClassificationPipeline" + ) def test_fit_predict_and_loss_budget_additional_run_info( - self, mock, backend_mock, finish_up_mock, loss_mock, + self, + mock, + backend_mock, + finish_up_mock, + loss_mock, ): class Counter: counter = 0 @@ -992,12 +1276,13 @@ class Counter: def __call__(self): self.counter += 1 return False if self.counter <= 1 else True + mock.configuration_fully_fitted.side_effect = Counter() mock.get_current_iter.side_effect = Counter() mock.get_max_iter.return_value = 1 mock.estimator_supports_iterative_fit.return_value = True - mock.fit_transformer.return_value = ('Xt', {}) - mock.get_additional_run_info.return_value = {'val': 14678} + mock.fit_transformer.return_value = ("Xt", {}) + mock.get_additional_run_info.return_value = {"val": 14678} mock.get_max_iter.return_value = 512 loss_mock.return_value = 0.5 @@ -1010,13 +1295,14 @@ def __call__(self): queue_ = multiprocessing.Queue() evaluator = TrainEvaluator( - backend_mock, queue_, + backend_mock, + queue_, port=self.port, configuration=configuration, - resampling_strategy='holdout', + resampling_strategy="holdout", output_y_hat_optimization=False, metric=accuracy, - budget_type='iterations', + budget_type="iterations", budget=50, additional_components=dict(), ) @@ -1028,18 +1314,26 @@ def __call__(self): rval = evaluator.fit_predict_and_loss(iterative=False) self.assertIsNone(rval) self.assertEqual(finish_up_mock.call_count, 1) - self.assertEqual(finish_up_mock.call_args[1]['additional_run_info'], {'val': 14678}) + self.assertEqual( + finish_up_mock.call_args[1]["additional_run_info"], {"val": 14678} + ) - @unittest.mock.patch.object(TrainEvaluator, '_loss') - @unittest.mock.patch.object(TrainEvaluator, 'finish_up') - @unittest.mock.patch('autosklearn.automl_common.common.utils.backend.Backend') - @unittest.mock.patch('autosklearn.pipeline.classification.SimpleClassificationPipeline') + @unittest.mock.patch.object(TrainEvaluator, "_loss") + @unittest.mock.patch.object(TrainEvaluator, "finish_up") + @unittest.mock.patch("autosklearn.automl_common.common.utils.backend.Backend") + @unittest.mock.patch( + "autosklearn.pipeline.classification.SimpleClassificationPipeline" + ) def test_fit_predict_and_loss_budget_2_additional_run_info( - self, mock, backend_mock, finish_up_mock, loss_mock, + self, + mock, + backend_mock, + finish_up_mock, + loss_mock, ): mock.estimator_supports_iterative_fit.return_value = False - mock.fit_transformer.return_value = ('Xt', {}) - mock.get_additional_run_info.return_value = {'val': 14678} + mock.fit_transformer.return_value = ("Xt", {}) + mock.get_additional_run_info.return_value = {"val": 14678} 
loss_mock.return_value = 0.5 D = get_binary_classification_datamanager() @@ -1051,13 +1345,14 @@ def test_fit_predict_and_loss_budget_2_additional_run_info( queue_ = multiprocessing.Queue() evaluator = TrainEvaluator( - backend_mock, queue_, + backend_mock, + queue_, port=self.port, configuration=configuration, - resampling_strategy='holdout', + resampling_strategy="holdout", output_y_hat_optimization=False, metric=accuracy, - budget_type='subsample', + budget_type="subsample", budget=50, additional_components=dict(), ) @@ -1069,7 +1364,9 @@ def test_fit_predict_and_loss_budget_2_additional_run_info( rval = evaluator.fit_predict_and_loss(iterative=False) self.assertIsNone(rval) self.assertEqual(finish_up_mock.call_count, 1) - self.assertEqual(finish_up_mock.call_args[1]['additional_run_info'], {'val': 14678}) + self.assertEqual( + finish_up_mock.call_args[1]["additional_run_info"], {"val": 14678} + ) def test_get_results(self): queue_ = multiprocessing.Queue() @@ -1082,33 +1379,39 @@ def test_get_results(self): def test_datasets(self): for getter in get_dataset_getters(): - testname = '%s_%s' % (os.path.basename(__file__). - replace('.pyc', '').replace('.py', ''), - getter.__name__) + testname = "%s_%s" % ( + os.path.basename(__file__).replace(".pyc", "").replace(".py", ""), + getter.__name__, + ) with self.subTest(testname): D = getter() D_ = copy.deepcopy(D) - y = D.data['Y_train'] + y = D.data["Y_train"] if len(y.shape) == 2 and y.shape[1] == 1: - D_.data['Y_train'] = y.flatten() + D_.data["Y_train"] = y.flatten() self.backend_mock.load_datamanager.return_value = D_ queue_ = multiprocessing.Queue() - metric_lookup = {MULTILABEL_CLASSIFICATION: f1_macro, - BINARY_CLASSIFICATION: accuracy, - MULTICLASS_CLASSIFICATION: accuracy, - REGRESSION: r2} - evaluator = TrainEvaluator(self.backend_mock, queue_, - port=self.port, - resampling_strategy='cv', - resampling_strategy_args={'folds': 2}, - output_y_hat_optimization=False, - metric=metric_lookup[D.info['task']], - additional_components=dict(),) + metric_lookup = { + MULTILABEL_CLASSIFICATION: f1_macro, + BINARY_CLASSIFICATION: accuracy, + MULTICLASS_CLASSIFICATION: accuracy, + REGRESSION: r2, + } + evaluator = TrainEvaluator( + self.backend_mock, + queue_, + port=self.port, + resampling_strategy="cv", + resampling_strategy_args={"folds": 2}, + output_y_hat_optimization=False, + metric=metric_lookup[D.info["task"]], + additional_components=dict(), + ) evaluator.fit_predict_and_loss() rval = evaluator.queue.get(timeout=1) - self.assertTrue(np.isfinite(rval['loss'])) + self.assertTrue(np.isfinite(rval["loss"])) ############################################################################ # Test obtaining a splitter object from scikit-learn @@ -1122,147 +1425,142 @@ def test_get_splitter(self, te_mock): # holdout, binary classification evaluator = TrainEvaluator() - evaluator.resampling_strategy = 'holdout' + evaluator.resampling_strategy = "holdout" evaluator.resampling_strategy_args = {} cv = evaluator.get_splitter(D) - self.assertIsInstance(cv, - sklearn.model_selection.StratifiedShuffleSplit) + self.assertIsInstance(cv, sklearn.model_selection.StratifiedShuffleSplit) # holdout, binary classification, no shuffle evaluator = TrainEvaluator() - evaluator.resampling_strategy = 'holdout' - evaluator.resampling_strategy_args = {'shuffle': False} + evaluator.resampling_strategy = "holdout" + evaluator.resampling_strategy_args = {"shuffle": False} cv = evaluator.get_splitter(D) - self.assertIsInstance(cv, - sklearn.model_selection.PredefinedSplit) + 
self.assertIsInstance(cv, sklearn.model_selection.PredefinedSplit) # holdout, binary classification, fallback to custom shuffle split - D.data['Y_train'] = np.array([0, 0, 0, 1, 1, 1, 2]) + D.data["Y_train"] = np.array([0, 0, 0, 1, 1, 1, 2]) evaluator = TrainEvaluator() - evaluator.resampling_strategy = 'holdout' + evaluator.resampling_strategy = "holdout" evaluator.resampling_strategy_args = {} cv = evaluator.get_splitter(D) - self.assertIsInstance(cv, - autosklearn.evaluation.splitter.CustomStratifiedShuffleSplit) + self.assertIsInstance( + cv, autosklearn.evaluation.splitter.CustomStratifiedShuffleSplit + ) # cv, binary classification - D.data['Y_train'] = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]) + D.data["Y_train"] = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]) evaluator = TrainEvaluator() - evaluator.resampling_strategy = 'cv' - evaluator.resampling_strategy_args = {'folds': 5} + evaluator.resampling_strategy = "cv" + evaluator.resampling_strategy_args = {"folds": 5} cv = evaluator.get_splitter(D) - self.assertIsInstance(cv, - sklearn.model_selection._split.StratifiedKFold) + self.assertIsInstance(cv, sklearn.model_selection._split.StratifiedKFold) # cv, binary classification, shuffle is True - D.data['Y_train'] = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]) + D.data["Y_train"] = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]) evaluator = TrainEvaluator() - evaluator.resampling_strategy = 'cv' - evaluator.resampling_strategy_args = {'folds': 5} + evaluator.resampling_strategy = "cv" + evaluator.resampling_strategy_args = {"folds": 5} cv = evaluator.get_splitter(D) - self.assertIsInstance(cv, - sklearn.model_selection._split.StratifiedKFold) + self.assertIsInstance(cv, sklearn.model_selection._split.StratifiedKFold) self.assertTrue(cv.shuffle) # cv, binary classification, shuffle is False - D.data['Y_train'] = np.array([0, 0, 0, 1, 1, 1]) + D.data["Y_train"] = np.array([0, 0, 0, 1, 1, 1]) evaluator = TrainEvaluator() - evaluator.resampling_strategy = 'cv' - evaluator.resampling_strategy_args = {'folds': 5, 'shuffle': False} + evaluator.resampling_strategy = "cv" + evaluator.resampling_strategy_args = {"folds": 5, "shuffle": False} cv = evaluator.get_splitter(D) - self.assertIsInstance(cv, - sklearn.model_selection._split.KFold) + self.assertIsInstance(cv, sklearn.model_selection._split.KFold) self.assertFalse(cv.shuffle) # cv, binary classification, fallback to custom splitter - D.data['Y_train'] = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2]) + D.data["Y_train"] = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2]) evaluator = TrainEvaluator() - evaluator.resampling_strategy = 'cv' - evaluator.resampling_strategy_args = {'folds': 5} + evaluator.resampling_strategy = "cv" + evaluator.resampling_strategy_args = {"folds": 5} cv = evaluator.get_splitter(D) - self.assertIsInstance(cv, - autosklearn.evaluation.splitter.CustomStratifiedKFold) + self.assertIsInstance(cv, autosklearn.evaluation.splitter.CustomStratifiedKFold) # regression, shuffle split - D.data['Y_train'] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) - D.info['task'] = REGRESSION + D.data["Y_train"] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) + D.info["task"] = REGRESSION evaluator = TrainEvaluator() - evaluator.resampling_strategy = 'holdout' + evaluator.resampling_strategy = "holdout" evaluator.resampling_strategy_args = {} cv = evaluator.get_splitter(D) - self.assertIsInstance(cv, - sklearn.model_selection._split.ShuffleSplit) + self.assertIsInstance(cv, sklearn.model_selection._split.ShuffleSplit) # regression, no shuffle - D.data['Y_train'] = 
np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) - D.info['task'] = REGRESSION + D.data["Y_train"] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) + D.info["task"] = REGRESSION evaluator = TrainEvaluator() - evaluator.resampling_strategy = 'holdout' - evaluator.resampling_strategy_args = {'shuffle': False} + evaluator.resampling_strategy = "holdout" + evaluator.resampling_strategy_args = {"shuffle": False} cv = evaluator.get_splitter(D) - self.assertIsInstance(cv, - sklearn.model_selection._split.PredefinedSplit) + self.assertIsInstance(cv, sklearn.model_selection._split.PredefinedSplit) # regression cv, KFold - D.data['Y_train'] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) - D.info['task'] = REGRESSION + D.data["Y_train"] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) + D.info["task"] = REGRESSION evaluator = TrainEvaluator() - evaluator.resampling_strategy = 'cv' - evaluator.resampling_strategy_args = {'folds': 5} + evaluator.resampling_strategy = "cv" + evaluator.resampling_strategy_args = {"folds": 5} cv = evaluator.get_splitter(D) self.assertIsInstance(cv, sklearn.model_selection._split.KFold) self.assertTrue(cv.shuffle) # regression cv, KFold, no shuffling - D.data['Y_train'] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) - D.info['task'] = REGRESSION + D.data["Y_train"] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) + D.info["task"] = REGRESSION evaluator = TrainEvaluator() - evaluator.resampling_strategy = 'cv' - evaluator.resampling_strategy_args = {'folds': 5, 'shuffle': False} + evaluator.resampling_strategy = "cv" + evaluator.resampling_strategy_args = {"folds": 5, "shuffle": False} cv = evaluator.get_splitter(D) self.assertIsInstance(cv, sklearn.model_selection._split.KFold) self.assertFalse(cv.shuffle) # multioutput regression, shuffle split - D.data['Y_train'] = np.array([[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], - [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]]) - D.info['task'] = MULTIOUTPUT_REGRESSION + D.data["Y_train"] = np.array( + [[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]] + ) + D.info["task"] = MULTIOUTPUT_REGRESSION evaluator = TrainEvaluator() - evaluator.resampling_strategy = 'holdout' + evaluator.resampling_strategy = "holdout" evaluator.resampling_strategy_args = {} cv = evaluator.get_splitter(D) - self.assertIsInstance(cv, - sklearn.model_selection._split.ShuffleSplit) + self.assertIsInstance(cv, sklearn.model_selection._split.ShuffleSplit) # multioutput regression, no shuffle - D.data['Y_train'] = np.array([[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], - [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]]) - D.info['task'] = MULTIOUTPUT_REGRESSION + D.data["Y_train"] = np.array( + [[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]] + ) + D.info["task"] = MULTIOUTPUT_REGRESSION evaluator = TrainEvaluator() - evaluator.resampling_strategy = 'holdout' - evaluator.resampling_strategy_args = {'shuffle': False} + evaluator.resampling_strategy = "holdout" + evaluator.resampling_strategy_args = {"shuffle": False} cv = evaluator.get_splitter(D) - self.assertIsInstance(cv, - sklearn.model_selection._split.PredefinedSplit) + self.assertIsInstance(cv, sklearn.model_selection._split.PredefinedSplit) # multioutput regression cv, KFold - D.data['Y_train'] = np.array([[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], - [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]]) - D.info['task'] = MULTIOUTPUT_REGRESSION + D.data["Y_train"] = np.array( + [[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]] + ) + D.info["task"] = MULTIOUTPUT_REGRESSION evaluator = TrainEvaluator() - evaluator.resampling_strategy = 
'cv' - evaluator.resampling_strategy_args = {'folds': 5} + evaluator.resampling_strategy = "cv" + evaluator.resampling_strategy_args = {"folds": 5} cv = evaluator.get_splitter(D) self.assertIsInstance(cv, sklearn.model_selection._split.KFold) self.assertTrue(cv.shuffle) # multioutput regression cv, KFold, no shuffling - D.data['Y_train'] = np.array([[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], - [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]]) - D.info['task'] = MULTIOUTPUT_REGRESSION + D.data["Y_train"] = np.array( + [[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]] + ) + D.info["task"] = MULTIOUTPUT_REGRESSION evaluator = TrainEvaluator() - evaluator.resampling_strategy = 'cv' - evaluator.resampling_strategy_args = {'folds': 5, 'shuffle': False} + evaluator.resampling_strategy = "cv" + evaluator.resampling_strategy_args = {"folds": 5, "shuffle": False} cv = evaluator.get_splitter(D) self.assertIsInstance(cv, sklearn.model_selection._split.KFold) self.assertFalse(cv.shuffle) @@ -1276,19 +1574,26 @@ def test_get_splitter_cv_object(self, te_mock): D.feat_type = {} # GroupKFold, classification with args - D.data['Y_train'] = np.array([0, 0, 0, 1, 1, 1]) - D.data['X_train'] = np.array([0, 0, 0, 1, 1, 1]) + D.data["Y_train"] = np.array([0, 0, 0, 1, 1, 1]) + D.data["X_train"] = np.array([0, 0, 0, 1, 1, 1]) evaluator = TrainEvaluator() evaluator.resampling_strategy = GroupKFold(n_splits=2) - evaluator.resampling_strategy_args = {'groups': np.array([1, 1, 2, 1, 2, 2])} + evaluator.resampling_strategy_args = {"groups": np.array([1, 1, 2, 1, 2, 2])} cv = evaluator.get_splitter(D) self.assertIsInstance(cv, GroupKFold) - self.assertEqual(cv.get_n_splits(groups=evaluator.resampling_strategy_args['groups']), 2) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + self.assertEqual( + cv.get_n_splits(groups=evaluator.resampling_strategy_args["groups"]), 2 + ) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # GroupKFold, classification no args - D.data['Y_train'] = np.array([0, 0, 0, 1, 1, 1]) + D.data["Y_train"] = np.array([0, 0, 0, 1, 1, 1]) evaluator = TrainEvaluator() evaluator.resampling_strategy = GroupKFold(n_splits=2) evaluator.resampling_strategy_args = None @@ -1296,23 +1601,31 @@ def test_get_splitter_cv_object(self, te_mock): ValueError, "The 'groups' parameter should not be None", evaluator.get_splitter, - D) + D, + ) # GroupKFold, regression with args - D.data['Y_train'] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) - D.info['task'] = REGRESSION + D.data["Y_train"] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) + D.info["task"] = REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy = GroupKFold(n_splits=2) - evaluator.resampling_strategy_args = {'groups': np.array([1, 1, 2, 1, 2, 2])} + evaluator.resampling_strategy_args = {"groups": np.array([1, 1, 2, 1, 2, 2])} cv = evaluator.get_splitter(D) self.assertIsInstance(cv, GroupKFold) - self.assertEqual(cv.get_n_splits(groups=evaluator.resampling_strategy_args['groups']), 2) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + self.assertEqual( + cv.get_n_splits(groups=evaluator.resampling_strategy_args["groups"]), 2 + ) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # GroupKFold, regression no args - D.data['Y_train'] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) - 
D.info['task'] = REGRESSION + D.data["Y_train"] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) + D.info["task"] = REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy = GroupKFold(n_splits=2) evaluator.resampling_strategy_args = None @@ -1320,25 +1633,35 @@ def test_get_splitter_cv_object(self, te_mock): ValueError, "The 'groups' parameter should not be None", evaluator.get_splitter, - D) + D, + ) # GroupKFold, multi-output regression with args - D.data['Y_train'] = np.array([[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], - [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]]) - D.info['task'] = MULTIOUTPUT_REGRESSION + D.data["Y_train"] = np.array( + [[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]] + ) + D.info["task"] = MULTIOUTPUT_REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy = GroupKFold(n_splits=2) - evaluator.resampling_strategy_args = {'groups': np.array([1, 1, 2, 1, 2, 2])} + evaluator.resampling_strategy_args = {"groups": np.array([1, 1, 2, 1, 2, 2])} cv = evaluator.get_splitter(D) self.assertIsInstance(cv, GroupKFold) - self.assertEqual(cv.get_n_splits(groups=evaluator.resampling_strategy_args['groups']), 2) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + self.assertEqual( + cv.get_n_splits(groups=evaluator.resampling_strategy_args["groups"]), 2 + ) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # GroupKFold, multi-output regression no args - D.data['Y_train'] = np.array([[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], - [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]]) - D.info['task'] = MULTIOUTPUT_REGRESSION + D.data["Y_train"] = np.array( + [[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]] + ) + D.info["task"] = MULTIOUTPUT_REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy = GroupKFold(n_splits=2) evaluator.resampling_strategy_args = None @@ -1346,110 +1669,154 @@ def test_get_splitter_cv_object(self, te_mock): ValueError, "The 'groups' parameter should not be None", evaluator.get_splitter, - D) + D, + ) # KFold, classification with args - D.data['Y_train'] = np.array([0, 0, 0, 1, 1, 1]) + D.data["Y_train"] = np.array([0, 0, 0, 1, 1, 1]) evaluator = TrainEvaluator() evaluator.resampling_strategy_args = None evaluator.resampling_strategy = KFold(n_splits=4, shuffle=True, random_state=5) cv = evaluator.get_splitter(D) self.assertIsInstance(cv, KFold) - self.assertEqual(cv.get_n_splits( - groups=evaluator.resampling_strategy_args['groups']), 4) + self.assertEqual( + cv.get_n_splits(groups=evaluator.resampling_strategy_args["groups"]), 4 + ) self.assertTrue(cv.shuffle) self.assertEqual(cv.random_state, 5) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # KFold, classification no args - D.data['Y_train'] = np.array([0, 0, 0, 1, 1, 1]) + D.data["Y_train"] = np.array([0, 0, 0, 1, 1, 1]) evaluator = TrainEvaluator() evaluator.resampling_strategy_args = None evaluator.resampling_strategy = KFold(n_splits=3) cv = evaluator.get_splitter(D) self.assertIsInstance(cv, KFold) - self.assertEqual(cv.get_n_splits( - groups=evaluator.resampling_strategy_args['groups']), 3) + self.assertEqual( + cv.get_n_splits(groups=evaluator.resampling_strategy_args["groups"]), 3 + ) self.assertFalse(cv.shuffle) 
self.assertIsNone(cv.random_state) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # KFold, regression with args - D.data['Y_train'] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) - D.info['task'] = REGRESSION + D.data["Y_train"] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) + D.info["task"] = REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy_args = None evaluator.resampling_strategy = KFold(n_splits=4, shuffle=True, random_state=5) cv = evaluator.get_splitter(D) self.assertIsInstance(cv, KFold) - self.assertEqual(cv.get_n_splits( - groups=evaluator.resampling_strategy_args['groups']), 4) + self.assertEqual( + cv.get_n_splits(groups=evaluator.resampling_strategy_args["groups"]), 4 + ) self.assertTrue(cv.shuffle) self.assertEqual(cv.random_state, 5) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # KFold, regression no args - D.data['Y_train'] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) - D.info['task'] = REGRESSION + D.data["Y_train"] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) + D.info["task"] = REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy_args = None evaluator.resampling_strategy = KFold(n_splits=3) cv = evaluator.get_splitter(D) self.assertIsInstance(cv, KFold) - self.assertEqual(cv.get_n_splits( - groups=evaluator.resampling_strategy_args['groups']), 3) + self.assertEqual( + cv.get_n_splits(groups=evaluator.resampling_strategy_args["groups"]), 3 + ) self.assertFalse(cv.shuffle) self.assertIsNone(cv.random_state) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # KFold, multi-output regression with args - D.data['Y_train'] = np.array([[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], - [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]]) - D.info['task'] = MULTIOUTPUT_REGRESSION + D.data["Y_train"] = np.array( + [[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]] + ) + D.info["task"] = MULTIOUTPUT_REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy_args = None evaluator.resampling_strategy = KFold(n_splits=4, shuffle=True, random_state=5) cv = evaluator.get_splitter(D) self.assertIsInstance(cv, KFold) - self.assertEqual(cv.get_n_splits( - groups=evaluator.resampling_strategy_args['groups']), 4) + self.assertEqual( + cv.get_n_splits(groups=evaluator.resampling_strategy_args["groups"]), 4 + ) self.assertTrue(cv.shuffle) self.assertEqual(cv.random_state, 5) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # KFold, multi-output regression no args - D.data['Y_train'] = np.array([[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], - [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]]) - D.info['task'] = MULTIOUTPUT_REGRESSION + D.data["Y_train"] = np.array( + [[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]] + ) + D.info["task"] = MULTIOUTPUT_REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy_args = None 
evaluator.resampling_strategy = KFold(n_splits=3) cv = evaluator.get_splitter(D) self.assertIsInstance(cv, KFold) - self.assertEqual(cv.get_n_splits( - groups=evaluator.resampling_strategy_args['groups']), 3) + self.assertEqual( + cv.get_n_splits(groups=evaluator.resampling_strategy_args["groups"]), 3 + ) self.assertFalse(cv.shuffle) self.assertIsNone(cv.random_state) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # LeaveOneGroupOut, classification with args - D.data['Y_train'] = np.array([0, 0, 0, 1, 1, 1]) + D.data["Y_train"] = np.array([0, 0, 0, 1, 1, 1]) evaluator = TrainEvaluator() evaluator.resampling_strategy = LeaveOneGroupOut() - evaluator.resampling_strategy_args = {'groups': np.array([1, 1, 2, 1, 2, 2])} + evaluator.resampling_strategy_args = {"groups": np.array([1, 1, 2, 1, 2, 2])} cv = evaluator.get_splitter(D) self.assertIsInstance(cv, LeaveOneGroupOut) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # LeaveOneGroupOut, classification no args - D.data['Y_train'] = np.array([0, 0, 0, 1, 1, 1]) + D.data["Y_train"] = np.array([0, 0, 0, 1, 1, 1]) evaluator = TrainEvaluator() evaluator.resampling_strategy = LeaveOneGroupOut() evaluator.resampling_strategy_args = None @@ -1457,22 +1824,28 @@ def test_get_splitter_cv_object(self, te_mock): ValueError, "The 'groups' parameter should not be None", evaluator.get_splitter, - D) + D, + ) # LeaveOneGroupOut, regression with args - D.data['Y_train'] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) - D.info['task'] = REGRESSION + D.data["Y_train"] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) + D.info["task"] = REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy = LeaveOneGroupOut() - evaluator.resampling_strategy_args = {'groups': np.array([1, 1, 2, 1, 2, 2])} + evaluator.resampling_strategy_args = {"groups": np.array([1, 1, 2, 1, 2, 2])} cv = evaluator.get_splitter(D) self.assertIsInstance(cv, LeaveOneGroupOut) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # LeaveOneGroupOut, regression no args - D.data['Y_train'] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) - D.info['task'] = REGRESSION + D.data["Y_train"] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) + D.info["task"] = REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy = LeaveOneGroupOut() evaluator.resampling_strategy_args = None @@ -1480,24 +1853,32 @@ def test_get_splitter_cv_object(self, te_mock): ValueError, "The 'groups' parameter should not be None", evaluator.get_splitter, - D) + D, + ) # LeaveOneGroupOut, multi-output regression with args - D.data['Y_train'] = np.array([[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], - [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]]) - D.info['task'] = MULTIOUTPUT_REGRESSION + D.data["Y_train"] = np.array( + [[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]] + ) + D.info["task"] = MULTIOUTPUT_REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy = LeaveOneGroupOut() - evaluator.resampling_strategy_args = {'groups': np.array([1, 1, 2, 1, 2, 2])} + 
evaluator.resampling_strategy_args = {"groups": np.array([1, 1, 2, 1, 2, 2])} cv = evaluator.get_splitter(D) self.assertIsInstance(cv, LeaveOneGroupOut) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # LeaveOneGroupOut, multi-output regression no args - D.data['Y_train'] = np.array([[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], - [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]]) - D.info['task'] = MULTIOUTPUT_REGRESSION + D.data["Y_train"] = np.array( + [[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]] + ) + D.info["task"] = MULTIOUTPUT_REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy = LeaveOneGroupOut() evaluator.resampling_strategy_args = None @@ -1505,21 +1886,27 @@ def test_get_splitter_cv_object(self, te_mock): ValueError, "The 'groups' parameter should not be None", evaluator.get_splitter, - D) + D, + ) # LeavePGroupsOut, classification with args - D.data['Y_train'] = np.array([0, 0, 0, 1, 1, 1]) + D.data["Y_train"] = np.array([0, 0, 0, 1, 1, 1]) evaluator = TrainEvaluator() evaluator.resampling_strategy = LeavePGroupsOut(n_groups=1) - evaluator.resampling_strategy_args = {'groups': np.array([1, 1, 2, 1, 2, 2])} + evaluator.resampling_strategy_args = {"groups": np.array([1, 1, 2, 1, 2, 2])} cv = evaluator.get_splitter(D) self.assertIsInstance(cv, LeavePGroupsOut) self.assertEqual(cv.n_groups, 1) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # LeavePGroupsOut, classification no args - D.data['Y_train'] = np.array([0, 0, 0, 1, 1, 1]) + D.data["Y_train"] = np.array([0, 0, 0, 1, 1, 1]) evaluator = TrainEvaluator() evaluator.resampling_strategy = LeaveOneGroupOut() evaluator.resampling_strategy_args = None @@ -1527,23 +1914,29 @@ def test_get_splitter_cv_object(self, te_mock): ValueError, "The 'groups' parameter should not be None", evaluator.get_splitter, - D) + D, + ) # LeavePGroupsOut, regression with args - D.data['Y_train'] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) - D.info['task'] = REGRESSION + D.data["Y_train"] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) + D.info["task"] = REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy = LeavePGroupsOut(n_groups=1) - evaluator.resampling_strategy_args = {'groups': np.array([1, 1, 2, 1, 2, 2])} + evaluator.resampling_strategy_args = {"groups": np.array([1, 1, 2, 1, 2, 2])} cv = evaluator.get_splitter(D) self.assertIsInstance(cv, LeavePGroupsOut) self.assertEqual(cv.n_groups, 1) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # LeavePGroupsOut, regression no args - D.data['Y_train'] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) - D.info['task'] = REGRESSION + D.data["Y_train"] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) + D.info["task"] = REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy = LeavePGroupsOut(n_groups=1) evaluator.resampling_strategy_args = None @@ -1551,25 +1944,33 @@ def test_get_splitter_cv_object(self, te_mock): ValueError, "The 'groups' parameter should not be None", evaluator.get_splitter, - D) + D, + ) # 
LeavePGroupsOut, multi-output regression with args - D.data['Y_train'] = np.array([[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], - [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]]) - D.info['task'] = MULTIOUTPUT_REGRESSION + D.data["Y_train"] = np.array( + [[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]] + ) + D.info["task"] = MULTIOUTPUT_REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy = LeavePGroupsOut(n_groups=1) - evaluator.resampling_strategy_args = {'groups': np.array([1, 1, 2, 1, 2, 2])} + evaluator.resampling_strategy_args = {"groups": np.array([1, 1, 2, 1, 2, 2])} cv = evaluator.get_splitter(D) self.assertIsInstance(cv, LeavePGroupsOut) self.assertEqual(cv.n_groups, 1) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # LeavePGroupsOut, multi-output regression no args - D.data['Y_train'] = np.array([[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], - [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]]) - D.info['task'] = MULTIOUTPUT_REGRESSION + D.data["Y_train"] = np.array( + [[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]] + ) + D.info["task"] = MULTIOUTPUT_REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy = LeavePGroupsOut(n_groups=1) evaluator.resampling_strategy_args = None @@ -1577,384 +1978,567 @@ def test_get_splitter_cv_object(self, te_mock): ValueError, "The 'groups' parameter should not be None", evaluator.get_splitter, - D) + D, + ) # LeaveOneOut, classification - D.data['Y_train'] = np.array([0, 0, 0, 1, 1, 1]) + D.data["Y_train"] = np.array([0, 0, 0, 1, 1, 1]) evaluator = TrainEvaluator() evaluator.resampling_strategy = LeaveOneOut() evaluator.resampling_strategy_args = None cv = evaluator.get_splitter(D) self.assertIsInstance(cv, LeaveOneOut) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # LeaveOneOut, regression - D.data['Y_train'] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) - D.info['task'] = REGRESSION + D.data["Y_train"] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) + D.info["task"] = REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy = LeaveOneOut() evaluator.resampling_strategy_args = None cv = evaluator.get_splitter(D) self.assertIsInstance(cv, LeaveOneOut) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # LeaveOneOut, multi-output regression - D.data['Y_train'] = np.array([[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], - [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]]) - D.info['task'] = MULTIOUTPUT_REGRESSION + D.data["Y_train"] = np.array( + [[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]] + ) + D.info["task"] = MULTIOUTPUT_REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy = LeaveOneOut() evaluator.resampling_strategy_args = None cv = evaluator.get_splitter(D) self.assertIsInstance(cv, LeaveOneOut) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # 
LeavePOut, classification with args - D.data['Y_train'] = np.array([0, 0, 0, 1, 1, 1]) + D.data["Y_train"] = np.array([0, 0, 0, 1, 1, 1]) evaluator = TrainEvaluator() evaluator.resampling_strategy_args = None evaluator.resampling_strategy = LeavePOut(p=3) cv = evaluator.get_splitter(D) self.assertIsInstance(cv, LeavePOut) self.assertEqual(cv.p, 3) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # LeavePOut, classification no args - D.data['Y_train'] = np.array([0, 0, 0, 1, 1, 1]) + D.data["Y_train"] = np.array([0, 0, 0, 1, 1, 1]) evaluator = TrainEvaluator() evaluator.resampling_strategy = LeavePOut(p=2) evaluator.resampling_strategy_args = None cv = evaluator.get_splitter(D) self.assertIsInstance(cv, LeavePOut) self.assertEqual(cv.p, 2) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # LeavePOut, regression with args - D.data['Y_train'] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) - D.info['task'] = REGRESSION + D.data["Y_train"] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) + D.info["task"] = REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy_args = None evaluator.resampling_strategy = LeavePOut(p=3) cv = evaluator.get_splitter(D) self.assertIsInstance(cv, LeavePOut) self.assertEqual(cv.p, 3) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # LeavePOut, regression no args - D.data['Y_train'] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) - D.info['task'] = REGRESSION + D.data["Y_train"] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) + D.info["task"] = REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy = LeavePOut(p=2) evaluator.resampling_strategy_args = None cv = evaluator.get_splitter(D) self.assertIsInstance(cv, LeavePOut) self.assertEqual(cv.p, 2) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # LeavePOut, multi-output regression with args - D.data['Y_train'] = np.array([[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], - [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]]) - D.info['task'] = MULTIOUTPUT_REGRESSION + D.data["Y_train"] = np.array( + [[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]] + ) + D.info["task"] = MULTIOUTPUT_REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy_args = None evaluator.resampling_strategy = LeavePOut(p=3) cv = evaluator.get_splitter(D) self.assertIsInstance(cv, LeavePOut) self.assertEqual(cv.p, 3) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # LeavePOut, multi-output regression no args - D.data['Y_train'] = np.array([[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], - [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]]) - D.info['task'] = MULTIOUTPUT_REGRESSION + D.data["Y_train"] = np.array( + [[0.0, 0.1], [0.2, 0.3], 
[0.4, 0.5], [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]] + ) + D.info["task"] = MULTIOUTPUT_REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy = LeavePOut(p=2) evaluator.resampling_strategy_args = None cv = evaluator.get_splitter(D) self.assertIsInstance(cv, LeavePOut) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # PredefinedSplit, classification with args - D.data['Y_train'] = np.array([0, 0, 0, 1, 1, 1]) + D.data["Y_train"] = np.array([0, 0, 0, 1, 1, 1]) evaluator = TrainEvaluator() evaluator.resampling_strategy_args = None - evaluator.resampling_strategy = PredefinedSplit(test_fold=np.array([0, 1, 0, 1, 0, 1])) + evaluator.resampling_strategy = PredefinedSplit( + test_fold=np.array([0, 1, 0, 1, 0, 1]) + ) cv = evaluator.get_splitter(D) self.assertIsInstance(cv, PredefinedSplit) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # PredefinedSplit, regression with args - D.data['Y_train'] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) - D.info['task'] = REGRESSION + D.data["Y_train"] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) + D.info["task"] = REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy_args = None - evaluator.resampling_strategy = PredefinedSplit(test_fold=np.array([0, 1, 0, 1, 0, 1])) + evaluator.resampling_strategy = PredefinedSplit( + test_fold=np.array([0, 1, 0, 1, 0, 1]) + ) cv = evaluator.get_splitter(D) self.assertIsInstance(cv, PredefinedSplit) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # PredefinedSplit, multi-output regression with args - D.data['Y_train'] = np.array([[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], - [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]]) - D.info['task'] = MULTIOUTPUT_REGRESSION + D.data["Y_train"] = np.array( + [[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]] + ) + D.info["task"] = MULTIOUTPUT_REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy_args = None - evaluator.resampling_strategy = PredefinedSplit(test_fold=np.array([0, 1, 0, 1, 0, 1])) + evaluator.resampling_strategy = PredefinedSplit( + test_fold=np.array([0, 1, 0, 1, 0, 1]) + ) cv = evaluator.get_splitter(D) self.assertIsInstance(cv, PredefinedSplit) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # RepeatedKFold, classification with args - D.data['Y_train'] = np.array([0, 0, 0, 1, 1, 1]) + D.data["Y_train"] = np.array([0, 0, 0, 1, 1, 1]) evaluator = TrainEvaluator() evaluator.resampling_strategy_args = None - evaluator.resampling_strategy = RepeatedKFold(n_splits=4, n_repeats=3, random_state=5) + evaluator.resampling_strategy = RepeatedKFold( + n_splits=4, n_repeats=3, random_state=5 + ) cv = evaluator.get_splitter(D) self.assertIsInstance(cv, RepeatedKFold) - self.assertEqual(cv.get_n_splits( - groups=evaluator.resampling_strategy_args['groups']), 4*3) + self.assertEqual( + 
cv.get_n_splits(groups=evaluator.resampling_strategy_args["groups"]), 4 * 3 + ) self.assertEqual(cv.n_repeats, 3) self.assertEqual(cv.random_state, 5) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # RepeatedKFold, classification no args - D.data['Y_train'] = np.array([0, 0, 0, 1, 1, 1]) + D.data["Y_train"] = np.array([0, 0, 0, 1, 1, 1]) evaluator = TrainEvaluator() evaluator.resampling_strategy = RepeatedKFold(n_splits=5, n_repeats=10) evaluator.resampling_strategy_args = None cv = evaluator.get_splitter(D) self.assertIsInstance(cv, RepeatedKFold) - self.assertEqual(cv.get_n_splits( - groups=evaluator.resampling_strategy_args['groups']), 5*10) + self.assertEqual( + cv.get_n_splits(groups=evaluator.resampling_strategy_args["groups"]), 5 * 10 + ) self.assertEqual(cv.n_repeats, 10) self.assertIsNone(cv.random_state) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # RepeatedKFold, regression with args - D.data['Y_train'] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) - D.info['task'] = REGRESSION + D.data["Y_train"] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) + D.info["task"] = REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy_args = None - evaluator.resampling_strategy = RepeatedKFold(n_splits=4, n_repeats=3, random_state=5) + evaluator.resampling_strategy = RepeatedKFold( + n_splits=4, n_repeats=3, random_state=5 + ) cv = evaluator.get_splitter(D) self.assertIsInstance(cv, RepeatedKFold) - self.assertEqual(cv.get_n_splits( - groups=evaluator.resampling_strategy_args['groups']), 4*3) + self.assertEqual( + cv.get_n_splits(groups=evaluator.resampling_strategy_args["groups"]), 4 * 3 + ) self.assertEqual(cv.n_repeats, 3) self.assertEqual(cv.random_state, 5) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # RepeatedKFold, regression no args - D.data['Y_train'] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) - D.info['task'] = REGRESSION + D.data["Y_train"] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) + D.info["task"] = REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy_args = None evaluator.resampling_strategy = RepeatedKFold(n_splits=5, n_repeats=10) cv = evaluator.get_splitter(D) self.assertIsInstance(cv, RepeatedKFold) - self.assertEqual(cv.get_n_splits( - groups=evaluator.resampling_strategy_args['groups']), 5*10) + self.assertEqual( + cv.get_n_splits(groups=evaluator.resampling_strategy_args["groups"]), 5 * 10 + ) self.assertEqual(cv.n_repeats, 10) self.assertIsNone(cv.random_state) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # RepeatedKFold, multi-output regression with args - D.data['Y_train'] = np.array([[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], - [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]]) - D.info['task'] = MULTIOUTPUT_REGRESSION + D.data["Y_train"] = np.array( + [[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]] 
+ ) + D.info["task"] = MULTIOUTPUT_REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy_args = None - evaluator.resampling_strategy = RepeatedKFold(n_splits=4, n_repeats=3, random_state=5) + evaluator.resampling_strategy = RepeatedKFold( + n_splits=4, n_repeats=3, random_state=5 + ) cv = evaluator.get_splitter(D) self.assertIsInstance(cv, RepeatedKFold) - self.assertEqual(cv.get_n_splits( - groups=evaluator.resampling_strategy_args['groups']), 4*3) + self.assertEqual( + cv.get_n_splits(groups=evaluator.resampling_strategy_args["groups"]), 4 * 3 + ) self.assertEqual(cv.n_repeats, 3) self.assertEqual(cv.random_state, 5) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # RepeatedKFold, multi-output regression no args - D.data['Y_train'] = np.array([[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], - [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]]) - D.info['task'] = MULTIOUTPUT_REGRESSION + D.data["Y_train"] = np.array( + [[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]] + ) + D.info["task"] = MULTIOUTPUT_REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy_args = None evaluator.resampling_strategy = RepeatedKFold(n_splits=5, n_repeats=10) cv = evaluator.get_splitter(D) self.assertIsInstance(cv, RepeatedKFold) - self.assertEqual(cv.get_n_splits( - groups=evaluator.resampling_strategy_args['groups']), 5*10) + self.assertEqual( + cv.get_n_splits(groups=evaluator.resampling_strategy_args["groups"]), 5 * 10 + ) self.assertEqual(cv.n_repeats, 10) self.assertIsNone(cv.random_state) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # RepeatedStratifiedKFold, classification with args - D.data['Y_train'] = np.array([0, 0, 0, 1, 1, 1]) + D.data["Y_train"] = np.array([0, 0, 0, 1, 1, 1]) evaluator = TrainEvaluator() evaluator.resampling_strategy_args = None evaluator.resampling_strategy = RepeatedStratifiedKFold( - n_splits=2, n_repeats=3, random_state=5) + n_splits=2, n_repeats=3, random_state=5 + ) cv = evaluator.get_splitter(D) self.assertIsInstance(cv, RepeatedStratifiedKFold) - self.assertEqual(cv.get_n_splits( - groups=evaluator.resampling_strategy_args['groups']), 2*3) + self.assertEqual( + cv.get_n_splits(groups=evaluator.resampling_strategy_args["groups"]), 2 * 3 + ) self.assertEqual(cv.n_repeats, 3) self.assertEqual(cv.random_state, 5) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # RepeatedStratifiedKFold, classification no args - D.data['Y_train'] = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]) - D.data['X_train'] = D.data['Y_train'] + D.data["Y_train"] = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]) + D.data["X_train"] = D.data["Y_train"] evaluator = TrainEvaluator() - evaluator.resampling_strategy = RepeatedStratifiedKFold(n_splits=5, n_repeats=10) + evaluator.resampling_strategy = RepeatedStratifiedKFold( + n_splits=5, n_repeats=10 + ) evaluator.resampling_strategy_args = None cv = evaluator.get_splitter(D) self.assertIsInstance(cv, RepeatedStratifiedKFold) - self.assertEqual(cv.get_n_splits( - 
groups=evaluator.resampling_strategy_args['groups']), 5*10) + self.assertEqual( + cv.get_n_splits(groups=evaluator.resampling_strategy_args["groups"]), 5 * 10 + ) self.assertEqual(cv.n_repeats, 10) self.assertIsNone(cv.random_state) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # StratifiedKFold, classification with args - D.data['Y_train'] = np.array([0, 0, 0, 1, 1, 1]) - D.data['X_train'] = D.data['Y_train'] + D.data["Y_train"] = np.array([0, 0, 0, 1, 1, 1]) + D.data["X_train"] = D.data["Y_train"] evaluator = TrainEvaluator() evaluator.resampling_strategy = StratifiedKFold evaluator.resampling_strategy_args = None - evaluator.resampling_strategy = StratifiedKFold(n_splits=2, shuffle=True, random_state=5) + evaluator.resampling_strategy = StratifiedKFold( + n_splits=2, shuffle=True, random_state=5 + ) cv = evaluator.get_splitter(D) self.assertIsInstance(cv, StratifiedKFold) - self.assertEqual(cv.get_n_splits( - groups=evaluator.resampling_strategy_args['groups']), 2) + self.assertEqual( + cv.get_n_splits(groups=evaluator.resampling_strategy_args["groups"]), 2 + ) self.assertTrue(cv.shuffle) self.assertEqual(cv.random_state, 5) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # StratifiedKFold, classification no args - D.data['Y_train'] = np.array([0, 0, 0, 1, 1, 1]) + D.data["Y_train"] = np.array([0, 0, 0, 1, 1, 1]) evaluator = TrainEvaluator() evaluator.resampling_strategy = StratifiedKFold(n_splits=3, shuffle=False) evaluator.resampling_strategy_args = None cv = evaluator.get_splitter(D) self.assertIsInstance(cv, StratifiedKFold) - self.assertEqual(cv.get_n_splits( - groups=evaluator.resampling_strategy_args['groups']), 3) + self.assertEqual( + cv.get_n_splits(groups=evaluator.resampling_strategy_args["groups"]), 3 + ) self.assertFalse(cv.shuffle) self.assertIsNone(cv.random_state) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # TimeSeriesSplit, multi-output regression with args - D.data['Y_train'] = np.array([[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], - [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]]) - D.info['task'] = MULTIOUTPUT_REGRESSION + D.data["Y_train"] = np.array( + [[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]] + ) + D.info["task"] = MULTIOUTPUT_REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy_args = None evaluator.resampling_strategy = TimeSeriesSplit(n_splits=4, max_train_size=3) cv = evaluator.get_splitter(D) self.assertIsInstance(cv, TimeSeriesSplit) - self.assertEqual(cv.get_n_splits( - groups=evaluator.resampling_strategy_args['groups']), 4) + self.assertEqual( + cv.get_n_splits(groups=evaluator.resampling_strategy_args["groups"]), 4 + ) self.assertEqual(cv.max_train_size, 3) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # TimeSeriesSplit, multi-output regression with args - D.data['Y_train'] = 
np.array([[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], - [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]]) - D.info['task'] = MULTIOUTPUT_REGRESSION + D.data["Y_train"] = np.array( + [[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]] + ) + D.info["task"] = MULTIOUTPUT_REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy = TimeSeriesSplit(n_splits=3) evaluator.resampling_strategy_args = None cv = evaluator.get_splitter(D) self.assertIsInstance(cv, TimeSeriesSplit) - self.assertEqual(cv.get_n_splits( - groups=evaluator.resampling_strategy_args['groups']), 3) + self.assertEqual( + cv.get_n_splits(groups=evaluator.resampling_strategy_args["groups"]), 3 + ) self.assertIsNone(cv.max_train_size) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # TimeSeriesSplit, regression with args - D.data['Y_train'] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) - D.info['task'] = REGRESSION + D.data["Y_train"] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) + D.info["task"] = REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy_args = None evaluator.resampling_strategy = TimeSeriesSplit(n_splits=4, max_train_size=3) cv = evaluator.get_splitter(D) self.assertIsInstance(cv, TimeSeriesSplit) - self.assertEqual(cv.get_n_splits( - groups=evaluator.resampling_strategy_args['groups']), 4) + self.assertEqual( + cv.get_n_splits(groups=evaluator.resampling_strategy_args["groups"]), 4 + ) self.assertEqual(cv.max_train_size, 3) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # TimeSeriesSplit, regression no args - D.data['Y_train'] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) - D.info['task'] = REGRESSION + D.data["Y_train"] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) + D.info["task"] = REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy = TimeSeriesSplit(n_splits=3) evaluator.resampling_strategy_args = None cv = evaluator.get_splitter(D) self.assertIsInstance(cv, TimeSeriesSplit) - self.assertEqual(cv.get_n_splits( - groups=evaluator.resampling_strategy_args['groups']), 3) + self.assertEqual( + cv.get_n_splits(groups=evaluator.resampling_strategy_args["groups"]), 3 + ) self.assertIsNone(cv.max_train_size) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # StratifiedKFold, classification no args - D.data['Y_train'] = np.array([0, 0, 0, 1, 1, 1]) + D.data["Y_train"] = np.array([0, 0, 0, 1, 1, 1]) evaluator = TrainEvaluator() evaluator.resampling_strategy = StratifiedKFold(n_splits=3) evaluator.resampling_strategy_args = None cv = evaluator.get_splitter(D) self.assertIsInstance(cv, StratifiedKFold) - self.assertEqual(cv.get_n_splits( - groups=evaluator.resampling_strategy_args['groups']), 3) + self.assertEqual( + cv.get_n_splits(groups=evaluator.resampling_strategy_args["groups"]), 3 + ) self.assertFalse(cv.shuffle) self.assertIsNone(cv.random_state) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + 
groups=evaluator.resampling_strategy_args["groups"], + ) + ) # GroupShuffleSplit, classification with args - D.data['Y_train'] = np.array([0, 0, 0, 1, 1, 1]) + D.data["Y_train"] = np.array([0, 0, 0, 1, 1, 1]) evaluator = TrainEvaluator() - evaluator.resampling_strategy_args = {'groups': np.array([1, 1, 2, 1, 2, 2])} - evaluator.resampling_strategy = GroupShuffleSplit(n_splits=2, test_size=0.3, - random_state=5) + evaluator.resampling_strategy_args = {"groups": np.array([1, 1, 2, 1, 2, 2])} + evaluator.resampling_strategy = GroupShuffleSplit( + n_splits=2, test_size=0.3, random_state=5 + ) cv = evaluator.get_splitter(D) self.assertIsInstance(cv, GroupShuffleSplit) - self.assertEqual(cv.get_n_splits( - groups=evaluator.resampling_strategy_args['groups']), 2) + self.assertEqual( + cv.get_n_splits(groups=evaluator.resampling_strategy_args["groups"]), 2 + ) self.assertEqual(cv.test_size, 0.3) self.assertEqual(cv.random_state, 5) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # GroupShuffleSplit, classification no args - D.data['Y_train'] = np.array([0, 0, 0, 1, 1, 1]) + D.data["Y_train"] = np.array([0, 0, 0, 1, 1, 1]) evaluator = TrainEvaluator() evaluator.resampling_strategy = GroupShuffleSplit(n_splits=5) evaluator.resampling_strategy_args = None @@ -1962,27 +2546,35 @@ def test_get_splitter_cv_object(self, te_mock): ValueError, "The 'groups' parameter should not be None", evaluator.get_splitter, - D) + D, + ) # GroupShuffleSplit, regression with args - D.data['Y_train'] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) - D.info['task'] = REGRESSION + D.data["Y_train"] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) + D.info["task"] = REGRESSION evaluator = TrainEvaluator() - evaluator.resampling_strategy_args = {'groups': np.array([1, 1, 2, 1, 2, 2])} - evaluator.resampling_strategy = GroupShuffleSplit(n_splits=2, test_size=0.3, - random_state=5) + evaluator.resampling_strategy_args = {"groups": np.array([1, 1, 2, 1, 2, 2])} + evaluator.resampling_strategy = GroupShuffleSplit( + n_splits=2, test_size=0.3, random_state=5 + ) cv = evaluator.get_splitter(D) self.assertIsInstance(cv, GroupShuffleSplit) - self.assertEqual(cv.get_n_splits( - groups=evaluator.resampling_strategy_args['groups']), 2) + self.assertEqual( + cv.get_n_splits(groups=evaluator.resampling_strategy_args["groups"]), 2 + ) self.assertEqual(cv.test_size, 0.3) self.assertEqual(cv.random_state, 5) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # GroupShuffleSplit, regression no args - D.data['Y_train'] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) - D.info['task'] = REGRESSION + D.data["Y_train"] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) + D.info["task"] = REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy = GroupShuffleSplit(n_splits=5) evaluator.resampling_strategy_args = None @@ -1990,29 +2582,39 @@ def test_get_splitter_cv_object(self, te_mock): ValueError, "The 'groups' parameter should not be None", evaluator.get_splitter, - D) + D, + ) # GroupShuffleSplit, multi-output regression with args - D.data['Y_train'] = np.array([[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], - [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]]) - D.info['task'] = MULTIOUTPUT_REGRESSION + 
D.data["Y_train"] = np.array( + [[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]] + ) + D.info["task"] = MULTIOUTPUT_REGRESSION evaluator = TrainEvaluator() - evaluator.resampling_strategy_args = {'groups': np.array([1, 1, 2, 1, 2, 2])} - evaluator.resampling_strategy = GroupShuffleSplit(n_splits=2, test_size=0.3, - random_state=5) + evaluator.resampling_strategy_args = {"groups": np.array([1, 1, 2, 1, 2, 2])} + evaluator.resampling_strategy = GroupShuffleSplit( + n_splits=2, test_size=0.3, random_state=5 + ) cv = evaluator.get_splitter(D) self.assertIsInstance(cv, GroupShuffleSplit) - self.assertEqual(cv.get_n_splits( - groups=evaluator.resampling_strategy_args['groups']), 2) + self.assertEqual( + cv.get_n_splits(groups=evaluator.resampling_strategy_args["groups"]), 2 + ) self.assertEqual(cv.test_size, 0.3) self.assertEqual(cv.random_state, 5) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # GroupShuffleSplit, multi-output regression no args - D.data['Y_train'] = np.array([[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], - [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]]) - D.info['task'] = MULTIOUTPUT_REGRESSION + D.data["Y_train"] = np.array( + [[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]] + ) + D.info["task"] = MULTIOUTPUT_REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy = GroupShuffleSplit(n_splits=5) evaluator.resampling_strategy_args = None @@ -2020,129 +2622,188 @@ def test_get_splitter_cv_object(self, te_mock): ValueError, "The 'groups' parameter should not be None", evaluator.get_splitter, - D) + D, + ) # StratifiedShuffleSplit, classification with args - D.data['Y_train'] = np.array([0, 0, 0, 1, 1, 1]) + D.data["Y_train"] = np.array([0, 0, 0, 1, 1, 1]) evaluator = TrainEvaluator() evaluator.resampling_strategy_args = None evaluator.resampling_strategy = StratifiedShuffleSplit( - n_splits=2, test_size=0.3, random_state=5) + n_splits=2, test_size=0.3, random_state=5 + ) cv = evaluator.get_splitter(D) self.assertIsInstance(cv, StratifiedShuffleSplit) - self.assertEqual(cv.get_n_splits( - groups=evaluator.resampling_strategy_args['groups']), 2) + self.assertEqual( + cv.get_n_splits(groups=evaluator.resampling_strategy_args["groups"]), 2 + ) self.assertEqual(cv.test_size, 0.3) self.assertEqual(cv.random_state, 5) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # StratifiedShuffleSplit, classification no args - D.data['Y_train'] = np.array([0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, - 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1]) - D.data['X_train'] = D.data['Y_train'] + D.data["Y_train"] = np.array( + [0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1] + ) + D.data["X_train"] = D.data["Y_train"] evaluator = TrainEvaluator() evaluator.resampling_strategy = StratifiedShuffleSplit(n_splits=10) evaluator.resampling_strategy_args = None cv = evaluator.get_splitter(D) self.assertIsInstance(cv, StratifiedShuffleSplit) - self.assertEqual(cv.get_n_splits( - groups=evaluator.resampling_strategy_args['groups']), 10) + self.assertEqual( + cv.get_n_splits(groups=evaluator.resampling_strategy_args["groups"]), 10 + ) self.assertIsNone(cv.test_size) 
self.assertIsNone(cv.random_state) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # ShuffleSplit, classification with args - D.data['Y_train'] = np.array([0, 0, 0, 1, 1, 1]) - D.data['X_train'] = D.data['Y_train'] + D.data["Y_train"] = np.array([0, 0, 0, 1, 1, 1]) + D.data["X_train"] = D.data["Y_train"] evaluator = TrainEvaluator() evaluator.resampling_strategy_args = None - evaluator.resampling_strategy = ShuffleSplit(n_splits=2, test_size=0.3, random_state=5) + evaluator.resampling_strategy = ShuffleSplit( + n_splits=2, test_size=0.3, random_state=5 + ) cv = evaluator.get_splitter(D) self.assertIsInstance(cv, ShuffleSplit) - self.assertEqual(cv.get_n_splits( - groups=evaluator.resampling_strategy_args['groups']), 2) + self.assertEqual( + cv.get_n_splits(groups=evaluator.resampling_strategy_args["groups"]), 2 + ) self.assertEqual(cv.test_size, 0.3) self.assertEqual(cv.random_state, 5) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # ShuffleSplit, classification no args - D.data['Y_train'] = np.array([0, 0, 0, 1, 1, 1]) + D.data["Y_train"] = np.array([0, 0, 0, 1, 1, 1]) evaluator = TrainEvaluator() evaluator.resampling_strategy = ShuffleSplit(n_splits=10) evaluator.resampling_strategy_args = None cv = evaluator.get_splitter(D) self.assertIsInstance(cv, ShuffleSplit) - self.assertEqual(cv.get_n_splits( - groups=evaluator.resampling_strategy_args['groups']), 10) + self.assertEqual( + cv.get_n_splits(groups=evaluator.resampling_strategy_args["groups"]), 10 + ) self.assertIsNone(cv.test_size) self.assertIsNone(cv.random_state) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # ShuffleSplit, regression with args - D.data['Y_train'] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) - D.info['task'] = REGRESSION + D.data["Y_train"] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) + D.info["task"] = REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy_args = None - evaluator.resampling_strategy = ShuffleSplit(n_splits=2, test_size=0.3, random_state=5) + evaluator.resampling_strategy = ShuffleSplit( + n_splits=2, test_size=0.3, random_state=5 + ) cv = evaluator.get_splitter(D) self.assertIsInstance(cv, ShuffleSplit) - self.assertEqual(cv.get_n_splits( - groups=evaluator.resampling_strategy_args['groups']), 2) + self.assertEqual( + cv.get_n_splits(groups=evaluator.resampling_strategy_args["groups"]), 2 + ) self.assertEqual(cv.test_size, 0.3) self.assertEqual(cv.random_state, 5) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # ShuffleSplit, regression no args - D.data['Y_train'] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) - D.info['task'] = REGRESSION + D.data["Y_train"] = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5]) + D.info["task"] = REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy = ShuffleSplit(n_splits=10) evaluator.resampling_strategy_args 
= None cv = evaluator.get_splitter(D) self.assertIsInstance(cv, ShuffleSplit) - self.assertEqual(cv.get_n_splits( - groups=evaluator.resampling_strategy_args['groups']), 10) + self.assertEqual( + cv.get_n_splits(groups=evaluator.resampling_strategy_args["groups"]), 10 + ) self.assertIsNone(cv.test_size) self.assertIsNone(cv.random_state) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # ShuffleSplit, multi-output regression with args - D.data['Y_train'] = np.array([[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], - [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]]) - D.info['task'] = MULTIOUTPUT_REGRESSION + D.data["Y_train"] = np.array( + [[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]] + ) + D.info["task"] = MULTIOUTPUT_REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy_args = None - evaluator.resampling_strategy = ShuffleSplit(n_splits=2, test_size=0.3, random_state=5) + evaluator.resampling_strategy = ShuffleSplit( + n_splits=2, test_size=0.3, random_state=5 + ) cv = evaluator.get_splitter(D) self.assertIsInstance(cv, ShuffleSplit) - self.assertEqual(cv.get_n_splits( - groups=evaluator.resampling_strategy_args['groups']), 2) + self.assertEqual( + cv.get_n_splits(groups=evaluator.resampling_strategy_args["groups"]), 2 + ) self.assertEqual(cv.test_size, 0.3) self.assertEqual(cv.random_state, 5) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) # ShuffleSplit, multi-output regression no args - D.data['Y_train'] = np.array([[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], - [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]]) - D.info['task'] = MULTIOUTPUT_REGRESSION + D.data["Y_train"] = np.array( + [[0.0, 0.1], [0.2, 0.3], [0.4, 0.5], [1.0, 1.1], [1.2, 1.3], [1.4, 1.5]] + ) + D.info["task"] = MULTIOUTPUT_REGRESSION evaluator = TrainEvaluator() evaluator.resampling_strategy = ShuffleSplit(n_splits=10) evaluator.resampling_strategy_args = None cv = evaluator.get_splitter(D) self.assertIsInstance(cv, ShuffleSplit) - self.assertEqual(cv.get_n_splits( - groups=evaluator.resampling_strategy_args['groups']), 10) + self.assertEqual( + cv.get_n_splits(groups=evaluator.resampling_strategy_args["groups"]), 10 + ) self.assertIsNone(cv.test_size) self.assertIsNone(cv.random_state) - next(cv.split(D.data['Y_train'], D.data['Y_train'], - groups=evaluator.resampling_strategy_args['groups'])) + next( + cv.split( + D.data["Y_train"], + D.data["Y_train"], + groups=evaluator.resampling_strategy_args["groups"], + ) + ) @unittest.mock.patch.object(TrainEvaluator, "__init__") def test_holdout_split_size(self, te_mock): @@ -2151,102 +2812,119 @@ def test_holdout_split_size(self, te_mock): D.feat_type = {} evaluator = TrainEvaluator() - evaluator.resampling_strategy = 'holdout' + evaluator.resampling_strategy = "holdout" # Exact Ratio D.data = dict(Y_train=np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])) D.info = dict(task=BINARY_CLASSIFICATION) - evaluator.resampling_strategy_args = {'shuffle': True, - 'train_size': 0.7} + evaluator.resampling_strategy_args = {"shuffle": True, "train_size": 0.7} cv = evaluator.get_splitter(D) self.assertEqual(cv.get_n_splits(), 1) - train_samples, test_samples = next(cv.split(D.data['Y_train'], - D.data['Y_train'])) + train_samples, 
test_samples = next( + cv.split(D.data["Y_train"], D.data["Y_train"]) + ) self.assertEqual(len(train_samples), 7) self.assertEqual(len(test_samples), 3) # No Shuffle - evaluator.resampling_strategy_args = {'shuffle': False, - 'train_size': 0.7} + evaluator.resampling_strategy_args = {"shuffle": False, "train_size": 0.7} cv = evaluator.get_splitter(D) self.assertEqual(cv.get_n_splits(), 1) - train_samples, test_samples = next(cv.split(D.data['Y_train'], - D.data['Y_train'])) + train_samples, test_samples = next( + cv.split(D.data["Y_train"], D.data["Y_train"]) + ) self.assertEqual(len(train_samples), 7) self.assertEqual(len(test_samples), 3) # Rounded Ratio D.data = dict(Y_train=np.array([0, 0, 0, 0, 0, 1, 1, 1, 1])) - evaluator.resampling_strategy_args = {'shuffle': True, - 'train_size': 0.7} + evaluator.resampling_strategy_args = {"shuffle": True, "train_size": 0.7} cv = evaluator.get_splitter(D) self.assertEqual(cv.get_n_splits(), 1) - train_samples, test_samples = next(cv.split(D.data['Y_train'], - D.data['Y_train'])) + train_samples, test_samples = next( + cv.split(D.data["Y_train"], D.data["Y_train"]) + ) self.assertEqual(len(train_samples), 6) self.assertEqual(len(test_samples), 3) # Rounded Ratio No Shuffle - evaluator.resampling_strategy_args = {'shuffle': False, - 'train_size': 0.7} + evaluator.resampling_strategy_args = {"shuffle": False, "train_size": 0.7} cv = evaluator.get_splitter(D) self.assertEqual(cv.get_n_splits(), 1) - train_samples, test_samples = next(cv.split(D.data['Y_train'], - D.data['Y_train'])) + train_samples, test_samples = next( + cv.split(D.data["Y_train"], D.data["Y_train"]) + ) self.assertEqual(len(train_samples), 6) self.assertEqual(len(test_samples), 3) # More data - evaluator.resampling_strategy_args = {'shuffle': True, - 'train_size': 0.7} + evaluator.resampling_strategy_args = {"shuffle": True, "train_size": 0.7} D.data = dict(Y_train=np.zeros((900, 1))) cv = evaluator.get_splitter(D) self.assertEqual(cv.get_n_splits(), 1) - train_samples, test_samples = next(cv.split(D.data['Y_train'], - D.data['Y_train'])) + train_samples, test_samples = next( + cv.split(D.data["Y_train"], D.data["Y_train"]) + ) self.assertEqual(len(train_samples), 630) self.assertEqual(len(test_samples), 270) - evaluator.resampling_strategy_args = {'train_size': 0.752} + evaluator.resampling_strategy_args = {"train_size": 0.752} D.data = dict(Y_train=np.zeros((900, 1))) cv = evaluator.get_splitter(D) self.assertEqual(cv.get_n_splits(), 1) - train_samples, test_samples = next(cv.split(D.data['Y_train'], - D.data['Y_train'])) + train_samples, test_samples = next( + cv.split(D.data["Y_train"], D.data["Y_train"]) + ) self.assertEqual(len(train_samples), 676) self.assertEqual(len(test_samples), 224) # Multilabel Exact Ratio - D.data = dict(Y_train=np.array([[0, 0], [0, 1], [1, 1], [1, 0], [1, 1], - [1, 1], [1, 1], [1, 0], [1, 1], [1, 1]] - )) + D.data = dict( + Y_train=np.array( + [ + [0, 0], + [0, 1], + [1, 1], + [1, 0], + [1, 1], + [1, 1], + [1, 1], + [1, 0], + [1, 1], + [1, 1], + ] + ) + ) D.info = dict(task=MULTILABEL_CLASSIFICATION) - evaluator.resampling_strategy_args = {'shuffle': True, - 'train_size': 0.7} + evaluator.resampling_strategy_args = {"shuffle": True, "train_size": 0.7} cv = evaluator.get_splitter(D) self.assertEqual(cv.get_n_splits(), 1) - train_samples, test_samples = next(cv.split(D.data['Y_train'], - D.data['Y_train'])) + train_samples, test_samples = next( + cv.split(D.data["Y_train"], D.data["Y_train"]) + ) self.assertEqual(len(train_samples), 7) 
self.assertEqual(len(test_samples), 3) # Multilabel No Shuffle - D.data = dict(Y_train=np.array([[0, 0], [0, 1], [1, 1], [1, 0], [1, 1], - [1, 1], [1, 1], [1, 0], [1, 1]])) - evaluator.resampling_strategy_args = {'shuffle': False, - 'train_size': 0.7} + D.data = dict( + Y_train=np.array( + [[0, 0], [0, 1], [1, 1], [1, 0], [1, 1], [1, 1], [1, 1], [1, 0], [1, 1]] + ) + ) + evaluator.resampling_strategy_args = {"shuffle": False, "train_size": 0.7} cv = evaluator.get_splitter(D) self.assertEqual(cv.get_n_splits(), 1) - train_samples, test_samples = next(cv.split(D.data['Y_train'], - D.data['Y_train'])) + train_samples, test_samples = next( + cv.split(D.data["Y_train"], D.data["Y_train"]) + ) self.assertEqual(len(train_samples), 6) self.assertEqual(len(test_samples), 3) @@ -2255,16 +2933,17 @@ class FunctionsTest(unittest.TestCase): def setUp(self): self.queue = multiprocessing.Queue() self.configuration = get_configuration_space( - {'task': MULTICLASS_CLASSIFICATION, - 'is_sparse': False}).get_default_configuration() + {"task": MULTICLASS_CLASSIFICATION, "is_sparse": False} + ).get_default_configuration() self.data = get_multiclass_classification_datamanager() - self.tmp_dir = os.path.join(os.path.dirname(__file__), - '.test_holdout_functions') - self.n = len(self.data.data['Y_train']) - self.y = self.data.data['Y_train'].flatten() + self.tmp_dir = os.path.join( + os.path.dirname(__file__), ".test_holdout_functions" + ) + self.n = len(self.data.data["Y_train"]) + self.y = self.data.data["Y_train"].flatten() tmp_dir_name = self.id() - self.ev_path = os.path.join(this_directory, '.tmp_evaluations', tmp_dir_name) + self.ev_path = os.path.join(this_directory, ".tmp_evaluations", tmp_dir_name) if os.path.exists(self.ev_path): shutil.rmtree(self.ev_path) os.makedirs(self.ev_path, exist_ok=False) @@ -2274,12 +2953,14 @@ def setUp(self): self.backend.get_cv_model_dir.return_value = self.ev_path dummy_model_files = [os.path.join(self.ev_path, str(n)) for n in range(100)] dummy_pred_files = [os.path.join(self.ev_path, str(n)) for n in range(100, 200)] - dummy_cv_model_files = [os.path.join(self.ev_path, str(n)) for n in range(200, 300)] + dummy_cv_model_files = [ + os.path.join(self.ev_path, str(n)) for n in range(200, 300) + ] self.backend.get_model_path.side_effect = dummy_model_files self.backend.get_cv_model_path.side_effect = dummy_cv_model_files self.backend.get_prediction_output_path.side_effect = dummy_pred_files self.backend.load_datamanager.return_value = self.data - self.dataset_name = json.dumps({'task_id': 'test'}) + self.dataset_name = json.dumps({"task_id": "test"}) self.port = logging.handlers.DEFAULT_TCP_LOGGING_PORT def tearDown(self): @@ -2292,7 +2973,7 @@ def test_eval_holdout(self): port=self.port, config=self.configuration, backend=self.backend, - resampling_strategy='holdout', + resampling_strategy="holdout", resampling_strategy_args=None, seed=1, num_run=1, @@ -2307,9 +2988,9 @@ def test_eval_holdout(self): ) info = read_queue(self.queue) self.assertEqual(len(info), 1) - self.assertAlmostEqual(info[0]['loss'], 0.030303030303030276, places=3) - self.assertEqual(info[0]['status'], StatusType.SUCCESS) - self.assertNotIn('bac_metric', info[0]['additional_run_info']) + self.assertAlmostEqual(info[0]["loss"], 0.030303030303030276, places=3) + self.assertEqual(info[0]["status"], StatusType.SUCCESS) + self.assertNotIn("bac_metric", info[0]["additional_run_info"]) def test_eval_holdout_all_loss_functions(self): eval_holdout( @@ -2317,7 +2998,7 @@ def 
test_eval_holdout_all_loss_functions(self): port=self.port, config=self.configuration, backend=self.backend, - resampling_strategy='holdout', + resampling_strategy="holdout", resampling_strategy_args=None, seed=1, num_run=1, @@ -2334,34 +3015,36 @@ def test_eval_holdout_all_loss_functions(self): self.assertEqual(len(rval), 1) fixture = { - 'accuracy': 0.030303030303030276, - 'balanced_accuracy': 0.033333333333333326, - 'f1_macro': 0.032036613272311221, - 'f1_micro': 0.030303030303030276, - 'f1_weighted': 0.030441716940572849, - 'log_loss': 0.06376745642134637, - 'precision_macro': 0.02777777777777779, - 'precision_micro': 0.030303030303030276, - 'precision_weighted': 0.027777777777777901, - 'recall_macro': 0.033333333333333326, - 'recall_micro': 0.030303030303030276, - 'recall_weighted': 0.030303030303030276, - 'num_run': 1, - 'validation_loss': 0.0, - 'test_loss': 0.04, - 'train_loss': 0.0, + "accuracy": 0.030303030303030276, + "balanced_accuracy": 0.033333333333333326, + "f1_macro": 0.032036613272311221, + "f1_micro": 0.030303030303030276, + "f1_weighted": 0.030441716940572849, + "log_loss": 0.06376745642134637, + "precision_macro": 0.02777777777777779, + "precision_micro": 0.030303030303030276, + "precision_weighted": 0.027777777777777901, + "recall_macro": 0.033333333333333326, + "recall_micro": 0.030303030303030276, + "recall_weighted": 0.030303030303030276, + "num_run": 1, + "validation_loss": 0.0, + "test_loss": 0.04, + "train_loss": 0.0, } - additional_run_info = rval[0]['additional_run_info'] + additional_run_info = rval[0]["additional_run_info"] for key, value in fixture.items(): - self.assertAlmostEqual(additional_run_info[key], fixture[key], - msg=key) - self.assertIn('duration', additional_run_info) - self.assertEqual(len(additional_run_info), len(fixture) + 1, - msg=sorted(additional_run_info.items())) + self.assertAlmostEqual(additional_run_info[key], fixture[key], msg=key) + self.assertIn("duration", additional_run_info) + self.assertEqual( + len(additional_run_info), + len(fixture) + 1, + msg=sorted(additional_run_info.items()), + ) - self.assertAlmostEqual(rval[0]['loss'], 0.030303030303030276, places=3) - self.assertEqual(rval[0]['status'], StatusType.SUCCESS) + self.assertAlmostEqual(rval[0]["loss"], 0.030303030303030276, places=3) + self.assertEqual(rval[0]["status"], StatusType.SUCCESS) def test_eval_holdout_iterative_fit_no_timeout(self): eval_iterative_holdout( @@ -2369,7 +3052,7 @@ def test_eval_holdout_iterative_fit_no_timeout(self): port=self.port, config=self.configuration, backend=self.backend, - resampling_strategy='holdout', + resampling_strategy="holdout", resampling_strategy_args=None, seed=1, num_run=1, @@ -2384,9 +3067,9 @@ def test_eval_holdout_iterative_fit_no_timeout(self): ) rval = read_queue(self.queue) self.assertEqual(len(rval), 9) - self.assertAlmostEqual(rval[-1]['loss'], 0.030303030303030276) - self.assertEqual(rval[0]['status'], StatusType.DONOTADVANCE) - self.assertEqual(rval[-1]['status'], StatusType.SUCCESS) + self.assertAlmostEqual(rval[-1]["loss"], 0.030303030303030276) + self.assertEqual(rval[0]["status"], StatusType.DONOTADVANCE) + self.assertEqual(rval[-1]["status"], StatusType.SUCCESS) def test_eval_holdout_budget_iterations(self): eval_holdout( @@ -2394,7 +3077,7 @@ def test_eval_holdout_budget_iterations(self): port=self.port, config=self.configuration, backend=self.backend, - resampling_strategy='holdout', + resampling_strategy="holdout", resampling_strategy_args=None, seed=1, num_run=1, @@ -2406,45 +3089,45 @@ def 
test_eval_holdout_budget_iterations(self): instance=self.dataset_name, metric=accuracy, budget=1, - budget_type='iterations', + budget_type="iterations", additional_components=dict(), ) info = read_queue(self.queue) self.assertEqual(len(info), 1) - self.assertAlmostEqual(info[0]['loss'], 0.06060606060606055, places=3) - self.assertEqual(info[0]['status'], StatusType.SUCCESS) - self.assertNotIn('bac_metric', info[0]['additional_run_info']) + self.assertAlmostEqual(info[0]["loss"], 0.06060606060606055, places=3) + self.assertEqual(info[0]["status"], StatusType.SUCCESS) + self.assertNotIn("bac_metric", info[0]["additional_run_info"]) def test_eval_holdout_budget_iterations_converged(self): configuration = get_configuration_space( - exclude={'classifier': ['random_forest', 'liblinear_svc']}, - info={'task': MULTICLASS_CLASSIFICATION, 'is_sparse': False}, + exclude={"classifier": ["random_forest", "liblinear_svc"]}, + info={"task": MULTICLASS_CLASSIFICATION, "is_sparse": False}, ).get_default_configuration() eval_holdout( queue=self.queue, port=self.port, config=configuration, backend=self.backend, - resampling_strategy='holdout', + resampling_strategy="holdout", resampling_strategy_args=None, seed=1, num_run=1, scoring_functions=None, output_y_hat_optimization=True, include=None, - exclude={'classifier': ['random_forest', 'liblinear_svc']}, + exclude={"classifier": ["random_forest", "liblinear_svc"]}, disable_file_output=False, instance=self.dataset_name, metric=accuracy, budget=80, - budget_type='iterations', + budget_type="iterations", additional_components=dict(), ) info = read_queue(self.queue) self.assertEqual(len(info), 1) - self.assertAlmostEqual(info[0]['loss'], 0.18181818181818177, places=3) - self.assertEqual(info[0]['status'], StatusType.DONOTADVANCE) - self.assertNotIn('bac_metric', info[0]['additional_run_info']) + self.assertAlmostEqual(info[0]["loss"], 0.18181818181818177, places=3) + self.assertEqual(info[0]["status"], StatusType.DONOTADVANCE) + self.assertNotIn("bac_metric", info[0]["additional_run_info"]) def test_eval_holdout_budget_subsample(self): eval_holdout( @@ -2452,7 +3135,7 @@ def test_eval_holdout_budget_subsample(self): port=self.port, config=self.configuration, backend=self.backend, - resampling_strategy='holdout', + resampling_strategy="holdout", resampling_strategy_args=None, seed=1, num_run=1, @@ -2464,14 +3147,14 @@ def test_eval_holdout_budget_subsample(self): instance=self.dataset_name, metric=accuracy, budget=30, - budget_type='subsample', + budget_type="subsample", additional_components=dict(), ) info = read_queue(self.queue) self.assertEqual(len(info), 1) - self.assertAlmostEqual(info[0]['loss'], 0.0) - self.assertEqual(info[0]['status'], StatusType.SUCCESS) - self.assertNotIn('bac_metric', info[0]['additional_run_info']) + self.assertAlmostEqual(info[0]["loss"], 0.0) + self.assertEqual(info[0]["status"], StatusType.SUCCESS) + self.assertNotIn("bac_metric", info[0]["additional_run_info"]) def test_eval_holdout_budget_mixed_iterations(self): print(self.configuration) @@ -2480,7 +3163,7 @@ def test_eval_holdout_budget_mixed_iterations(self): port=self.port, config=self.configuration, backend=self.backend, - resampling_strategy='holdout', + resampling_strategy="holdout", resampling_strategy_args=None, seed=1, num_run=1, @@ -2492,44 +3175,44 @@ def test_eval_holdout_budget_mixed_iterations(self): instance=self.dataset_name, metric=accuracy, budget=1, - budget_type='mixed', - additional_components=dict() + budget_type="mixed", + additional_components=dict(), ) 
info = read_queue(self.queue) self.assertEqual(len(info), 1) - self.assertAlmostEqual(info[0]['loss'], 0.06060606060606055) + self.assertAlmostEqual(info[0]["loss"], 0.06060606060606055) def test_eval_holdout_budget_mixed_subsample(self): configuration = get_configuration_space( - exclude={'classifier': ['random_forest']}, - info={'task': MULTICLASS_CLASSIFICATION, 'is_sparse': False}, + exclude={"classifier": ["random_forest"]}, + info={"task": MULTICLASS_CLASSIFICATION, "is_sparse": False}, ).get_default_configuration() - self.assertEqual(configuration['classifier:__choice__'], 'liblinear_svc') + self.assertEqual(configuration["classifier:__choice__"], "liblinear_svc") eval_holdout( queue=self.queue, port=self.port, config=configuration, backend=self.backend, - resampling_strategy='holdout', + resampling_strategy="holdout", resampling_strategy_args=None, seed=1, num_run=1, scoring_functions=None, output_y_hat_optimization=True, include=None, - exclude={'classifier': ['random_forest']}, + exclude={"classifier": ["random_forest"]}, disable_file_output=False, instance=self.dataset_name, metric=accuracy, budget=40, - budget_type='mixed', + budget_type="mixed", additional_components=dict(), ) info = read_queue(self.queue) self.assertEqual(len(info), 1) - self.assertAlmostEqual(info[0]['loss'], 0.06060606060606055) - self.assertEqual(info[0]['status'], StatusType.SUCCESS) - self.assertNotIn('bac_metric', info[0]['additional_run_info']) + self.assertAlmostEqual(info[0]["loss"], 0.06060606060606055) + self.assertEqual(info[0]["status"], StatusType.SUCCESS) + self.assertNotIn("bac_metric", info[0]["additional_run_info"]) def test_eval_cv(self): eval_cv( @@ -2539,8 +3222,8 @@ def test_eval_cv(self): backend=self.backend, seed=1, num_run=1, - resampling_strategy='cv', - resampling_strategy_args={'folds': 3}, + resampling_strategy="cv", + resampling_strategy_args={"folds": 3}, scoring_functions=None, output_y_hat_optimization=True, include=None, @@ -2552,9 +3235,9 @@ def test_eval_cv(self): ) rval = read_queue(self.queue) self.assertEqual(len(rval), 1) - self.assertAlmostEqual(rval[0]['loss'], 0.04999999999999997) - self.assertEqual(rval[0]['status'], StatusType.SUCCESS) - self.assertNotIn('bac_metric', rval[0]['additional_run_info']) + self.assertAlmostEqual(rval[0]["loss"], 0.04999999999999997) + self.assertEqual(rval[0]["status"], StatusType.SUCCESS) + self.assertNotIn("bac_metric", rval[0]["additional_run_info"]) def test_eval_cv_all_loss_functions(self): eval_cv( @@ -2564,8 +3247,8 @@ def test_eval_cv_all_loss_functions(self): backend=self.backend, seed=1, num_run=1, - resampling_strategy='cv', - resampling_strategy_args={'folds': 3}, + resampling_strategy="cv", + resampling_strategy_args={"folds": 3}, scoring_functions=SCORER_LIST, output_y_hat_optimization=True, include=None, @@ -2579,33 +3262,36 @@ def test_eval_cv_all_loss_functions(self): self.assertEqual(len(rval), 1) fixture = { - 'accuracy': 0.04999999999999997, - 'balanced_accuracy': 0.05130303030303027, - 'f1_macro': 0.052793650793650775, - 'f1_micro': 0.04999999999999997, - 'f1_weighted': 0.050090909090909096, - 'log_loss': 0.12108563414774837, - 'precision_macro': 0.04963636363636359, - 'precision_micro': 0.04999999999999997, - 'precision_weighted': 0.045757575757575664, - 'recall_macro': 0.05130303030303027, - 'recall_micro': 0.04999999999999997, - 'recall_weighted': 0.04999999999999997, - 'num_run': 1, - 'validation_loss': 0.04, - 'test_loss': 0.04, - 'train_loss': 0.0, + "accuracy": 0.04999999999999997, + "balanced_accuracy": 
0.05130303030303027, + "f1_macro": 0.052793650793650775, + "f1_micro": 0.04999999999999997, + "f1_weighted": 0.050090909090909096, + "log_loss": 0.12108563414774837, + "precision_macro": 0.04963636363636359, + "precision_micro": 0.04999999999999997, + "precision_weighted": 0.045757575757575664, + "recall_macro": 0.05130303030303027, + "recall_micro": 0.04999999999999997, + "recall_weighted": 0.04999999999999997, + "num_run": 1, + "validation_loss": 0.04, + "test_loss": 0.04, + "train_loss": 0.0, } - additional_run_info = rval[0]['additional_run_info'] + additional_run_info = rval[0]["additional_run_info"] for key, value in fixture.items(): self.assertAlmostEqual(additional_run_info[key], fixture[key], msg=key) - self.assertIn('duration', additional_run_info) - self.assertEqual(len(additional_run_info), len(fixture) + 1, - msg=sorted(additional_run_info.items())) + self.assertIn("duration", additional_run_info) + self.assertEqual( + len(additional_run_info), + len(fixture) + 1, + msg=sorted(additional_run_info.items()), + ) - self.assertAlmostEqual(rval[0]['loss'], 0.04999999999999997) - self.assertEqual(rval[0]['status'], StatusType.SUCCESS) + self.assertAlmostEqual(rval[0]["loss"], 0.04999999999999997) + self.assertEqual(rval[0]["status"], StatusType.SUCCESS) # def test_eval_cv_on_subset(self): # backend_api = backend.create(self.tmp_dir, self.tmp_dir) @@ -2619,13 +3305,15 @@ def test_eval_cv_all_loss_functions(self): # self.assertEqual(info[2], 1) def test_eval_partial_cv(self): - results = [0.050000000000000044, - 0.0, - 0.09999999999999998, - 0.09999999999999998, - 0.050000000000000044] + results = [ + 0.050000000000000044, + 0.0, + 0.09999999999999998, + 0.09999999999999998, + 0.050000000000000044, + ] for fold in range(5): - instance = json.dumps({'task_id': 'data', 'fold': fold}) + instance = json.dumps({"task_id": "data", "fold": fold}) eval_partial_cv( port=self.port, queue=self.queue, @@ -2634,8 +3322,8 @@ def test_eval_partial_cv(self): seed=1, num_run=1, instance=instance, - resampling_strategy='partial-cv', - resampling_strategy_args={'folds': 5}, + resampling_strategy="partial-cv", + resampling_strategy_args={"folds": 5}, scoring_functions=None, output_y_hat_optimization=True, include=None, @@ -2646,5 +3334,5 @@ def test_eval_partial_cv(self): ) rval = read_queue(self.queue) self.assertEqual(len(rval), 1) - self.assertAlmostEqual(rval[0]['loss'], results[fold]) - self.assertEqual(rval[0]['status'], StatusType.SUCCESS) + self.assertAlmostEqual(rval[0]["loss"], results[fold]) + self.assertEqual(rval[0]["status"], StatusType.SUCCESS) diff --git a/test/test_metalearning/__init__.py b/test/test_metalearning/__init__.py index cc3cd7becd..e298f0f075 100644 --- a/test/test_metalearning/__init__.py +++ b/test/test_metalearning/__init__.py @@ -1,2 +1,2 @@ # -*- encoding: utf-8 -*- -__author__ = 'feurerm' +__author__ = "feurerm" diff --git a/test/test_metalearning/pyMetaLearn/__init__.py b/test/test_metalearning/pyMetaLearn/__init__.py index 8f0ce6cb7c..92bf78f389 100644 --- a/test/test_metalearning/pyMetaLearn/__init__.py +++ b/test/test_metalearning/pyMetaLearn/__init__.py @@ -1 +1 @@ -__author__ = 'feurerm' +__author__ = "feurerm" diff --git a/test/test_metalearning/pyMetaLearn/metalearning/test_kND.py b/test/test_metalearning/pyMetaLearn/metalearning/test_kND.py index 6733dca93f..4877379440 100644 --- a/test/test_metalearning/pyMetaLearn/metalearning/test_kND.py +++ b/test/test_metalearning/pyMetaLearn/metalearning/test_kND.py @@ -1,8 +1,9 @@ import logging import unittest -import 
numpy as np +import numpy as np import pandas as pd + from autosklearn.metalearning.metalearning.kNearestDatasets.kND import KNearestDatasets from autosklearn.metalearning.metalearning.metrics.misc import get_random_metric @@ -11,15 +12,35 @@ class kNDTest(unittest.TestCase): _multiprocess_can_split_ = True def setUp(self): - self.anneal = pd.Series({"number_of_instances": 898., "number_of_classes": 5., - "number_of_features": 38.}, name=232) - self.krvskp = pd.Series({"number_of_instances": 3196., "number_of_classes": - 2., "number_of_features": 36.}, name=233) - self.labor = pd.Series({"number_of_instances": 57., "number_of_classes": - 2., "number_of_features": 16.}, name=234) - self.runs = {232: [0.1, 0.5, 0.7], - 233: [np.NaN, 0.1, 0.7], - 234: [0.5, 0.7, 0.1]} + self.anneal = pd.Series( + { + "number_of_instances": 898.0, + "number_of_classes": 5.0, + "number_of_features": 38.0, + }, + name=232, + ) + self.krvskp = pd.Series( + { + "number_of_instances": 3196.0, + "number_of_classes": 2.0, + "number_of_features": 36.0, + }, + name=233, + ) + self.labor = pd.Series( + { + "number_of_instances": 57.0, + "number_of_classes": 2.0, + "number_of_features": 16.0, + }, + name=234, + ) + self.runs = { + 232: [0.1, 0.5, 0.7], + 233: [np.NaN, 0.1, 0.7], + 234: [0.5, 0.7, 0.1], + } self.runs = pd.DataFrame(self.runs) self.logger = logging.getLogger() @@ -30,43 +51,47 @@ def test_fit_l1_distance(self): self.assertEqual(kND.best_configuration_per_dataset[232], 0) self.assertEqual(kND.best_configuration_per_dataset[233], 1) self.assertEqual(kND.best_configuration_per_dataset[234], 2) - self.assertTrue((kND.metafeatures == - pd.DataFrame([self.anneal, self.krvskp, self.labor])).all().all()) + self.assertTrue( + (kND.metafeatures == pd.DataFrame([self.anneal, self.krvskp, self.labor])) + .all() + .all() + ) # TODO: rename to kNearestTasks or something def test_kNearestDatasets(self): kND = KNearestDatasets(logger=self.logger) - kND.fit(pd.DataFrame([self.krvskp, self.labor]), - self.runs.loc[:, [233, 234]]) + kND.fit(pd.DataFrame([self.krvskp, self.labor]), self.runs.loc[:, [233, 234]]) neighbor = kND.kNearestDatasets(self.anneal, 1) self.assertEqual([233], neighbor) - neighbor, distance = kND.kNearestDatasets(self.anneal, 1, - return_distance=True) + neighbor, distance = kND.kNearestDatasets(self.anneal, 1, return_distance=True) self.assertEqual([233], neighbor) np.testing.assert_array_almost_equal([3.8320802803440586], distance) neighbors = kND.kNearestDatasets(self.anneal, 2) self.assertEqual([233, 234], neighbors) - neighbors, distance = kND.kNearestDatasets(self.anneal, 2, - return_distance=True) + neighbors, distance = kND.kNearestDatasets(self.anneal, 2, return_distance=True) self.assertEqual([233, 234], neighbors) - np.testing.assert_array_almost_equal([3.8320802803440586, 4.367919719655942], distance) + np.testing.assert_array_almost_equal( + [3.8320802803440586, 4.367919719655942], distance + ) neighbors = kND.kNearestDatasets(self.anneal, -1) self.assertEqual([233, 234], neighbors) - neighbors, distance = kND.kNearestDatasets(self.anneal, -1, - return_distance=True) + neighbors, distance = kND.kNearestDatasets( + self.anneal, -1, return_distance=True + ) self.assertEqual([233, 234], neighbors) - np.testing.assert_array_almost_equal([3.8320802803440586, 4.367919719655942], distance) + np.testing.assert_array_almost_equal( + [3.8320802803440586, 4.367919719655942], distance + ) self.assertRaises(ValueError, kND.kNearestDatasets, self.anneal, 0) self.assertRaises(ValueError, 
kND.kNearestDatasets, self.anneal, -2) def test_kBestSuggestions(self): kND = KNearestDatasets(logger=self.logger) - kND.fit(pd.DataFrame([self.krvskp, self.labor]), - self.runs.loc[:, [233, 234]]) + kND.fit(pd.DataFrame([self.krvskp, self.labor]), self.runs.loc[:, [233, 234]]) neighbor = kND.kBestSuggestions(self.anneal, 1) np.testing.assert_array_almost_equal( [(233, 3.8320802803440586, 1)], @@ -87,10 +112,10 @@ def test_kBestSuggestions(self): self.assertRaises(ValueError, kND.kBestSuggestions, self.anneal, -2) def test_random_metric(self): - kND = KNearestDatasets(logger=self.logger, - metric=get_random_metric(random_state=1)) - kND.fit(pd.DataFrame([self.krvskp, self.labor]), - self.runs.loc[:, [233, 234]]) + kND = KNearestDatasets( + logger=self.logger, metric=get_random_metric(random_state=1) + ) + kND.fit(pd.DataFrame([self.krvskp, self.labor]), self.runs.loc[:, [233, 234]]) distances = [] for i in range(20): neighbor = kND.kBestSuggestions(self.anneal, 1) diff --git a/test/test_metalearning/pyMetaLearn/test_meta_base.py b/test/test_metalearning/pyMetaLearn/test_meta_base.py index b1ac39ee2a..1c6788e816 100644 --- a/test/test_metalearning/pyMetaLearn/test_meta_base.py +++ b/test/test_metalearning/pyMetaLearn/test_meta_base.py @@ -14,7 +14,7 @@ class MetaBaseTest(unittest.TestCase): def setUp(self): self.cwd = os.getcwd() data_dir = os.path.dirname(__file__) - data_dir = os.path.join(data_dir, 'test_meta_base_data') + data_dir = os.path.join(data_dir, "test_meta_base_data") os.chdir(data_dir) pipeline = autosklearn.pipeline.classification.SimpleClassificationPipeline() @@ -33,33 +33,34 @@ def test_get_all_runs(self): self.assertEqual((125, 125), runs.shape) def test_get_runs(self): - runs = self.base.get_runs('233') + runs = self.base.get_runs("233") # TODO update this ASAP self.assertEqual(125, len(runs)) self.assertIsInstance(runs, pd.Series) def test_get_metafeatures_single_dataset(self): - mf = self.base.get_metafeatures('233') + mf = self.base.get_metafeatures("233") self.assertIsInstance(mf, pd.Series) - self.assertEqual(mf.name, '233') - self.assertEqual(mf.loc['NumberOfInstances'], 2142.0) + self.assertEqual(mf.name, "233") + self.assertEqual(mf.loc["NumberOfInstances"], 2142.0) def test_get_metafeatures_single_feature(self): - mf = self.base.get_metafeatures(features='NumberOfInstances') + mf = self.base.get_metafeatures(features="NumberOfInstances") self.assertIsInstance(mf, pd.Series) - self.assertEqual(mf.shape, (132, )) + self.assertEqual(mf.shape, (132,)) def test_get_metafeatures_single_dataset_and_single_feature(self): - mf = self.base.get_metafeatures('233', features='NumberOfInstances') + mf = self.base.get_metafeatures("233", features="NumberOfInstances") self.assertEqual(mf.shape, ()) def test_get_metafeatures_multiple_datasets(self): - mf = self.base.get_metafeatures(['233', '236']) + mf = self.base.get_metafeatures(["233", "236"]) self.assertIsInstance(mf, pd.DataFrame) self.assertEqual(mf.shape, (2, 46)) def test_get_metafeatures_multiple_features(self): - mf = self.base.get_metafeatures(features=['NumberOfInstances', - 'NumberOfClasses']) + mf = self.base.get_metafeatures( + features=["NumberOfInstances", "NumberOfClasses"] + ) self.assertIsInstance(mf, pd.DataFrame) self.assertEqual(mf.shape, (132, 2)) diff --git a/test/test_metalearning/pyMetaLearn/test_meta_features.py b/test/test_metalearning/pyMetaLearn/test_meta_features.py index d31f3d0227..6a9bec4dcf 100644 --- a/test/test_metalearning/pyMetaLearn/test_meta_features.py +++ 
b/test/test_metalearning/pyMetaLearn/test_meta_features.py @@ -3,25 +3,21 @@ import tempfile import unittest +import arff +import numpy as np import pandas as pd - import pytest - -import arff from joblib import Memory -import numpy as np -from sklearn.datasets import make_multilabel_classification, fetch_openml +from sklearn.datasets import fetch_openml, make_multilabel_classification -from autosklearn.pipeline.components.data_preprocessing.feature_type \ - import FeatTypeSplit -from autosklearn.metalearning.metafeatures.metafeature import MetaFeatureValue import autosklearn.metalearning.metafeatures.metafeatures as meta_features +from autosklearn.metalearning.metafeatures.metafeature import MetaFeatureValue +from autosklearn.pipeline.components.data_preprocessing.feature_type import ( + FeatTypeSplit, +) -@pytest.fixture( - scope='class', - params=('pandas', 'numpy') -) +@pytest.fixture(scope="class", params=("pandas", "numpy")) def multilabel_train_data(request): cache = Memory(location=tempfile.gettempdir()) cached_func = cache.cache(make_multilabel_classification) @@ -31,20 +27,17 @@ def multilabel_train_data(request): n_classes=5, n_labels=5, return_indicator=True, - random_state=1 + random_state=1, ) - if request.param == 'numpy': + if request.param == "numpy": return X, y - elif request.param == 'pandas': + elif request.param == "pandas": return pd.DataFrame(X), y else: raise ValueError(request.param) -@pytest.fixture( - scope='class', - params=('pandas', 'numpy') -) +@pytest.fixture(scope="class", params=("pandas", "numpy")) def meta_train_data(request): tests_dir = __file__ os.chdir(os.path.dirname(tests_dir)) @@ -55,40 +48,41 @@ def meta_train_data(request): # -1 because the last attribute is the class attribute_types = [ - 'numeric' if type(type_) != list else 'nominal' - for name, type_ in dataset['attributes'][:-1]] + "numeric" if type(type_) != list else "nominal" + for name, type_ in dataset["attributes"][:-1] + ] - categorical = {i: True if attribute == 'nominal' else False - for i, attribute in enumerate(attribute_types)} + categorical = { + i: True if attribute == "nominal" else False + for i, attribute in enumerate(attribute_types) + } - data = np.array(dataset['data'], dtype=np.float64) + data = np.array(dataset["data"], dtype=np.float64) X = data[:, :-1] y = data[:, -1].reshape((-1,)) - logger = logging.getLogger('Meta') + logger = logging.getLogger("Meta") meta_features.helper_functions.set_value( - "MissingValues", meta_features.helper_functions["MissingValues"](X, y, logger, categorical), - ) + "MissingValues", + meta_features.helper_functions["MissingValues"](X, y, logger, categorical), + ) meta_features.helper_functions.set_value( "NumSymbols", - meta_features.helper_functions["NumSymbols"](X, y, logger, categorical), + meta_features.helper_functions["NumSymbols"](X, y, logger, categorical), ) meta_features.helper_functions.set_value( "ClassOccurences", meta_features.helper_functions["ClassOccurences"](X, y, logger), ) - if request.param == 'numpy': + if request.param == "numpy": return X, y, categorical - elif request.param == 'pandas': + elif request.param == "pandas": return pd.DataFrame(X), y, categorical else: raise ValueError(request.param) -@pytest.fixture( - scope='class', - params=('pandas', 'numpy') -) +@pytest.fixture(scope="class", params=("pandas", "numpy")) def meta_train_data_transformed(request): tests_dir = __file__ os.chdir(os.path.dirname(tests_dir)) @@ -99,53 +93,67 @@ def meta_train_data_transformed(request): # -1 because the last attribute 
is the class attribute_types = [ - 'numeric' if type(type_) != list else 'nominal' - for name, type_ in dataset['attributes'][:-1]] - categorical = {i: True if attribute == 'nominal' else False - for i, attribute in enumerate(attribute_types)} + "numeric" if type(type_) != list else "nominal" + for name, type_ in dataset["attributes"][:-1] + ] + categorical = { + i: True if attribute == "nominal" else False + for i, attribute in enumerate(attribute_types) + } - data = np.array(dataset['data'], dtype=np.float64) + data = np.array(dataset["data"], dtype=np.float64) X = data[:, :-1] y = data[:, -1].reshape((-1,)) - logger = logging.getLogger('Meta') + logger = logging.getLogger("Meta") meta_features.helper_functions.set_value( - "MissingValues", meta_features.helper_functions["MissingValues"](X, y, logger, categorical), - ) + "MissingValues", + meta_features.helper_functions["MissingValues"](X, y, logger, categorical), + ) meta_features.helper_functions.set_value( "NumSymbols", - meta_features.helper_functions["NumSymbols"](X, y, logger, categorical), + meta_features.helper_functions["NumSymbols"](X, y, logger, categorical), ) meta_features.helper_functions.set_value( "ClassOccurences", meta_features.helper_functions["ClassOccurences"](X, y, logger), ) - DPP = FeatTypeSplit(feat_type={ - col: 'categorical' if category else 'numerical' for col, category in categorical.items() - }) + DPP = FeatTypeSplit( + feat_type={ + col: "categorical" if category else "numerical" + for col, category in categorical.items() + } + ) X_transformed = DPP.fit_transform(X) number_numerical = np.sum(~np.array(list(categorical.values()))) - categorical_transformed = {i: True if i < (X_transformed.shape[1] - number_numerical) else False - for i in range(X_transformed.shape[1])} + categorical_transformed = { + i: True if i < (X_transformed.shape[1] - number_numerical) else False + for i in range(X_transformed.shape[1]) + } # pre-compute values for transformed inputs meta_features.helper_functions.set_value( - "PCA", meta_features.helper_functions["PCA"](X_transformed, y, logger), + "PCA", + meta_features.helper_functions["PCA"](X_transformed, y, logger), ) meta_features.helper_functions.set_value( - "Skewnesses", meta_features.helper_functions["Skewnesses"]( - X_transformed, y, logger, categorical_transformed), + "Skewnesses", + meta_features.helper_functions["Skewnesses"]( + X_transformed, y, logger, categorical_transformed + ), ) meta_features.helper_functions.set_value( - "Kurtosisses", meta_features.helper_functions["Kurtosisses"]( - X_transformed, y, logger, categorical_transformed) + "Kurtosisses", + meta_features.helper_functions["Kurtosisses"]( + X_transformed, y, logger, categorical_transformed + ), ) - if request.param == 'numpy': + if request.param == "numpy": return X_transformed, y, categorical_transformed - elif request.param == 'pandas': + elif request.param == "pandas": return pd.DataFrame(X_transformed), y, categorical_transformed else: raise ValueError(request.param) @@ -154,7 +162,8 @@ def meta_train_data_transformed(request): def test_number_of_instance(meta_train_data): X, y, categorical = meta_train_data mf = meta_features.metafeatures["NumberOfInstances"]( - X, y, logging.getLogger('Meta'), categorical) + X, y, logging.getLogger("Meta"), categorical + ) assert mf.value == 898 assert isinstance(mf, MetaFeatureValue) @@ -162,7 +171,8 @@ def test_number_of_instance(meta_train_data): def test_number_of_classes(meta_train_data): X, y, categorical = meta_train_data mf = 
meta_features.metafeatures["NumberOfClasses"]( - X, y, logging.getLogger('Meta'), categorical) + X, y, logging.getLogger("Meta"), categorical + ) assert mf.value == 5 assert isinstance(mf, MetaFeatureValue) @@ -170,7 +180,8 @@ def test_number_of_classes(meta_train_data): def test_number_of_features(meta_train_data): X, y, categorical = meta_train_data mf = meta_features.metafeatures["NumberOfFeatures"]( - X, y, logging.getLogger('Meta'), categorical) + X, y, logging.getLogger("Meta"), categorical + ) assert mf.value == 38 assert isinstance(mf, MetaFeatureValue) @@ -178,8 +189,9 @@ def test_number_of_features(meta_train_data): def test_missing_values(meta_train_data): X, y, categorical = meta_train_data mf = meta_features.helper_functions["MissingValues"]( - X, y, logging.getLogger('Meta'), categorical) - assert isinstance(mf.value, pd.DataFrame if hasattr(X, 'iloc') else np.ndarray) + X, y, logging.getLogger("Meta"), categorical + ) + assert isinstance(mf.value, pd.DataFrame if hasattr(X, "iloc") else np.ndarray) assert mf.value.shape == X.shape assert 22175 == np.count_nonzero(mf.value) @@ -187,7 +199,8 @@ def test_missing_values(meta_train_data): def test_number_of_Instances_with_missing_values(meta_train_data): X, y, categorical = meta_train_data mf = meta_features.metafeatures["NumberOfInstancesWithMissingValues"]( - X, y, logging.getLogger('Meta'), categorical) + X, y, logging.getLogger("Meta"), categorical + ) assert mf.value == 898 assert isinstance(mf, MetaFeatureValue) @@ -197,10 +210,12 @@ def test_percentage_of_Instances_with_missing_values(meta_train_data): meta_features.metafeatures.set_value( "NumberOfInstancesWithMissingValues", meta_features.metafeatures["NumberOfInstancesWithMissingValues"]( - X, y, logging.getLogger('Meta'), categorical), - ) + X, y, logging.getLogger("Meta"), categorical + ), + ) mf = meta_features.metafeatures["PercentageOfInstancesWithMissingValues"]( - X, y, logging.getLogger('Meta'), categorical) + X, y, logging.getLogger("Meta"), categorical + ) assert pytest.approx(mf.value) == 1.0 assert isinstance(mf, MetaFeatureValue) @@ -208,7 +223,8 @@ def test_percentage_of_Instances_with_missing_values(meta_train_data): def test_number_of_features_with_missing_values(meta_train_data): X, y, categorical = meta_train_data mf = meta_features.metafeatures["NumberOfFeaturesWithMissingValues"]( - X, y, logging.getLogger('Meta'), categorical) + X, y, logging.getLogger("Meta"), categorical + ) assert mf.value == 29 assert isinstance(mf, MetaFeatureValue) @@ -218,18 +234,22 @@ def test_percentage_of_features_with_missing_values(meta_train_data): meta_features.metafeatures.set_value( "NumberOfFeaturesWithMissingValues", meta_features.metafeatures["NumberOfFeaturesWithMissingValues"]( - X, y, logging.getLogger('Meta'), categorical)) + X, y, logging.getLogger("Meta"), categorical + ), + ) mf = meta_features.metafeatures["PercentageOfFeaturesWithMissingValues"]( - X, y, logging.getLogger('Meta'), categorical) - assert pytest.approx(mf.value) == float(29)/float(38) + X, y, logging.getLogger("Meta"), categorical + ) + assert pytest.approx(mf.value) == float(29) / float(38) assert isinstance(mf, MetaFeatureValue) def test_number_of_missing_values(meta_train_data): X, y, categorical = meta_train_data - np.save('/tmp/debug', X) + np.save("/tmp/debug", X) mf = meta_features.metafeatures["NumberOfMissingValues"]( - X, y, logging.getLogger('Meta'), categorical) + X, y, logging.getLogger("Meta"), categorical + ) assert mf.value == 22175 assert isinstance(mf, MetaFeatureValue) @@ 
-237,18 +257,23 @@ def test_number_of_missing_values(meta_train_data): def test_percentage_missing_values(meta_train_data): X, y, categorical = meta_train_data meta_features.metafeatures.set_value( - "NumberOfMissingValues", meta_features.metafeatures["NumberOfMissingValues"]( - X, y, logging.getLogger('Meta'), categorical)) + "NumberOfMissingValues", + meta_features.metafeatures["NumberOfMissingValues"]( + X, y, logging.getLogger("Meta"), categorical + ), + ) mf = meta_features.metafeatures["PercentageOfMissingValues"]( - X, y, logging.getLogger('Meta'), categorical) - assert pytest.approx(mf.value) == (float(22175)/float(38*898)) + X, y, logging.getLogger("Meta"), categorical + ) + assert pytest.approx(mf.value) == (float(22175) / float(38 * 898)) assert isinstance(mf, MetaFeatureValue) def test_number_of_numeric_features(meta_train_data): X, y, categorical = meta_train_data mf = meta_features.metafeatures["NumberOfNumericFeatures"]( - X, y, logging.getLogger('Meta'), categorical) + X, y, logging.getLogger("Meta"), categorical + ) assert mf.value == 6 assert isinstance(mf, MetaFeatureValue) @@ -256,7 +281,8 @@ def test_number_of_numeric_features(meta_train_data): def test_number_of_categorical_features(meta_train_data): X, y, categorical = meta_train_data mf = meta_features.metafeatures["NumberOfCategoricalFeatures"]( - X, y, logging.getLogger('Meta'), categorical) + X, y, logging.getLogger("Meta"), categorical + ) assert mf.value == 32 assert isinstance(mf, MetaFeatureValue) @@ -264,62 +290,70 @@ def test_number_of_categorical_features(meta_train_data): def test_ratio_numerical_to_categorical(meta_train_data): X, y, categorical = meta_train_data mf = meta_features.metafeatures["RatioNumericalToNominal"]( - X, y, logging.getLogger('Meta'), categorical) - assert pytest.approx(mf.value) == (float(6)/float(32)) + X, y, logging.getLogger("Meta"), categorical + ) + assert pytest.approx(mf.value) == (float(6) / float(32)) assert isinstance(mf, MetaFeatureValue) def test_ratio_categorical_to_numerical(meta_train_data): X, y, categorical = meta_train_data mf = meta_features.metafeatures["RatioNominalToNumerical"]( - X, y, logging.getLogger('Meta'), categorical) - assert pytest.approx(mf.value) == (float(32)/float(6)) + X, y, logging.getLogger("Meta"), categorical + ) + assert pytest.approx(mf.value) == (float(32) / float(6)) assert isinstance(mf, MetaFeatureValue) def test_dataset_ratio(meta_train_data): X, y, categorical = meta_train_data mf = meta_features.metafeatures["DatasetRatio"]( - X, y, logging.getLogger('Meta'), categorical) - assert pytest.approx(mf.value) == (float(38)/float(898)) + X, y, logging.getLogger("Meta"), categorical + ) + assert pytest.approx(mf.value) == (float(38) / float(898)) assert isinstance(mf, MetaFeatureValue) def test_inverse_dataset_ratio(meta_train_data): X, y, categorical = meta_train_data mf = meta_features.metafeatures["InverseDatasetRatio"]( - X, y, logging.getLogger('Meta'), categorical) - assert pytest.approx(mf.value) == (float(898)/float(38)) + X, y, logging.getLogger("Meta"), categorical + ) + assert pytest.approx(mf.value) == (float(898) / float(38)) assert isinstance(mf, MetaFeatureValue) def test_class_occurences(meta_train_data): X, y, categorical = meta_train_data mf = meta_features.helper_functions["ClassOccurences"]( - X, y, logging.getLogger('Meta'), categorical) + X, y, logging.getLogger("Meta"), categorical + ) assert mf.value == {0.0: 8.0, 1.0: 99.0, 2.0: 684.0, 4.0: 67.0, 5.0: 40.0} def test_class_probability_min(meta_train_data): X, y, 
categorical = meta_train_data mf = meta_features.metafeatures["ClassProbabilityMin"]( - X, y, logging.getLogger('Meta'), categorical) - assert pytest.approx(mf.value) == (float(8)/float(898)) + X, y, logging.getLogger("Meta"), categorical + ) + assert pytest.approx(mf.value) == (float(8) / float(898)) assert isinstance(mf, MetaFeatureValue) def test_class_probability_max(meta_train_data): X, y, categorical = meta_train_data mf = meta_features.metafeatures["ClassProbabilityMax"]( - X, y, logging.getLogger('Meta'), categorical) - assert pytest.approx(mf.value) == (float(684)/float(898)) + X, y, logging.getLogger("Meta"), categorical + ) + assert pytest.approx(mf.value) == (float(684) / float(898)) assert isinstance(mf, MetaFeatureValue) def test_class_probability_mean(meta_train_data): X, y, categorical = meta_train_data mf = meta_features.metafeatures["ClassProbabilityMean"]( - X, y, logging.getLogger('Meta'), categorical) + X, y, logging.getLogger("Meta"), categorical + ) classes = np.array((8, 99, 684, 67, 40), dtype=np.float64) prob_mean = (classes / float(898)).mean() assert pytest.approx(mf.value) == prob_mean @@ -329,7 +363,8 @@ def test_class_probability_mean(meta_train_data): def test_class_probability_std(meta_train_data): X, y, categorical = meta_train_data mf = meta_features.metafeatures["ClassProbabilitySTD"]( - X, y, logging.getLogger('Meta'), categorical) + X, y, logging.getLogger("Meta"), categorical + ) classes = np.array((8, 99, 684, 67, 40), dtype=np.float64) prob_std = (classes / float(898)).std() assert pytest.approx(mf.value) == prob_std @@ -339,53 +374,148 @@ def test_class_probability_std(meta_train_data): def test_num_symbols(meta_train_data): X, y, categorical = meta_train_data mf = meta_features.helper_functions["NumSymbols"]( - X, y, logging.getLogger('Meta'), categorical) - symbol_frequency = [2, 1, 7, 1, 2, 4, 1, 1, 4, 2, 1, 1, 1, 2, 1, 0, - 1, 1, 1, 0, 1, 1, 0, 3, 1, 0, 0, 0, 2, 2, 3, 2] + X, y, logging.getLogger("Meta"), categorical + ) + symbol_frequency = [ + 2, + 1, + 7, + 1, + 2, + 4, + 1, + 1, + 4, + 2, + 1, + 1, + 1, + 2, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 0, + 3, + 1, + 0, + 0, + 0, + 2, + 2, + 3, + 2, + ] assert mf.value == symbol_frequency def test_symbols_min(meta_train_data): X, y, categorical = meta_train_data - mf = meta_features.metafeatures["SymbolsMin"](X, y, logging.getLogger('Meta'), categorical) + mf = meta_features.metafeatures["SymbolsMin"]( + X, y, logging.getLogger("Meta"), categorical + ) assert mf.value == 1 def test_symbols_max(meta_train_data): X, y, categorical = meta_train_data # this is attribute steel - mf = meta_features.metafeatures["SymbolsMax"](X, y, logging.getLogger('Meta'), categorical) + mf = meta_features.metafeatures["SymbolsMax"]( + X, y, logging.getLogger("Meta"), categorical + ) assert mf.value == 7 def test_symbols_mean(meta_train_data): X, y, categorical = meta_train_data mf = meta_features.metafeatures["SymbolsMean"]( - X, y, logging.getLogger('Meta'), categorical) + X, y, logging.getLogger("Meta"), categorical + ) # Empty looking spaces denote empty attributes - symbol_frequency = [2, 1, 7, 1, 2, 4, 1, 1, 4, 2, 1, 1, 1, 2, 1, # - 1, 1, 1, 1, 1, 3, 1, 2, 2, 3, 2] + symbol_frequency = [ + 2, + 1, + 7, + 1, + 2, + 4, + 1, + 1, + 4, + 2, + 1, + 1, + 1, + 2, + 1, # + 1, + 1, + 1, + 1, + 1, + 3, + 1, + 2, + 2, + 3, + 2, + ] assert pytest.approx(mf.value) == np.mean(symbol_frequency) def test_symbols_std(meta_train_data): X, y, categorical = meta_train_data - mf = meta_features.metafeatures["SymbolsSTD"](X, y, 
logging.getLogger('Meta'), categorical) - symbol_frequency = [2, 1, 7, 1, 2, 4, 1, 1, 4, 2, 1, 1, 1, 2, 1, # - 1, 1, 1, 1, 1, 3, 1, 2, 2, 3, 2] + mf = meta_features.metafeatures["SymbolsSTD"]( + X, y, logging.getLogger("Meta"), categorical + ) + symbol_frequency = [ + 2, + 1, + 7, + 1, + 2, + 4, + 1, + 1, + 4, + 2, + 1, + 1, + 1, + 2, + 1, # + 1, + 1, + 1, + 1, + 1, + 3, + 1, + 2, + 2, + 3, + 2, + ] assert pytest.approx(mf.value) == np.std(symbol_frequency) def test_symbols_sum(meta_train_data): X, y, categorical = meta_train_data - mf = meta_features.metafeatures["SymbolsSum"](X, y, logging.getLogger('Meta'), categorical) + mf = meta_features.metafeatures["SymbolsSum"]( + X, y, logging.getLogger("Meta"), categorical + ) assert mf.value == 49 def test_class_entropy(meta_train_data): X, y, categorical = meta_train_data mf = meta_features.metafeatures["ClassEntropy"]( - X, y, logging.getLogger('Meta'), categorical) + X, y, logging.getLogger("Meta"), categorical + ) classes = np.array((8, 99, 684, 67, 40), dtype=np.float64) classes = classes / sum(classes) entropy = -np.sum([c * np.log2(c) for c in classes]) @@ -396,15 +526,17 @@ def test_class_entropy(meta_train_data): def test_calculate_all_metafeatures(meta_train_data): X, y, categorical = meta_train_data mf = meta_features.calculate_all_metafeatures( - X, y, categorical, "2", logger=logging.getLogger('Meta')) + X, y, categorical, "2", logger=logging.getLogger("Meta") + ) assert 52 == len(mf.metafeature_values) - assert mf.metafeature_values['NumberOfCategoricalFeatures'].value == 32 + assert mf.metafeature_values["NumberOfCategoricalFeatures"].value == 32 def test_kurtosisses(meta_train_data_transformed): X_transformed, y, categorical_transformed = meta_train_data_transformed mf = meta_features.helper_functions["Kurtosisses"]( - X_transformed, y, logging.getLogger('Meta'), categorical_transformed) + X_transformed, y, logging.getLogger("Meta"), categorical_transformed + ) assert 6 == len(mf.value) @@ -412,34 +544,39 @@ def test_kurtosis_min(meta_train_data_transformed): X_transformed, y, categorical_transformed = meta_train_data_transformed # TODO: somehow compute the expected output? meta_features.metafeatures["KurtosisMin"]( - X_transformed, y, logging.getLogger('Meta'), categorical_transformed) + X_transformed, y, logging.getLogger("Meta"), categorical_transformed + ) def test_kurtosis_max(meta_train_data_transformed): X_transformed, y, categorical_transformed = meta_train_data_transformed # TODO: somehow compute the expected output? meta_features.metafeatures["KurtosisMax"]( - X_transformed, y, logging.getLogger('Meta'), categorical_transformed) + X_transformed, y, logging.getLogger("Meta"), categorical_transformed + ) def test_kurtosis_mean(meta_train_data_transformed): X_transformed, y, categorical_transformed = meta_train_data_transformed # TODO: somehow compute the expected output? meta_features.metafeatures["KurtosisMean"]( - X_transformed, y, logging.getLogger('Meta'), categorical_transformed) + X_transformed, y, logging.getLogger("Meta"), categorical_transformed + ) def test_kurtosis_std(meta_train_data_transformed): X_transformed, y, categorical_transformed = meta_train_data_transformed # TODO: somehow compute the expected output? 
meta_features.metafeatures["KurtosisSTD"]( - X_transformed, y, logging.getLogger('Meta'), categorical_transformed) + X_transformed, y, logging.getLogger("Meta"), categorical_transformed + ) def test_skewnesses(meta_train_data_transformed): X_transformed, y, categorical_transformed = meta_train_data_transformed mf = meta_features.helper_functions["Skewnesses"]( - X_transformed, y, logging.getLogger('Meta'), categorical_transformed) + X_transformed, y, logging.getLogger("Meta"), categorical_transformed + ) assert 6 == len(mf.value) @@ -447,62 +584,72 @@ def test_skewness_min(meta_train_data_transformed): X_transformed, y, categorical_transformed = meta_train_data_transformed # TODO: somehow compute the expected output? meta_features.metafeatures["SkewnessMin"]( - X_transformed, y, logging.getLogger('Meta'), categorical_transformed) + X_transformed, y, logging.getLogger("Meta"), categorical_transformed + ) def test_skewness_max(meta_train_data_transformed): X_transformed, y, categorical_transformed = meta_train_data_transformed # TODO: somehow compute the expected output? meta_features.metafeatures["SkewnessMax"]( - X_transformed, y, logging.getLogger('Meta'), categorical_transformed) + X_transformed, y, logging.getLogger("Meta"), categorical_transformed + ) def test_skewness_mean(meta_train_data_transformed): X_transformed, y, categorical_transformed = meta_train_data_transformed # TODO: somehow compute the expected output? meta_features.metafeatures["SkewnessMean"]( - X_transformed, y, logging.getLogger('Meta'), categorical_transformed) + X_transformed, y, logging.getLogger("Meta"), categorical_transformed + ) def test_skewness_std(meta_train_data_transformed): X_transformed, y, categorical_transformed = meta_train_data_transformed # TODO: somehow compute the expected output? meta_features.metafeatures["SkewnessSTD"]( - X_transformed, y, logging.getLogger('Meta'), categorical_transformed) + X_transformed, y, logging.getLogger("Meta"), categorical_transformed + ) def test_landmark_lda(meta_train_data_transformed): X_transformed, y, categorical_transformed = meta_train_data_transformed # TODO: somehow compute the expected output? - meta_features.metafeatures["LandmarkLDA"](X_transformed, y, logging.getLogger('Meta')) + meta_features.metafeatures["LandmarkLDA"]( + X_transformed, y, logging.getLogger("Meta") + ) def test_landmark_naive_bayes(meta_train_data_transformed): X_transformed, y, categorical_transformed = meta_train_data_transformed # TODO: somehow compute the expected output? meta_features.metafeatures["LandmarkNaiveBayes"]( - X_transformed, y, logging.getLogger('Meta')) + X_transformed, y, logging.getLogger("Meta") + ) def test_landmark_decision_tree(meta_train_data_transformed): X_transformed, y, categorical_transformed = meta_train_data_transformed # TODO: somehow compute the expected output? meta_features.metafeatures["LandmarkDecisionTree"]( - X_transformed, y, logging.getLogger('Meta')) + X_transformed, y, logging.getLogger("Meta") + ) def test_decision_node(meta_train_data_transformed): X_transformed, y, categorical_transformed = meta_train_data_transformed # TODO: somehow compute the expected output? meta_features.metafeatures["LandmarkDecisionNodeLearner"]( - X_transformed, y, logging.getLogger('Meta')) + X_transformed, y, logging.getLogger("Meta") + ) def test_random_node(meta_train_data_transformed): X_transformed, y, categorical_transformed = meta_train_data_transformed # TODO: somehow compute the expected output? 
meta_features.metafeatures["LandmarkRandomNodeLearner"]( - X_transformed, y, logging.getLogger('Meta')) + X_transformed, y, logging.getLogger("Meta") + ) @unittest.skip("Currently not implemented!") @@ -510,57 +657,72 @@ def test_worst_node(meta_train_data_transformed): X_transformed, y, categorical_transformed = meta_train_data_transformed # TODO: somehow compute the expected output? meta_features.metafeatures["LandmarkWorstNodeLearner"]( - X_transformed, y, logging.getLogger('Meta')) + X_transformed, y, logging.getLogger("Meta") + ) def test_1NN(meta_train_data_transformed): X_transformed, y, categorical_transformed = meta_train_data_transformed # TODO: somehow compute the expected output? - meta_features.metafeatures["Landmark1NN"](X_transformed, y, logging.getLogger('Meta')) + meta_features.metafeatures["Landmark1NN"]( + X_transformed, y, logging.getLogger("Meta") + ) def test_pca(meta_train_data_transformed): X_transformed, y, categorical_transformed = meta_train_data_transformed - meta_features.helper_functions["PCA"](X_transformed, y, logging.getLogger('Meta')) + meta_features.helper_functions["PCA"](X_transformed, y, logging.getLogger("Meta")) def test_pca_95percent(meta_train_data_transformed): X_transformed, y, categorical_transformed = meta_train_data_transformed mf = meta_features.metafeatures["PCAFractionOfComponentsFor95PercentVariance"]( - X_transformed, y, logging.getLogger('Meta')) + X_transformed, y, logging.getLogger("Meta") + ) assert pytest.approx(0.2716049382716049) == mf.value def test_pca_kurtosis_first_pc(meta_train_data_transformed): X_transformed, y, categorical_transformed = meta_train_data_transformed mf = meta_features.metafeatures["PCAKurtosisFirstPC"]( - X_transformed, y, logging.getLogger('Meta')) + X_transformed, y, logging.getLogger("Meta") + ) assert pytest.approx(-0.702850) != mf.value def test_pca_skewness_first_pc(meta_train_data_transformed): X_transformed, y, categorical_transformed = meta_train_data_transformed mf = meta_features.metafeatures["PCASkewnessFirstPC"]( - X_transformed, y, logging.getLogger('Meta')) + X_transformed, y, logging.getLogger("Meta") + ) assert pytest.approx(0.051210) != mf.value def test_class_occurences_multilabel(multilabel_train_data): X, y = multilabel_train_data - mf = meta_features.helper_functions["ClassOccurences"](X, y, logging.getLogger('Meta')) - assert mf.value == [{0: 16.0, 1: 84.0}, - {0: 8.0, 1: 92.0}, - {0: 68.0, 1: 32.0}, - {0: 15.0, 1: 85.0}, - {0: 28.0, 1: 72.0}] + mf = meta_features.helper_functions["ClassOccurences"]( + X, y, logging.getLogger("Meta") + ) + assert mf.value == [ + {0: 16.0, 1: 84.0}, + {0: 8.0, 1: 92.0}, + {0: 68.0, 1: 32.0}, + {0: 15.0, 1: 85.0}, + {0: 28.0, 1: 72.0}, + ] def test_class_probability_min_multilabel(multilabel_train_data): X, y = multilabel_train_data meta_features.helper_functions.set_value( - "ClassOccurences", meta_features.helper_functions["ClassOccurences"]( - X, y, logging.getLogger('Meta'))) - mf = meta_features.metafeatures["ClassProbabilityMin"](X, y, logging.getLogger('Meta')) + "ClassOccurences", + meta_features.helper_functions["ClassOccurences"]( + X, y, logging.getLogger("Meta") + ), + ) + mf = meta_features.metafeatures["ClassProbabilityMin"]( + X, y, logging.getLogger("Meta") + ) assert pytest.approx(mf.value) == (float(8) / float(100)) assert isinstance(mf, MetaFeatureValue) @@ -568,9 +730,14 @@ def test_class_probability_min_multilabel(multilabel_train_data): def test_class_probability_max_multilabel(multilabel_train_data): X, y = multilabel_train_data 
meta_features.helper_functions.set_value( - "ClassOccurences", meta_features.helper_functions["ClassOccurences"]( - X, y, logging.getLogger('Meta'))) - mf = meta_features.metafeatures["ClassProbabilityMax"](X, y, logging.getLogger('Meta')) + "ClassOccurences", + meta_features.helper_functions["ClassOccurences"]( + X, y, logging.getLogger("Meta") + ), + ) + mf = meta_features.metafeatures["ClassProbabilityMax"]( + X, y, logging.getLogger("Meta") + ) assert pytest.approx(mf.value) == (float(92) / float(100)) assert isinstance(mf, MetaFeatureValue) @@ -578,9 +745,14 @@ def test_class_probability_max_multilabel(multilabel_train_data): def test_class_probability_mean_multilabel(multilabel_train_data): X, y = multilabel_train_data meta_features.helper_functions.set_value( - "ClassOccurences", meta_features.helper_functions["ClassOccurences"]( - X, y, logging.getLogger('Meta'))) - mf = meta_features.metafeatures["ClassProbabilityMean"](X, y, logging.getLogger('Meta')) + "ClassOccurences", + meta_features.helper_functions["ClassOccurences"]( + X, y, logging.getLogger("Meta") + ), + ) + mf = meta_features.metafeatures["ClassProbabilityMean"]( + X, y, logging.getLogger("Meta") + ) classes = [(16, 84), (8, 92), (68, 32), (15, 85), (28, 72)] probas = np.mean([np.mean(np.array(cls_)) / 100 for cls_ in classes]) assert mf.value == pytest.approx(probas) @@ -589,7 +761,7 @@ def test_class_probability_mean_multilabel(multilabel_train_data): def test_number_of_classes_multilabel(multilabel_train_data): X, y = multilabel_train_data - mf = meta_features.metafeatures["NumberOfClasses"](X, y, logging.getLogger('Meta')) + mf = meta_features.metafeatures["NumberOfClasses"](X, y, logging.getLogger("Meta")) assert mf.value == 5 assert isinstance(mf, MetaFeatureValue) @@ -597,18 +769,23 @@ def test_number_of_classes_multilabel(multilabel_train_data): def test_class_probability_std_multilabel(multilabel_train_data): X, y = multilabel_train_data meta_features.helper_functions.set_value( - "ClassOccurences", meta_features.helper_functions["ClassOccurences"]( - X, y, logging.getLogger('Meta'))) - mf = meta_features.metafeatures["ClassProbabilitySTD"](X, y, logging.getLogger('Meta')) + "ClassOccurences", + meta_features.helper_functions["ClassOccurences"]( + X, y, logging.getLogger("Meta") + ), + ) + mf = meta_features.metafeatures["ClassProbabilitySTD"]( + X, y, logging.getLogger("Meta") + ) classes = [(16, 84), (8, 92), (68, 32), (15, 85), (28, 72)] - probas = np.mean([np.std(np.array(cls_) / 100.) 
for cls_ in classes]) + probas = np.mean([np.std(np.array(cls_) / 100.0) for cls_ in classes]) assert pytest.approx(mf.value) == probas assert isinstance(mf, MetaFeatureValue) def test_class_entropy_multilabel(multilabel_train_data): X, y = multilabel_train_data - mf = meta_features.metafeatures["ClassEntropy"](X, y, logging.getLogger('Meta')) + mf = meta_features.metafeatures["ClassEntropy"](X, y, logging.getLogger("Meta")) classes = [(16, 84), (8, 92), (68, 32), (15, 85), (28, 72)] entropies = [] @@ -623,39 +800,45 @@ def test_class_entropy_multilabel(multilabel_train_data): def test_landmark_lda_multilabel(multilabel_train_data): X, y = multilabel_train_data - mf = meta_features.metafeatures["LandmarkLDA"](X, y, logging.getLogger('Meta')) + mf = meta_features.metafeatures["LandmarkLDA"](X, y, logging.getLogger("Meta")) assert np.isfinite(mf.value) def test_landmark_naive_bayes_multilabel(multilabel_train_data): X, y = multilabel_train_data - mf = meta_features.metafeatures["LandmarkNaiveBayes"](X, y, logging.getLogger('Meta')) + mf = meta_features.metafeatures["LandmarkNaiveBayes"]( + X, y, logging.getLogger("Meta") + ) assert np.isfinite(mf.value) def test_landmark_decision_tree_multilabel(multilabel_train_data): X, y = multilabel_train_data - mf = meta_features.metafeatures["LandmarkDecisionTree"](X, y, logging.getLogger('Meta')) + mf = meta_features.metafeatures["LandmarkDecisionTree"]( + X, y, logging.getLogger("Meta") + ) assert np.isfinite(mf.value) def test_landmark_decision_node_multilabel(multilabel_train_data): X, y = multilabel_train_data mf = meta_features.metafeatures["LandmarkDecisionNodeLearner"]( - X, y, logging.getLogger('Meta')) + X, y, logging.getLogger("Meta") + ) assert np.isfinite(mf.value) def test_landmark_random_node_multilabel(multilabel_train_data): X, y = multilabel_train_data mf = meta_features.metafeatures["LandmarkRandomNodeLearner"]( - X, y, logging.getLogger('Meta')) + X, y, logging.getLogger("Meta") + ) assert np.isfinite(mf.value) def test_1NN_multilabel(multilabel_train_data): X, y = multilabel_train_data - mf = meta_features.metafeatures["Landmark1NN"](X, y, logging.getLogger('TestMeta')) + mf = meta_features.metafeatures["Landmark1NN"](X, y, logging.getLogger("TestMeta")) assert np.isfinite(mf.value) @@ -664,7 +847,8 @@ def test_calculate_all_metafeatures_multilabel(multilabel_train_data): X, y = multilabel_train_data categorical = {i: False for i in range(10)} mf = meta_features.calculate_all_metafeatures( - X, y, categorical, "Generated", logger=logging.getLogger('TestMeta')) + X, y, categorical, "Generated", logger=logging.getLogger("TestMeta") + ) assert 52 == len(mf.metafeature_values) @@ -675,77 +859,84 @@ def test_calculate_all_metafeatures_same_results_across_datatypes(): all metafeatures work in this complex dataset """ X, y = fetch_openml(data_id=2, return_X_y=True, as_frame=True) - categorical = {col: True if X[col].dtype.name == 'category' else False - for col in X.columns} + categorical = { + col: True if X[col].dtype.name == "category" else False for col in X.columns + } mf = meta_features.calculate_all_metafeatures( - X, y, categorical, "2", logger=logging.getLogger('Meta')) + X, y, categorical, "2", logger=logging.getLogger("Meta") + ) assert 52 == len(mf.metafeature_values) expected = { - 'PCASkewnessFirstPC': 0.41897660337677867, - 'PCAKurtosisFirstPC': -0.677692541156901, - 'PCAFractionOfComponentsFor95PercentVariance': 0.2716049382716049, - 'ClassEntropy': 1.1898338562043977, - 'SkewnessSTD': 7.540418815675546, - 'SkewnessMean': 
1.47397188548894, - 'SkewnessMax': 29.916569235579203, - 'SkewnessMin': -29.916569235579203, - 'KurtosisSTD': 153.0563504598898, - 'KurtosisMean': 56.998860939761165, - 'KurtosisMax': 893.0011148272025, - 'KurtosisMin': -3.0, - 'SymbolsSum': 49, - 'SymbolsSTD': 1.3679553264445183, - 'SymbolsMean': 1.8846153846153846, - 'SymbolsMax': 7, - 'SymbolsMin': 1, - 'ClassProbabilitySTD': 0.28282850691819206, - 'ClassProbabilityMean': 0.2, - 'ClassProbabilityMax': 0.7616926503340757, - 'ClassProbabilityMin': 0.008908685968819599, - 'InverseDatasetRatio': 23.63157894736842, - 'DatasetRatio': 0.042316258351893093, - 'RatioNominalToNumerical': 5.333333333333333, - 'RatioNumericalToNominal': 0.1875, - 'NumberOfCategoricalFeatures': 32, - 'NumberOfNumericFeatures': 6, - 'NumberOfMissingValues': 22175.0, - 'NumberOfFeaturesWithMissingValues': 29.0, - 'NumberOfInstancesWithMissingValues': 898.0, - 'NumberOfFeatures': 38.0, - 'NumberOfClasses': 5.0, - 'NumberOfInstances': 898.0, - 'LogInverseDatasetRatio': 3.162583908575814, - 'LogDatasetRatio': -3.162583908575814, - 'PercentageOfMissingValues': 0.6498358926268901, - 'PercentageOfFeaturesWithMissingValues': 0.7631578947368421, - 'PercentageOfInstancesWithMissingValues': 1.0, - 'LogNumberOfFeatures': 3.6375861597263857, - 'LogNumberOfInstances': 6.8001700683022, + "PCASkewnessFirstPC": 0.41897660337677867, + "PCAKurtosisFirstPC": -0.677692541156901, + "PCAFractionOfComponentsFor95PercentVariance": 0.2716049382716049, + "ClassEntropy": 1.1898338562043977, + "SkewnessSTD": 7.540418815675546, + "SkewnessMean": 1.47397188548894, + "SkewnessMax": 29.916569235579203, + "SkewnessMin": -29.916569235579203, + "KurtosisSTD": 153.0563504598898, + "KurtosisMean": 56.998860939761165, + "KurtosisMax": 893.0011148272025, + "KurtosisMin": -3.0, + "SymbolsSum": 49, + "SymbolsSTD": 1.3679553264445183, + "SymbolsMean": 1.8846153846153846, + "SymbolsMax": 7, + "SymbolsMin": 1, + "ClassProbabilitySTD": 0.28282850691819206, + "ClassProbabilityMean": 0.2, + "ClassProbabilityMax": 0.7616926503340757, + "ClassProbabilityMin": 0.008908685968819599, + "InverseDatasetRatio": 23.63157894736842, + "DatasetRatio": 0.042316258351893093, + "RatioNominalToNumerical": 5.333333333333333, + "RatioNumericalToNominal": 0.1875, + "NumberOfCategoricalFeatures": 32, + "NumberOfNumericFeatures": 6, + "NumberOfMissingValues": 22175.0, + "NumberOfFeaturesWithMissingValues": 29.0, + "NumberOfInstancesWithMissingValues": 898.0, + "NumberOfFeatures": 38.0, + "NumberOfClasses": 5.0, + "NumberOfInstances": 898.0, + "LogInverseDatasetRatio": 3.162583908575814, + "LogDatasetRatio": -3.162583908575814, + "PercentageOfMissingValues": 0.6498358926268901, + "PercentageOfFeaturesWithMissingValues": 0.7631578947368421, + "PercentageOfInstancesWithMissingValues": 1.0, + "LogNumberOfFeatures": 3.6375861597263857, + "LogNumberOfInstances": 6.8001700683022, } assert {k: mf[k].value for k in expected.keys()} == pytest.approx(expected) expected_landmarks = { - 'Landmark1NN': 0.9721601489757914, - 'LandmarkRandomNodeLearner': 0.7616945996275606, - 'LandmarkDecisionNodeLearner': 0.7827932960893855, - 'LandmarkDecisionTree': 0.9899875853507139, - 'LandmarkNaiveBayes': 0.9287150837988827, - 'LandmarkLDA': 0.9610242085661079, + "Landmark1NN": 0.9721601489757914, + "LandmarkRandomNodeLearner": 0.7616945996275606, + "LandmarkDecisionNodeLearner": 0.7827932960893855, + "LandmarkDecisionTree": 0.9899875853507139, + "LandmarkNaiveBayes": 0.9287150837988827, + "LandmarkLDA": 0.9610242085661079, } assert {k: mf[k].value for k in 
expected_landmarks.keys()} == pytest.approx( - expected_landmarks, rel=1e-5) + expected_landmarks, rel=1e-5 + ) # Then do numpy! X, y = fetch_openml(data_id=2, return_X_y=True, as_frame=False) - categorical = {i: True if category else False - for i, category in enumerate(categorical.values())} + categorical = { + i: True if category else False + for i, category in enumerate(categorical.values()) + } mf = meta_features.calculate_all_metafeatures( - X, y, categorical, "2", logger=logging.getLogger('Meta')) + X, y, categorical, "2", logger=logging.getLogger("Meta") + ) assert {k: mf[k].value for k in expected.keys()} == pytest.approx(expected) # The column-reorder of pandas and numpy array are different after # the data preprocessing. So we cannot directly compare, and landmarking is # sensible to column order - expected_landmarks['LandmarkDecisionTree'] = 0.9922098075729361 + expected_landmarks["LandmarkDecisionTree"] = 0.9922098075729361 assert {k: mf[k].value for k in expected_landmarks.keys()} == pytest.approx( - expected_landmarks, rel=1e-5) + expected_landmarks, rel=1e-5 + ) diff --git a/test/test_metalearning/pyMetaLearn/test_meta_features_sparse.py b/test/test_metalearning/pyMetaLearn/test_meta_features_sparse.py index 3239184469..856fd595cb 100644 --- a/test/test_metalearning/pyMetaLearn/test_meta_features_sparse.py +++ b/test/test_metalearning/pyMetaLearn/test_meta_features_sparse.py @@ -2,19 +2,16 @@ import os import arff - import numpy as np - import pytest - from scipy import sparse - from sklearn.impute import SimpleImputer from sklearn.preprocessing import StandardScaler -from autosklearn.pipeline.components.data_preprocessing.feature_type \ - import FeatTypeSplit import autosklearn.metalearning.metafeatures.metafeatures as meta_features +from autosklearn.pipeline.components.data_preprocessing.feature_type import ( + FeatTypeSplit, +) @pytest.fixture @@ -28,12 +25,15 @@ def sparse_data(): # -1 because the last attribute is the class attribute_types = [ - 'numeric' if type(type_) != list else 'nominal' - for name, type_ in dataset['attributes'][:-1]] - categorical = {i: True if attribute == 'nominal' else False - for i, attribute in enumerate(attribute_types)} + "numeric" if type(type_) != list else "nominal" + for name, type_ in dataset["attributes"][:-1] + ] + categorical = { + i: True if attribute == "nominal" else False + for i, attribute in enumerate(attribute_types) + } - data = np.array(dataset['data'], dtype=np.float64) + data = np.array(dataset["data"], dtype=np.float64) X = data[:, :-1] y = data[:, -1].reshape((-1,)) @@ -53,19 +53,19 @@ def sparse_data(): helpers.set_value( "MissingValues", helpers["MissingValues"](X, y, logger, categorical), - ) + ) mf.set_value( "NumberOfMissingValues", mf["NumberOfMissingValues"](X, y, logger, categorical), - ) + ) helpers.set_value( "NumSymbols", helpers["NumSymbols"](X, y, logger, categorical), - ) + ) helpers.set_value( "ClassOccurences", helpers["ClassOccurences"](X, y, logger), - ) + ) return X, y, categorical @@ -80,12 +80,15 @@ def sparse_data_transformed(): # -1 because the last attribute is the class attribute_types = [ - 'numeric' if type(type_) != list else 'nominal' - for name, type_ in dataset['attributes'][:-1]] - categorical = {i: True if attribute == 'nominal' else False - for i, attribute in enumerate(attribute_types)} + "numeric" if type(type_) != list else "nominal" + for name, type_ in dataset["attributes"][:-1] + ] + categorical = { + i: True if attribute == "nominal" else False + for i, attribute in 
enumerate(attribute_types) + } - data = np.array(dataset['data'], dtype=np.float64) + data = np.array(dataset["data"], dtype=np.float64) X = data[:, :-1] y = data[:, -1].reshape((-1,)) @@ -96,10 +99,12 @@ def sparse_data_transformed(): X_sparse[NaNs] = 0 X_sparse = sparse.csr_matrix(X_sparse) - ohe = FeatTypeSplit(feat_type={ - col: 'categorical' if category else 'numerical' - for col, category in categorical.items() - }) + ohe = FeatTypeSplit( + feat_type={ + col: "categorical" if category else "numerical" + for col, category in categorical.items() + } + ) X_transformed = X_sparse.copy() X_transformed = ohe.fit_transform(X_transformed) imp = SimpleImputer(copy=False) @@ -109,8 +114,10 @@ def sparse_data_transformed(): # Transform the array which indicates the categorical metafeatures number_numerical = np.sum(~np.array(list(categorical.values()))) - categorical_transformed = {i: True if i < (X_transformed.shape[1] - number_numerical) else False - for i in range(X_transformed.shape[1])} + categorical_transformed = { + i: True if i < (X_transformed.shape[1] - number_numerical) else False + for i in range(X_transformed.shape[1]) + } X = X_sparse X_transformed = X_transformed @@ -123,28 +130,27 @@ def sparse_data_transformed(): helpers.set_value( "PCA", helpers["PCA"](X_transformed, y, logger), - ) + ) helpers.set_value( "MissingValues", helpers["MissingValues"](X, y, logger, categorical), - ) + ) mf.set_value( "NumberOfMissingValues", mf["NumberOfMissingValues"](X, y, logger, categorical), - ) + ) helpers.set_value( "NumSymbols", helpers["NumSymbols"](X, y, logger, categorical), - ) + ) helpers.set_value( "ClassOccurences", helpers["ClassOccurences"](X, y, logger), - ) + ) helpers.set_value( "Skewnesses", - helpers["Skewnesses"](X_transformed, y, logger, - categorical_transformed), - ) + helpers["Skewnesses"](X_transformed, y, logger, categorical_transformed), + ) helpers.set_value( "Kurtosisses", helpers["Kurtosisses"](X_transformed, y, logger, categorical_transformed), @@ -155,7 +161,8 @@ def sparse_data_transformed(): def test_missing_values(sparse_data): X, y, categorical = sparse_data mf = meta_features.helper_functions["MissingValues"]( - X, y, logging.getLogger('Meta'), categorical) + X, y, logging.getLogger("Meta"), categorical + ) assert sparse.issparse(mf.value) assert mf.value.shape == X.shape assert mf.value.dtype == bool @@ -165,21 +172,24 @@ def test_missing_values(sparse_data): def test_number_of_missing_values(sparse_data): X, y, categorical = sparse_data mf = meta_features.metafeatures["NumberOfMissingValues"]( - X, y, logging.getLogger('Meta'), categorical) + X, y, logging.getLogger("Meta"), categorical + ) assert 0 == mf.value def test_percentage_missing_values(sparse_data): X, y, categorical = sparse_data mf = meta_features.metafeatures["PercentageOfMissingValues"]( - X, y, logging.getLogger('Meta'), categorical) + X, y, logging.getLogger("Meta"), categorical + ) assert 0 == mf.value def test_number_of_Instances_with_missing_values(sparse_data): X, y, categorical = sparse_data mf = meta_features.metafeatures["NumberOfInstancesWithMissingValues"]( - X, y, logging.getLogger('Meta'), categorical) + X, y, logging.getLogger("Meta"), categorical + ) assert 0 == mf.value @@ -188,16 +198,20 @@ def test_percentage_of_Instances_with_missing_values(sparse_data): meta_features.metafeatures.set_value( "NumberOfInstancesWithMissingValues", meta_features.metafeatures["NumberOfInstancesWithMissingValues"]( - X, y, logging.getLogger('Meta'), categorical)) + X, y, 
logging.getLogger("Meta"), categorical + ), + ) mf = meta_features.metafeatures["PercentageOfInstancesWithMissingValues"]( - X, y, logging.getLogger('Meta'), categorical) + X, y, logging.getLogger("Meta"), categorical + ) assert pytest.approx(0) == mf.value def test_number_of_features_with_missing_values(sparse_data): X, y, categorical = sparse_data mf = meta_features.metafeatures["NumberOfFeaturesWithMissingValues"]( - X, y, logging.getLogger('Meta'), categorical) + X, y, logging.getLogger("Meta"), categorical + ) assert 0 == mf.value @@ -206,33 +220,72 @@ def test_percentage_of_features_with_missing_values(sparse_data): meta_features.metafeatures.set_value( "NumberOfFeaturesWithMissingValues", meta_features.metafeatures["NumberOfFeaturesWithMissingValues"]( - X, y, logging.getLogger('Meta'), categorical)) + X, y, logging.getLogger("Meta"), categorical + ), + ) mf = meta_features.metafeatures["PercentageOfFeaturesWithMissingValues"]( - X, y, logging.getLogger('Meta'), categorical) + X, y, logging.getLogger("Meta"), categorical + ) assert pytest.approx(0, mf.value) def test_num_symbols(sparse_data): X, y, categorical = sparse_data mf = meta_features.helper_functions["NumSymbols"]( - X, y, logging.getLogger('Meta'), categorical) + X, y, logging.getLogger("Meta"), categorical + ) - symbol_frequency = [2, 0, 6, 0, 1, 3, 0, 0, 3, 1, 0, 0, 0, 1, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 1, 2, 2] + symbol_frequency = [ + 2, + 0, + 6, + 0, + 1, + 3, + 0, + 0, + 3, + 1, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 2, + 0, + 0, + 0, + 0, + 1, + 1, + 2, + 2, + ] assert mf.value == symbol_frequency def test_symbols_max(sparse_data): X, y, categorical = sparse_data # this is attribute steel - mf = meta_features.metafeatures["SymbolsMax"](X, y, logging.getLogger('Meta'), categorical) + mf = meta_features.metafeatures["SymbolsMax"]( + X, y, logging.getLogger("Meta"), categorical + ) assert mf.value == 6 def test_symbols_mean(sparse_data): X, y, categorical = sparse_data mf = meta_features.metafeatures["SymbolsMean"]( - X, y, logging.getLogger('Meta'), categorical) + X, y, logging.getLogger("Meta"), categorical + ) # Empty looking spaces denote empty attributes symbol_frequency = [2, 6, 1, 3, 3, 1, 1, 2, 1, 1, 2, 2] assert pytest.approx(mf.value) == np.mean(symbol_frequency) @@ -241,7 +294,8 @@ def test_symbols_mean(sparse_data): def test_symbols_std(sparse_data): X, y, categorical = sparse_data mf = meta_features.metafeatures["SymbolsSTD"]( - X, y, logging.getLogger('Meta'), categorical) + X, y, logging.getLogger("Meta"), categorical + ) symbol_frequency = [2, 6, 1, 3, 3, 1, 1, 2, 1, 1, 2, 2] assert pytest.approx(mf.value) == np.std(symbol_frequency) @@ -249,19 +303,49 @@ def test_symbols_std(sparse_data): def test_symbols_sum(sparse_data): X, y, categorical = sparse_data mf = meta_features.metafeatures["SymbolsSum"]( - X, y, logging.getLogger('Meta'), categorical) + X, y, logging.getLogger("Meta"), categorical + ) assert mf.value == 25 def test_skewnesses(sparse_data_transformed): X_transformed, y, categorical_transformed = sparse_data_transformed fixture = [ - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - -0.696970849903357, 0.626346013011262, 0.38099875966240554, - 1.4762248835141032, 0.07687661087633788, 0.3688979783036015 + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 
0.0, + -0.696970849903357, + 0.626346013011262, + 0.38099875966240554, + 1.4762248835141032, + 0.07687661087633788, + 0.3688979783036015, ] - mf = meta_features.helper_functions["Skewnesses"](X_transformed, y, logging.getLogger('Meta')) + mf = meta_features.helper_functions["Skewnesses"]( + X_transformed, y, logging.getLogger("Meta") + ) print(mf.value) print(fixture) np.testing.assert_allclose(mf.value, fixture) @@ -269,13 +353,42 @@ def test_skewnesses(sparse_data_transformed): def test_kurtosisses(sparse_data_transformed): fixture = [ - -3.0, -3.0, -3.0, -3.0, -3.0, -3.0, -3.0, -3.0, -3.0, -3.0, -3.0, -3.0, - -3.0, -3.0, -3.0, -3.0, -3.0, -3.0, -3.0, -3.0, -3.0, -3.0, -3.0, -3.0, - -3.0, -1.1005836114255763, -1.1786325509475744, -1.23879983823279, - 1.3934382644137013, -0.9768209837948336, -1.7937072296512784 + -3.0, + -3.0, + -3.0, + -3.0, + -3.0, + -3.0, + -3.0, + -3.0, + -3.0, + -3.0, + -3.0, + -3.0, + -3.0, + -3.0, + -3.0, + -3.0, + -3.0, + -3.0, + -3.0, + -3.0, + -3.0, + -3.0, + -3.0, + -3.0, + -3.0, + -1.1005836114255763, + -1.1786325509475744, + -1.23879983823279, + 1.3934382644137013, + -0.9768209837948336, + -1.7937072296512784, ] X_transformed, y, categorical_transformed = sparse_data_transformed - mf = meta_features.helper_functions["Kurtosisses"](X_transformed, y, logging.getLogger('Meta')) + mf = meta_features.helper_functions["Kurtosisses"]( + X_transformed, y, logging.getLogger("Meta") + ) print(mf.value) np.testing.assert_allclose(mf.value, fixture) @@ -283,26 +396,30 @@ def test_kurtosisses(sparse_data_transformed): def test_pca_95percent(sparse_data_transformed): X_transformed, y, categorical_transformed = sparse_data_transformed mf = meta_features.metafeatures["PCAFractionOfComponentsFor95PercentVariance"]( - X_transformed, y, logging.getLogger('Meta')) + X_transformed, y, logging.getLogger("Meta") + ) assert pytest.approx(0.7741935483870968) == mf.value def test_pca_kurtosis_first_pc(sparse_data_transformed): X_transformed, y, categorical_transformed = sparse_data_transformed mf = meta_features.metafeatures["PCAKurtosisFirstPC"]( - X_transformed, y, logging.getLogger('Meta')) + X_transformed, y, logging.getLogger("Meta") + ) assert pytest.approx(-0.15444516166802469) == mf.value def test_pca_skewness_first_pc(sparse_data_transformed): X_transformed, y, categorical_transformed = sparse_data_transformed mf = meta_features.metafeatures["PCASkewnessFirstPC"]( - X_transformed, y, logging.getLogger('Meta')) + X_transformed, y, logging.getLogger("Meta") + ) assert pytest.approx(0.026514792083623905) == mf.value def test_calculate_all_metafeatures(sparse_data): X, y, categorical = sparse_data mf = meta_features.calculate_all_metafeatures( - X, y, categorical, "2", logger=logging.getLogger('Meta')) + X, y, categorical, "2", logger=logging.getLogger("Meta") + ) assert 52 == len(mf.metafeature_values) diff --git a/test/test_metalearning/pyMetaLearn/test_metalearner.py b/test/test_metalearning/pyMetaLearn/test_metalearner.py index 58f2ce800a..a8b7d604cb 100644 --- a/test/test_metalearning/pyMetaLearn/test_metalearner.py +++ b/test/test_metalearning/pyMetaLearn/test_metalearner.py @@ -1,14 +1,13 @@ import logging -import numpy as np import os import unittest +import numpy as np import pandas as pd - from ConfigSpace.configuration_space import Configuration -import autosklearn.pipeline.classification -import autosklearn.metalearning.optimizers.metalearn_optimizer.metalearner as metalearner +import autosklearn.metalearning.optimizers.metalearn_optimizer.metalearner as metalearner # 
noqa: E501 +import autosklearn.pipeline.classification from autosklearn.metalearning.metalearning.meta_base import MetaBase logging.basicConfig() @@ -20,7 +19,7 @@ class MetaLearnerTest(unittest.TestCase): def setUp(self): self.cwd = os.getcwd() data_dir = os.path.dirname(__file__) - data_dir = os.path.join(data_dir, 'test_meta_base_data') + data_dir = os.path.join(data_dir, "test_meta_base_data") os.chdir(data_dir) pipeline = autosklearn.pipeline.classification.SimpleClassificationPipeline() @@ -29,7 +28,8 @@ def setUp(self): self.logger = logging.getLogger() meta_base = MetaBase(self.cs, data_dir, logger=self.logger) self.meta_optimizer = metalearner.MetaLearningOptimizer( - '233', self.cs, meta_base, logger=self.logger) + "233", self.cs, meta_base, logger=self.logger + ) def tearDown(self): os.chdir(self.cwd) @@ -38,8 +38,8 @@ def test_metalearning_suggest_all(self): ret = self.meta_optimizer.metalearning_suggest_all() self.assertEqual(124, len(ret)) # Reduced to 17 as we changed QDA searchspace - self.assertEqual('gradient_boosting', ret[0]['classifier:__choice__']) - self.assertEqual('adaboost', ret[1]['classifier:__choice__']) + self.assertEqual("gradient_boosting", ret[0]["classifier:__choice__"]) + self.assertEqual("adaboost", ret[1]["classifier:__choice__"]) # There is no test for exclude_double_configuration as it's not present # in the test data @@ -48,17 +48,17 @@ def test_metalearning_suggest_all_nan_metafeatures(self): ret = self.meta_optimizer.metalearning_suggest_all() self.assertEqual(124, len(ret)) # Reduced to 17 as we changed QDA searchspace - self.assertEqual('gradient_boosting', ret[0]['classifier:__choice__']) - self.assertEqual('gradient_boosting', ret[1]['classifier:__choice__']) + self.assertEqual("gradient_boosting", ret[0]["classifier:__choice__"]) + self.assertEqual("gradient_boosting", ret[1]["classifier:__choice__"]) def test_metalearning_suggest(self): ret = self.meta_optimizer.metalearning_suggest([]) self.assertIsInstance(ret, Configuration) - self.assertEqual('gradient_boosting', ret['classifier:__choice__']) + self.assertEqual("gradient_boosting", ret["classifier:__choice__"]) ret2 = self.meta_optimizer.metalearning_suggest([ret]) self.assertIsInstance(ret2, Configuration) - self.assertEqual('adaboost', ret2['classifier:__choice__']) + self.assertEqual("adaboost", ret2["classifier:__choice__"]) def test_learn(self): # Test only some special cases which are probably not yet handled @@ -67,8 +67,10 @@ def test_learn(self): self.meta_optimizer._learn() def test_split_metafeature_array(self): - ds_metafeatures, other_metafeatures = self.meta_optimizer. 
\ - _split_metafeature_array() + ( + ds_metafeatures, + other_metafeatures, + ) = self.meta_optimizer._split_metafeature_array() self.assertIsInstance(ds_metafeatures, pd.Series) self.assertEqual(ds_metafeatures.shape, (46,)) self.assertIsInstance(other_metafeatures, pd.DataFrame) diff --git a/test/test_metalearning/pyMetaLearn/test_optimizer_base.py b/test/test_metalearning/pyMetaLearn/test_optimizer_base.py index a78a6a7f61..63dc2184da 100644 --- a/test/test_metalearning/pyMetaLearn/test_optimizer_base.py +++ b/test/test_metalearning/pyMetaLearn/test_optimizer_base.py @@ -1,5 +1,5 @@ -from collections import OrderedDict import unittest +from collections import OrderedDict from autosklearn.metalearning.optimizers import optimizer_base @@ -14,8 +14,9 @@ def setUp(self): def test_parse_hyperopt_string(self): hyperparameter_string = "x {-5, 0, 5, 10}\ny {0, 5, 10, 15}" - expected = OrderedDict([["x", ["-5", "0", "5", "10"]], - ["y", ["0", "5", "10", "15"]]]) + expected = OrderedDict( + [["x", ["-5", "0", "5", "10"]], ["y", ["0", "5", "10", "15"]]] + ) ret = optimizer_base.parse_hyperparameter_string(hyperparameter_string) self.assertEqual(ret, expected) @@ -28,8 +29,11 @@ def test_parse_hyperopt_string(self): self.assertEqual(ret, expected) hyperparameter_string = "x {-5, 0, 5, 10}\ny 0, 5, 10, 15} [5]" - self.assertRaises(ValueError, optimizer_base.parse_hyperparameter_string, - hyperparameter_string) + self.assertRaises( + ValueError, + optimizer_base.parse_hyperparameter_string, + hyperparameter_string, + ) def test_construct_cli_call(self): cli_call = optimizer_base.construct_cli_call("cv.py", {"x": -5, "y": 0}) diff --git a/test/test_metalearning/test_metalearning.py b/test/test_metalearning/test_metalearning.py index 6a7e87511d..3ec847a8f5 100644 --- a/test/test_metalearning/test_metalearning.py +++ b/test/test_metalearning/test_metalearning.py @@ -1,18 +1,17 @@ # -*- encoding: utf-8 -*- import unittest -from autosklearn.pipeline.util import get_dataset -from autosklearn.classification import AutoSklearnClassifier +from sklearn.datasets import load_breast_cancer -from autosklearn.smbo import _calculate_metafeatures, _calculate_metafeatures_encoded -from autosklearn.constants import REGRESSION, MULTICLASS_CLASSIFICATION +from autosklearn.classification import AutoSklearnClassifier +from autosklearn.constants import MULTICLASS_CLASSIFICATION, REGRESSION from autosklearn.metalearning.mismbo import suggest_via_metalearning +from autosklearn.pipeline.util import get_dataset +from autosklearn.smbo import _calculate_metafeatures, _calculate_metafeatures_encoded from autosklearn.util.pipeline import get_configuration_space -from sklearn.datasets import load_breast_cancer class MetafeatureValueDummy(object): - def __init__(self, name, value): self.name = name self.value = value @@ -22,83 +21,93 @@ class Test(unittest.TestCase): _multiprocess_can_split_ = True def setUp(self): - self.X_train, self.Y_train, self.X_test, self.Y_test = \ - get_dataset('iris') + self.X_train, self.Y_train, self.X_test, self.Y_test = get_dataset("iris") eliminate_class_two = self.Y_train != 2 self.X_train = self.X_train[eliminate_class_two] self.Y_train = self.Y_train[eliminate_class_two] - @unittest.skip('TODO refactor!') + @unittest.skip("TODO refactor!") def test_metalearning(self): - dataset_name_classification = 'digits' + dataset_name_classification = "digits" initial_challengers_classification = { - "ACC_METRIC": "--initial-challengers \" " - "-balancing:strategy 'weighting' " - "-classifier:__choice__ 
'proj_logit'", - "AUC_METRIC": "--initial-challengers \" " - "-balancing:strategy 'weighting' " - "-classifier:__choice__ 'liblinear_svc'", - "BAC_METRIC": "--initial-challengers \" " - "-balancing:strategy 'weighting' " - "-classifier:__choice__ 'proj_logit'", - "F1_METRIC": "--initial-challengers \" " - "-balancing:strategy 'weighting' " - "-classifier:__choice__ 'proj_logit'", - "PAC_METRIC": "--initial-challengers \" " - "-balancing:strategy 'none' " - "-classifier:__choice__ 'random_forest'" + "ACC_METRIC": '--initial-challengers " ' + "-balancing:strategy 'weighting' " + "-classifier:__choice__ 'proj_logit'", + "AUC_METRIC": '--initial-challengers " ' + "-balancing:strategy 'weighting' " + "-classifier:__choice__ 'liblinear_svc'", + "BAC_METRIC": '--initial-challengers " ' + "-balancing:strategy 'weighting' " + "-classifier:__choice__ 'proj_logit'", + "F1_METRIC": '--initial-challengers " ' + "-balancing:strategy 'weighting' " + "-classifier:__choice__ 'proj_logit'", + "PAC_METRIC": '--initial-challengers " ' + "-balancing:strategy 'none' " + "-classifier:__choice__ 'random_forest'", } - dataset_name_regression = 'diabetes' + dataset_name_regression = "diabetes" initial_challengers_regression = { - "A_METRIC": "--initial-challengers \" " - "-imputation:strategy 'mean' " - "-one_hot_encoding:minimum_fraction '0.01' " - "-one_hot_encoding:use_minimum_fraction 'True' " - "-preprocessor:__choice__ 'no_preprocessing' " - "-regressor:__choice__ 'random_forest'", - "R2_METRIC": "--initial-challengers \" " - "-imputation:strategy 'mean' " - "-one_hot_encoding:minimum_fraction '0.01' " - "-one_hot_encoding:use_minimum_fraction 'True' " - "-preprocessor:__choice__ 'no_preprocessing' " - "-regressor:__choice__ 'random_forest'", + "A_METRIC": '--initial-challengers " ' + "-imputation:strategy 'mean' " + "-one_hot_encoding:minimum_fraction '0.01' " + "-one_hot_encoding:use_minimum_fraction 'True' " + "-preprocessor:__choice__ 'no_preprocessing' " + "-regressor:__choice__ 'random_forest'", + "R2_METRIC": '--initial-challengers " ' + "-imputation:strategy 'mean' " + "-one_hot_encoding:minimum_fraction '0.01' " + "-one_hot_encoding:use_minimum_fraction 'True' " + "-preprocessor:__choice__ 'no_preprocessing' " + "-regressor:__choice__ 'random_forest'", } for dataset_name, task, initial_challengers in [ (dataset_name_regression, REGRESSION, initial_challengers_regression), - (dataset_name_classification, MULTICLASS_CLASSIFICATION, - initial_challengers_classification)]: + ( + dataset_name_classification, + MULTICLASS_CLASSIFICATION, + initial_challengers_classification, + ), + ]: for metric in initial_challengers: configuration_space = get_configuration_space( - { - 'metric': metric, - 'task': task, - 'is_sparse': False - }, - include={'feature_preprocessor': ['no_preprocessing']}) + {"metric": metric, "task": task, "is_sparse": False}, + include={"feature_preprocessor": ["no_preprocessing"]}, + ) X_train, Y_train, X_test, Y_test = get_dataset(dataset_name) categorical = {i: False for i in range(X_train.shape[1])} meta_features_label = _calculate_metafeatures( - X_train, Y_train, categorical, dataset_name, task) + X_train, Y_train, categorical, dataset_name, task + ) meta_features_encoded_label = _calculate_metafeatures_encoded( - X_train, Y_train, categorical, dataset_name, task) - - initial_configuration_strings_for_smac = \ - suggest_via_metalearning( - meta_features_label, - meta_features_encoded_label, - configuration_space, dataset_name, metric, - task, False, 1, None) + X_train, Y_train, 
categorical, dataset_name, task + ) + + initial_configuration_strings_for_smac = suggest_via_metalearning( + meta_features_label, + meta_features_encoded_label, + configuration_space, + dataset_name, + metric, + task, + False, + 1, + None, + ) print(metric) print(initial_configuration_strings_for_smac[0]) - self.assertTrue(initial_configuration_strings_for_smac[ - 0].startswith(initial_challengers[metric])) + self.assertTrue( + initial_configuration_strings_for_smac[0].startswith( + initial_challengers[metric] + ) + ) def test_metadata_directory(self): # Test that metadata directory is set correctly (if user specifies, @@ -108,11 +117,10 @@ def test_metadata_directory(self): automl1 = AutoSklearnClassifier( time_left_for_this_task=30, per_run_time_limit=5, - metadata_directory="pyMetaLearn/metadata_dir", # user specified metadata_dir + metadata_directory="pyMetaLearn/metadata_dir", # user metadata_dir dask_client=dask_client, ) - self.assertEqual(automl1.metadata_directory, - "pyMetaLearn/metadata_dir") + self.assertEqual(automl1.metadata_directory, "pyMetaLearn/metadata_dir") automl2 = AutoSklearnClassifier( # default metadata_dir time_left_for_this_task=30, @@ -130,6 +138,11 @@ def test_metadata_directory(self): ensemble_size=0, ) X, y = load_breast_cancer(return_X_y=True) - self.assertRaisesRegex(ValueError, "The specified metadata directory " - "\'%s\' does not exist!" % nonexistent_dir, - automl3.fit, X=X, y=y) + self.assertRaisesRegex( + ValueError, + "The specified metadata directory " + "'%s' does not exist!" % nonexistent_dir, + automl3.fit, + X=X, + y=y, + ) diff --git a/test/test_metric/__init__.py b/test/test_metric/__init__.py index cc3cd7becd..e298f0f075 100644 --- a/test/test_metric/__init__.py +++ b/test/test_metric/__init__.py @@ -1,2 +1,2 @@ # -*- encoding: utf-8 -*- -__author__ = 'feurerm' +__author__ = "feurerm" diff --git a/test/test_metric/test_metrics.py b/test/test_metric/test_metrics.py index 3c6ff73c2b..334a485fe3 100644 --- a/test/test_metric/test_metrics.py +++ b/test/test_metric/test_metrics.py @@ -1,27 +1,24 @@ import unittest import warnings -import pytest - import numpy as np +import pytest import sklearn.metrics +from smac.utils.constants import MAXINT import autosklearn.metrics - -from autosklearn.metrics import calculate_score, calculate_loss, calculate_metric from autosklearn.constants import BINARY_CLASSIFICATION, REGRESSION - -from smac.utils.constants import MAXINT +from autosklearn.metrics import calculate_loss, calculate_metric, calculate_score class TestScorer(unittest.TestCase): - def test_predict_scorer_binary(self): y_true = np.array([0, 0, 1, 1]) y_pred = np.array([[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0]]) scorer = autosklearn.metrics._PredictScorer( - 'accuracy', sklearn.metrics.accuracy_score, 1, 0, 1, {}) + "accuracy", sklearn.metrics.accuracy_score, 1, 0, 1, {} + ) score = scorer(y_true, y_pred) self.assertAlmostEqual(score, 1.0) @@ -35,15 +32,20 @@ def test_predict_scorer_binary(self): self.assertAlmostEqual(score, 0.5) scorer = autosklearn.metrics._PredictScorer( - 'bac', sklearn.metrics.balanced_accuracy_score, - 1, 0, 1, {}) + "bac", sklearn.metrics.balanced_accuracy_score, 1, 0, 1, {} + ) score = scorer(y_true, y_pred) self.assertAlmostEqual(score, 0.5) scorer = autosklearn.metrics._PredictScorer( - name='accuracy', score_func=sklearn.metrics.accuracy_score, - optimum=1, worst_possible_result=0, sign=-1, kwargs={}) + name="accuracy", + score_func=sklearn.metrics.accuracy_score, + optimum=1, + worst_possible_result=0, + sign=-1, 
+ kwargs={}, + ) y_pred = np.array([[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0]]) score = scorer(y_true, y_pred) @@ -54,7 +56,8 @@ def test_predict_scorer_multiclass(self): y_pred = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]) scorer = autosklearn.metrics._PredictScorer( - 'accuracy', sklearn.metrics.accuracy_score, 1, 0, 1, {}) + "accuracy", sklearn.metrics.accuracy_score, 1, 0, 1, {} + ) score = scorer(y_true, y_pred) self.assertAlmostEqual(score, 1.0) @@ -68,14 +71,15 @@ def test_predict_scorer_multiclass(self): self.assertAlmostEqual(score, 0.333333333) scorer = autosklearn.metrics._PredictScorer( - 'bac', sklearn.metrics.balanced_accuracy_score, - 1, 0, 1, {}) + "bac", sklearn.metrics.balanced_accuracy_score, 1, 0, 1, {} + ) score = scorer(y_true, y_pred) self.assertAlmostEqual(score, 0.333333333) scorer = autosklearn.metrics._PredictScorer( - 'accuracy', sklearn.metrics.accuracy_score, 1, 0, -1, {}) + "accuracy", sklearn.metrics.accuracy_score, 1, 0, -1, {} + ) y_pred = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]) score = scorer(y_true, y_pred) @@ -86,7 +90,8 @@ def test_predict_scorer_multilabel(self): y_pred = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]) scorer = autosklearn.metrics._PredictScorer( - 'accuracy', sklearn.metrics.accuracy_score, 1, 0, 1, {}) + "accuracy", sklearn.metrics.accuracy_score, 1, 0, 1, {} + ) score = scorer(y_true, y_pred) self.assertAlmostEqual(score, 1.0) @@ -100,7 +105,8 @@ def test_predict_scorer_multilabel(self): self.assertAlmostEqual(score, 0.25) scorer = autosklearn.metrics._PredictScorer( - 'accuracy', sklearn.metrics.accuracy_score, 1, 0, -1, {}) + "accuracy", sklearn.metrics.accuracy_score, 1, 0, -1, {} + ) y_pred = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]) score = scorer(y_true, y_pred) @@ -111,7 +117,8 @@ def test_predict_scorer_regression(self): y_pred = y_true.copy() scorer = autosklearn.metrics._PredictScorer( - 'r2', sklearn.metrics.r2_score, 1, 0, 1, {}) + "r2", sklearn.metrics.r2_score, 1, 0, 1, {} + ) score = scorer(y_true, y_pred) self.assertAlmostEqual(score, 1.0) @@ -125,7 +132,8 @@ def test_proba_scorer_binary(self): y_pred = [[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0]] scorer = autosklearn.metrics._ProbaScorer( - 'log_loss', sklearn.metrics.log_loss, 0, MAXINT, 1, {}) + "log_loss", sklearn.metrics.log_loss, 0, MAXINT, 1, {} + ) score = scorer(y_true, y_pred) self.assertAlmostEqual(score, 0.0) @@ -139,7 +147,8 @@ def test_proba_scorer_binary(self): self.assertAlmostEqual(score, 0.69314718055994529) scorer = autosklearn.metrics._ProbaScorer( - 'log_loss', sklearn.metrics.log_loss, 0, MAXINT, -1, {}) + "log_loss", sklearn.metrics.log_loss, 0, MAXINT, -1, {} + ) y_pred = [[1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0]] score = scorer(y_true, y_pred) @@ -150,7 +159,8 @@ def test_proba_scorer_multiclass(self): y_pred = [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]] scorer = autosklearn.metrics._ProbaScorer( - 'log_loss', sklearn.metrics.log_loss, 0, MAXINT, 1, {}) + "log_loss", sklearn.metrics.log_loss, 0, MAXINT, 1, {} + ) score = scorer(y_true, y_pred) self.assertAlmostEqual(score, 0.0) @@ -164,7 +174,8 @@ def test_proba_scorer_multiclass(self): self.assertAlmostEqual(score, 1.0986122886681096) scorer = autosklearn.metrics._ProbaScorer( - 'log_loss', sklearn.metrics.log_loss, 0, MAXINT, -1, {}) + "log_loss", sklearn.metrics.log_loss, 0, MAXINT, -1, {} + ) y_pred = [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]] score = scorer(y_true, y_pred) @@ -175,7 
+186,8 @@ def test_proba_scorer_multilabel(self): y_pred = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]) scorer = autosklearn.metrics._ProbaScorer( - 'log_loss', sklearn.metrics.log_loss, 0, MAXINT, 1, {}) + "log_loss", sklearn.metrics.log_loss, 0, MAXINT, 1, {} + ) score = scorer(y_true, y_pred) self.assertAlmostEqual(score, 0.34657359027997314) @@ -189,7 +201,8 @@ def test_proba_scorer_multilabel(self): self.assertAlmostEqual(score, 0.69314718055994529) scorer = autosklearn.metrics._ProbaScorer( - 'log_loss', sklearn.metrics.log_loss, 0, MAXINT, -1, {}) + "log_loss", sklearn.metrics.log_loss, 0, MAXINT, -1, {} + ) y_pred = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]) score = scorer(y_true, y_pred) @@ -200,7 +213,8 @@ def test_threshold_scorer_binary(self): y_pred = np.array([[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0]]) scorer = autosklearn.metrics._ThresholdScorer( - 'roc_auc', sklearn.metrics.roc_auc_score, 1, 0, 1, {}) + "roc_auc", sklearn.metrics.roc_auc_score, 1, 0, 1, {} + ) score = scorer(y_true, y_pred) self.assertAlmostEqual(score, 1.0) @@ -214,7 +228,8 @@ def test_threshold_scorer_binary(self): self.assertAlmostEqual(score, 0.5) scorer = autosklearn.metrics._ThresholdScorer( - 'roc_auc', sklearn.metrics.roc_auc_score, 1, 0, -1, {}) + "roc_auc", sklearn.metrics.roc_auc_score, 1, 0, -1, {} + ) y_pred = np.array([[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0]]) score = scorer(y_true, y_pred) @@ -225,7 +240,8 @@ def test_threshold_scorer_multilabel(self): y_pred = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]) scorer = autosklearn.metrics._ThresholdScorer( - 'roc_auc', sklearn.metrics.roc_auc_score, 1, 0, 1, {}) + "roc_auc", sklearn.metrics.roc_auc_score, 1, 0, 1, {} + ) score = scorer(y_true, y_pred) self.assertAlmostEqual(score, 1.0) @@ -239,7 +255,8 @@ def test_threshold_scorer_multilabel(self): self.assertAlmostEqual(score, 0.5) scorer = autosklearn.metrics._ThresholdScorer( - 'roc_auc', sklearn.metrics.roc_auc_score, 1, 0, -1, {}) + "roc_auc", sklearn.metrics.roc_auc_score, 1, 0, -1, {} + ) y_pred = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]) score = scorer(y_true, y_pred) @@ -250,7 +267,8 @@ def test_sign_flip(self): y_pred = y_true.copy() scorer = autosklearn.metrics.make_scorer( - 'r2', sklearn.metrics.r2_score, greater_is_better=True) + "r2", sklearn.metrics.r2_score, greater_is_better=True + ) score = scorer(y_true, y_pred + 1.0) self.assertAlmostEqual(score, -9.0) @@ -262,7 +280,8 @@ def test_sign_flip(self): self.assertAlmostEqual(score, 1.0) scorer = autosklearn.metrics.make_scorer( - 'r2', sklearn.metrics.r2_score, greater_is_better=False) + "r2", sklearn.metrics.r2_score, greater_is_better=False + ) score = scorer(y_true, y_pred + 1.0) self.assertAlmostEqual(score, 9.0) @@ -275,49 +294,44 @@ def test_sign_flip(self): class TestMetricsDoNotAlterInput(unittest.TestCase): - def test_regression_metrics(self): for metric, scorer in autosklearn.metrics.REGRESSION_METRICS.items(): y_true = np.random.random(100).reshape((-1, 1)) y_pred = y_true.copy() + np.random.randn(100, 1) * 0.1 - if metric == 'mean_squared_log_error': + if metric == "mean_squared_log_error": y_true = np.abs(y_true) y_pred = np.abs(y_pred) y_true_2 = y_true.copy() y_pred_2 = y_pred.copy() self.assertTrue(np.isfinite(scorer(y_true_2, y_pred_2))) - np.testing.assert_array_almost_equal(y_true, y_true_2, - err_msg=metric) - np.testing.assert_array_almost_equal(y_pred, y_pred_2, - err_msg=metric) + np.testing.assert_array_almost_equal(y_true, y_true_2, 
err_msg=metric) + np.testing.assert_array_almost_equal(y_pred, y_pred_2, err_msg=metric) def test_classification_metrics(self): for metric, scorer in autosklearn.metrics.CLASSIFICATION_METRICS.items(): y_true = np.random.randint(0, 2, size=(100, 1)) y_pred = np.random.random(200).reshape((-1, 2)) - y_pred = np.array([y_pred[i] / np.sum(y_pred[i]) - for i in range(100)]) + y_pred = np.array([y_pred[i] / np.sum(y_pred[i]) for i in range(100)]) y_true_2 = y_true.copy() y_pred_2 = y_pred.copy() try: self.assertTrue(np.isfinite(scorer(y_true_2, y_pred_2))) - np.testing.assert_array_almost_equal(y_true, y_true_2, - err_msg=metric) - np.testing.assert_array_almost_equal(y_pred, y_pred_2, - err_msg=metric) + np.testing.assert_array_almost_equal(y_true, y_true_2, err_msg=metric) + np.testing.assert_array_almost_equal(y_pred, y_pred_2, err_msg=metric) except ValueError as e: - if e.args[0] == 'Samplewise metrics are not available outside' \ - ' of multilabel classification.': + if ( + e.args[0] == "Samplewise metrics are not available outside" + " of multilabel classification." + ): pass else: raise e class TestMetric(unittest.TestCase): - def test_regression_all(self): for metric, scorer in autosklearn.metrics.REGRESSION_METRICS.items(): @@ -331,7 +345,7 @@ def test_regression_all(self): current_score = scorer(y_true, y_pred) self.assertLess(current_score, previous_score) - if scorer.name == 'mean_squared_log_error': + if scorer.name == "mean_squared_log_error": continue y_pred = np.array([-1, 0, -1, 0]) @@ -352,31 +366,39 @@ def test_classification_binary(self): # TODO: but its behavior is not right. When y_pred is completely # TODO: wrong, it does return 0.5, but when it is not completely # TODO: wrong, it returns value smaller than 0.5. - if metric in ['average_precision', - 'precision_samples', 'recall_samples', 'f1_samples']: + if metric in [ + "average_precision", + "precision_samples", + "recall_samples", + "f1_samples", + ]: continue y_true = np.array([1.0, 1.0, 1.0, 0.0, 0.0, 0.0]) - y_pred = \ - np.array([[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0]]) + y_pred = np.array( + [[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0]] + ) previous_score = scorer._optimum current_score = scorer(y_true, y_pred) self.assertAlmostEqual(current_score, previous_score) - y_pred = \ - np.array([[0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0]]) + y_pred = np.array( + [[0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0]] + ) previous_score = current_score current_score = scorer(y_true, y_pred) self.assertLess(current_score, previous_score) - y_pred = \ - np.array([[0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0]]) + y_pred = np.array( + [[0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0]] + ) previous_score = current_score current_score = scorer(y_true, y_pred) self.assertLess(current_score, previous_score) - y_pred = \ - np.array([[1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0]]) + y_pred = np.array( + [[1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0]] + ) previous_score = current_score current_score = scorer(y_true, y_pred) self.assertLess(current_score, previous_score) @@ -390,76 +412,86 @@ def test_classification_multiclass(self): # # This test should be parameterized so we can identify which metrics # cause which warning specifically and rectify if needed. 
- ignored_warnings = [ - (UserWarning, 'y_pred contains classes not in y_true') - ] + ignored_warnings = [(UserWarning, "y_pred contains classes not in y_true")] for metric, scorer in autosklearn.metrics.CLASSIFICATION_METRICS.items(): # Skip functions not applicable for multiclass classification. - if metric in ['roc_auc', 'average_precision', - 'precision', 'recall', 'f1', 'precision_samples', - 'recall_samples', 'f1_samples']: + if metric in [ + "roc_auc", + "average_precision", + "precision", + "recall", + "f1", + "precision_samples", + "recall_samples", + "f1_samples", + ]: continue - y_true = np.array( - [0.0, 0.0, 1.0, 1.0, 2.0] - ) + y_true = np.array([0.0, 0.0, 1.0, 1.0, 2.0]) - y_pred = np.array([ - [1.0, 0.0, 0.0], - [1.0, 0.0, 0.0], - [0.0, 1.0, 0.0], - [0.0, 1.0, 0.0], - [0.0, 0.0, 1.0] - ]) + y_pred = np.array( + [ + [1.0, 0.0, 0.0], + [1.0, 0.0, 0.0], + [0.0, 1.0, 0.0], + [0.0, 1.0, 0.0], + [0.0, 0.0, 1.0], + ] + ) previous_score = scorer._optimum current_score = scorer(y_true, y_pred) self.assertAlmostEqual(current_score, previous_score) - y_pred = np.array([ - [1.0, 0.0, 0.0], - [1.0, 0.0, 0.0], - [1.0, 0.0, 0.0], - [0.0, 1.0, 0.0], - [0.0, 0.0, 1.0], - ]) + y_pred = np.array( + [ + [1.0, 0.0, 0.0], + [1.0, 0.0, 0.0], + [1.0, 0.0, 0.0], + [0.0, 1.0, 0.0], + [0.0, 0.0, 1.0], + ] + ) previous_score = current_score current_score = scorer(y_true, y_pred) self.assertLess(current_score, previous_score) - y_pred = np.array([ - [0.0, 0.0, 1.0], - [0.0, 1.0, 0.0], - [1.0, 0.0, 0.0], - [0.0, 1.0, 0.0], - [0.0, 1.0, 0.0] - ]) + y_pred = np.array( + [ + [0.0, 0.0, 1.0], + [0.0, 1.0, 0.0], + [1.0, 0.0, 0.0], + [0.0, 1.0, 0.0], + [0.0, 1.0, 0.0], + ] + ) previous_score = current_score current_score = scorer(y_true, y_pred) self.assertLess(current_score, previous_score) - y_pred = np.array([ - [0.0, 0.0, 1.0], - [0.0, 0.0, 1.0], - [1.0, 0.0, 0.0], - [1.0, 0.0, 0.0], - [0.0, 1.0, 0.0] - ]) + y_pred = np.array( + [ + [0.0, 0.0, 1.0], + [0.0, 0.0, 1.0], + [1.0, 0.0, 0.0], + [1.0, 0.0, 0.0], + [0.0, 1.0, 0.0], + ] + ) previous_score = current_score current_score = scorer(y_true, y_pred) self.assertLess(current_score, previous_score) # less labels in the targets than in the predictions y_true = np.array([0.0, 0.0, 1.0, 1.0]) - y_pred = np.array([ - [1.0, 0.0, 0.0], [1.0, 0.0, 0.0], - [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]] + y_pred = np.array( + [[1.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]] ) with warnings.catch_warnings(): for category, message in ignored_warnings: warnings.filterwarnings( - 'ignore', category=category, message=message + "ignore", category=category, message=message ) score = scorer(y_true, y_pred) @@ -469,8 +501,14 @@ def test_classification_multilabel(self): for metric, scorer in autosklearn.metrics.CLASSIFICATION_METRICS.items(): # Skip functions not applicable for multi-label classification. 
- if metric in ['roc_auc', 'log_loss', - 'precision', 'recall', 'f1', 'balanced_accuracy']: + if metric in [ + "roc_auc", + "log_loss", + "precision", + "recall", + "f1", + "balanced_accuracy", + ]: continue y_true = np.array([[1, 0, 0], [1, 1, 0], [0, 1, 1], [1, 1, 1]]) y_pred = y_true.copy() @@ -495,11 +533,11 @@ def test_classification_multilabel(self): class TestCalculateScore(unittest.TestCase): - def test_unsupported_task_type(self): y_true = np.array([1.0, 1.0, 1.0, 0.0, 0.0, 0.0]) - y_pred = \ - np.array([[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0]]) + y_pred = np.array( + [[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0]] + ) scorer = autosklearn.metrics.accuracy raised = False @@ -513,17 +551,22 @@ def test_classification_scoring_functions(self): scoring_functions = list(autosklearn.metrics.CLASSIFICATION_METRICS.values()) scoring_functions.remove(autosklearn.metrics.accuracy) - fail_metrics = ['precision_samples', 'recall_samples', 'f1_samples'] + fail_metrics = ["precision_samples", "recall_samples", "f1_samples"] success_metrics = list(autosklearn.metrics.CLASSIFICATION_METRICS.keys()) for metric in fail_metrics: success_metrics.remove(metric) y_true = np.array([1.0, 1.0, 1.0, 0.0, 0.0, 0.0]) - y_pred = \ - np.array([[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0]]) - score_dict = calculate_score(y_true, y_pred, BINARY_CLASSIFICATION, - autosklearn.metrics.accuracy, - scoring_functions) + y_pred = np.array( + [[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0]] + ) + score_dict = calculate_score( + y_true, + y_pred, + BINARY_CLASSIFICATION, + autosklearn.metrics.accuracy, + scoring_functions, + ) self.assertIsInstance(score_dict, dict) self.assertTrue(len(success_metrics), len(score_dict)) @@ -531,8 +574,10 @@ def test_classification_scoring_functions(self): self.assertNotIn(metric, score_dict.keys()) for metric in success_metrics: self.assertIn(metric, score_dict.keys()) - self.assertAlmostEqual(autosklearn.metrics.CLASSIFICATION_METRICS[metric]._optimum, - score_dict[metric]) + self.assertAlmostEqual( + autosklearn.metrics.CLASSIFICATION_METRICS[metric]._optimum, + score_dict[metric], + ) def test_regression_scoring_functions(self): @@ -540,26 +585,33 @@ def test_regression_scoring_functions(self): scoring_functions.remove(autosklearn.metrics.root_mean_squared_error) metrics = list(autosklearn.metrics.REGRESSION_METRICS.keys()) - metrics.remove('mean_squared_log_error') + metrics.remove("mean_squared_log_error") y_true = np.array([1, 2, 3, -4]) y_pred = y_true.copy() - score_dict = calculate_score(y_true, y_pred, REGRESSION, - autosklearn.metrics.root_mean_squared_error, - scoring_functions) + score_dict = calculate_score( + y_true, + y_pred, + REGRESSION, + autosklearn.metrics.root_mean_squared_error, + scoring_functions, + ) self.assertIsInstance(score_dict, dict) self.assertTrue(len(metrics), len(score_dict)) for metric in metrics: self.assertIn(metric, score_dict.keys()) - self.assertAlmostEqual(autosklearn.metrics.REGRESSION_METRICS[metric]._optimum, - score_dict[metric]) + self.assertAlmostEqual( + autosklearn.metrics.REGRESSION_METRICS[metric]._optimum, + score_dict[metric], + ) def test_classification_only_metric(self): y_true = np.array([1.0, 1.0, 1.0, 0.0, 0.0, 0.0]) - y_pred = \ - np.array([[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0]]) + y_pred = np.array( + [[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0]] + ) scorer = 
autosklearn.metrics.accuracy score = calculate_score(y_true, y_pred, BINARY_CLASSIFICATION, scorer) @@ -602,22 +654,28 @@ def test_calculate_loss(): prediction=y_pred, task_type=BINARY_CLASSIFICATION, metric=autosklearn.metrics.accuracy, - scoring_functions=[autosklearn.metrics.accuracy, autosklearn.metrics.balanced_accuracy] + scoring_functions=[ + autosklearn.metrics.accuracy, + autosklearn.metrics.balanced_accuracy, + ], ) expected_score_dict = { - 'accuracy': 0.9, - 'balanced_accuracy': 0.9285714285714286, + "accuracy": 0.9, + "balanced_accuracy": 0.9285714285714286, } loss_dict = calculate_loss( solution=y_true, prediction=y_pred, task_type=BINARY_CLASSIFICATION, metric=autosklearn.metrics.accuracy, - scoring_functions=[autosklearn.metrics.accuracy, autosklearn.metrics.balanced_accuracy] + scoring_functions=[ + autosklearn.metrics.accuracy, + autosklearn.metrics.balanced_accuracy, + ], ) for expected_metric, expected_score in expected_score_dict.items(): assert pytest.approx(expected_score) == score_dict[expected_metric] - assert pytest.approx(1-expected_score) == loss_dict[expected_metric] + assert pytest.approx(1 - expected_score) == loss_dict[expected_metric] # Lastly make sure that metrics whose optimum is zero # are also properly working diff --git a/test/test_optimizer/test_smbo.py b/test/test_optimizer/test_smbo.py index 4b7f0ffd79..fafd7b5a42 100644 --- a/test/test_optimizer/test_smbo.py +++ b/test/test_optimizer/test_smbo.py @@ -1,36 +1,39 @@ import logging.handlers -from ConfigSpace.configuration_space import Configuration - import pytest +from ConfigSpace.configuration_space import Configuration import autosklearn.metrics -from autosklearn.smbo import AutoMLSMBO import autosklearn.pipeline.util as putil from autosklearn.automl import AutoML from autosklearn.constants import BINARY_CLASSIFICATION from autosklearn.data.xy_data_manager import XYDataManager +from autosklearn.smbo import AutoMLSMBO from autosklearn.util.stopwatch import StopWatch -@pytest.mark.parametrize("context", ['fork', 'forkserver']) +@pytest.mark.parametrize("context", ["fork", "forkserver"]) def test_smbo_metalearning_configurations(backend, context, dask_client): # Get the inputs to the optimizer - X_train, Y_train, X_test, Y_test = putil.get_dataset('iris') - config_space = AutoML(delete_tmp_folder_after_terminate=False, - metric=autosklearn.metrics.accuracy, - time_left_for_this_task=20, - per_run_time_limit=5).fit( - X_train, Y_train, - task=BINARY_CLASSIFICATION, - only_return_configuration_space=True) + X_train, Y_train, X_test, Y_test = putil.get_dataset("iris") + config_space = AutoML( + delete_tmp_folder_after_terminate=False, + metric=autosklearn.metrics.accuracy, + time_left_for_this_task=20, + per_run_time_limit=5, + ).fit( + X_train, + Y_train, + task=BINARY_CLASSIFICATION, + only_return_configuration_space=True, + ) watcher = StopWatch() # Create an optimizer smbo = AutoMLSMBO( config_space=config_space, - dataset_name='iris', + dataset_name="iris", backend=backend, total_walltime_limit=10, func_eval_time_limit=5, @@ -49,11 +52,13 @@ def test_smbo_metalearning_configurations(backend, context, dask_client): # Create the inputs to metalearning datamanager = XYDataManager( - X_train, Y_train, - X_test, Y_test, + X_train, + Y_train, + X_test, + Y_test, task=BINARY_CLASSIFICATION, - dataset_name='iris', - feat_type={i: 'numerical' for i in range(X_train.shape[1])}, + dataset_name="iris", + feat_type={i: "numerical" for i in range(X_train.shape[1])}, ) backend.save_datamanager(datamanager) 
smbo.task = BINARY_CLASSIFICATION diff --git a/test/test_pipeline/components/__init__.py b/test/test_pipeline/components/__init__.py index 8f0ce6cb7c..92bf78f389 100644 --- a/test/test_pipeline/components/__init__.py +++ b/test/test_pipeline/components/__init__.py @@ -1 +1 @@ -__author__ = 'feurerm' +__author__ = "feurerm" diff --git a/test/test_pipeline/components/classification/__init__.py b/test/test_pipeline/components/classification/__init__.py index 8f0ce6cb7c..92bf78f389 100644 --- a/test/test_pipeline/components/classification/__init__.py +++ b/test/test_pipeline/components/classification/__init__.py @@ -1 +1 @@ -__author__ = 'feurerm' +__author__ = "feurerm" diff --git a/test/test_pipeline/components/classification/test_adaboost.py b/test/test_pipeline/components/classification/test_adaboost.py index 3c0d96f9a6..f41ba3319f 100644 --- a/test/test_pipeline/components/classification/test_adaboost.py +++ b/test/test_pipeline/components/classification/test_adaboost.py @@ -1,7 +1,7 @@ import sklearn.ensemble -from autosklearn.pipeline.components.classification.adaboost import \ - AdaboostClassifier +from autosklearn.pipeline.components.classification.adaboost import AdaboostClassifier + from .test_base import BaseClassificationComponentTest diff --git a/test/test_pipeline/components/classification/test_base.py b/test/test_pipeline/components/classification/test_base.py index 4fc381af56..a524759bc5 100644 --- a/test/test_pipeline/components/classification/test_base.py +++ b/test/test_pipeline/components/classification/test_base.py @@ -1,15 +1,18 @@ -from typing import Optional, Dict +from typing import Dict, Optional import unittest -from autosklearn.pipeline.util import _test_classifier, \ - _test_classifier_predict_proba, _test_classifier_iterative_fit -from autosklearn.pipeline.constants import SPARSE - -import sklearn.metrics import numpy as np +import sklearn.metrics -from test.test_pipeline.ignored_warnings import ignore_warnings, classifier_warnings +from autosklearn.pipeline.constants import SPARSE +from autosklearn.pipeline.util import ( + _test_classifier, + _test_classifier_iterative_fit, + _test_classifier_predict_proba, +) + +from test.test_pipeline.ignored_warnings import classifier_warnings, ignore_warnings class BaseClassificationComponentTest(unittest.TestCase): @@ -29,14 +32,14 @@ def test_default_iris(self): return for i in range(2): - predictions, targets, n_calls = \ - _test_classifier(dataset="iris", - classifier=self.module) - self.assertAlmostEqual(self.res["default_iris"], - sklearn.metrics.accuracy_score(targets, - predictions), - places=self.res.get( - "default_iris_places", 7)) + predictions, targets, n_calls = _test_classifier( + dataset="iris", classifier=self.module + ) + self.assertAlmostEqual( + self.res["default_iris"], + sklearn.metrics.accuracy_score(targets, predictions), + places=self.res.get("default_iris_places", 7), + ) if self.res.get("iris_n_calls"): self.assertEqual(self.res["iris_n_calls"], n_calls) @@ -45,7 +48,7 @@ def test_get_max_iter(self): if self.__class__ == BaseClassificationComponentTest: return - if not hasattr(self.module, 'iterative_fit'): + if not hasattr(self.module, "iterative_fit"): return self.module.get_max_iter() @@ -55,23 +58,25 @@ def test_default_iris_iterative_fit(self): if self.__class__ == BaseClassificationComponentTest: return - if not hasattr(self.module, 'iterative_fit'): + if not hasattr(self.module, "iterative_fit"): return for i in range(2): - predictions, targets, classifier = \ - 
_test_classifier_iterative_fit(dataset="iris", - classifier=self.module) - self.assertAlmostEqual(self.res["default_iris_iterative"], - sklearn.metrics.accuracy_score(targets, - predictions), - places=self.res.get( - "default_iris_iterative_places", 7)) + predictions, targets, classifier = _test_classifier_iterative_fit( + dataset="iris", classifier=self.module + ) + self.assertAlmostEqual( + self.res["default_iris_iterative"], + sklearn.metrics.accuracy_score(targets, predictions), + places=self.res.get("default_iris_iterative_places", 7), + ) if self.step_hyperparameter is not None: self.assertEqual( - getattr(classifier.estimator, self.step_hyperparameter['name']), - self.res.get("iris_iterative_n_iter", self.step_hyperparameter['value']) + getattr(classifier.estimator, self.step_hyperparameter["name"]), + self.res.get( + "iris_iterative_n_iter", self.step_hyperparameter["value"] + ), ) def test_default_iris_predict_proba(self): @@ -86,7 +91,7 @@ def test_default_iris_predict_proba(self): self.assertAlmostEqual( self.res["default_iris_proba"], sklearn.metrics.log_loss(targets, predictions), - places=self.res.get("default_iris_proba_places", 7) + places=self.res.get("default_iris_proba_places", 7), ) def test_default_iris_sparse(self): @@ -98,15 +103,14 @@ def test_default_iris_sparse(self): return for i in range(2): - predictions, targets, _ = \ - _test_classifier(dataset="iris", - classifier=self.module, - sparse=True) - self.assertAlmostEqual(self.res["default_iris_sparse"], - sklearn.metrics.accuracy_score(targets, - predictions), - places=self.res.get( - "default_iris_sparse_places", 7)) + predictions, targets, _ = _test_classifier( + dataset="iris", classifier=self.module, sparse=True + ) + self.assertAlmostEqual( + self.res["default_iris_sparse"], + sklearn.metrics.accuracy_score(targets, predictions), + places=self.res.get("default_iris_sparse_places", 7), + ) def test_default_digits_binary(self): @@ -114,15 +118,14 @@ def test_default_digits_binary(self): return for i in range(2): - predictions, targets, _ = \ - _test_classifier(classifier=self.module, - dataset='digits', sparse=False, - make_binary=True) - self.assertAlmostEqual(self.res["default_digits_binary"], - sklearn.metrics.accuracy_score( - targets, predictions), - places=self.res.get( - "default_digits_binary_places", 7)) + predictions, targets, _ = _test_classifier( + classifier=self.module, dataset="digits", sparse=False, make_binary=True + ) + self.assertAlmostEqual( + self.res["default_digits_binary"], + sklearn.metrics.accuracy_score(targets, predictions), + places=self.res.get("default_digits_binary_places", 7), + ) def test_default_digits(self): @@ -130,14 +133,14 @@ def test_default_digits(self): return for i in range(2): - predictions, targets, n_calls = \ - _test_classifier(dataset="digits", - classifier=self.module) - self.assertAlmostEqual(self.res["default_digits"], - sklearn.metrics.accuracy_score(targets, - predictions), - places=self.res.get( - "default_digits_places", 7)) + predictions, targets, n_calls = _test_classifier( + dataset="digits", classifier=self.module + ) + self.assertAlmostEqual( + self.res["default_digits"], + sklearn.metrics.accuracy_score(targets, predictions), + places=self.res.get("default_digits_places", 7), + ) if self.res.get("digits_n_calls"): self.assertEqual(self.res["digits_n_calls"], n_calls) @@ -147,23 +150,25 @@ def test_default_digits_iterative_fit(self): if self.__class__ == BaseClassificationComponentTest: return - if not hasattr(self.module, 'iterative_fit'): + if not 
hasattr(self.module, "iterative_fit"): return for i in range(2): - predictions, targets, classifier = \ - _test_classifier_iterative_fit(dataset="digits", - classifier=self.module) - self.assertAlmostEqual(self.res["default_digits_iterative"], - sklearn.metrics.accuracy_score(targets, - predictions), - places=self.res.get( - "default_digits_iterative_places", 7)) + predictions, targets, classifier = _test_classifier_iterative_fit( + dataset="digits", classifier=self.module + ) + self.assertAlmostEqual( + self.res["default_digits_iterative"], + sklearn.metrics.accuracy_score(targets, predictions), + places=self.res.get("default_digits_iterative_places", 7), + ) if self.step_hyperparameter is not None: self.assertEqual( - getattr(classifier.estimator, self.step_hyperparameter['name']), - self.res.get("digits_iterative_n_iter", self.step_hyperparameter['value']) + getattr(classifier.estimator, self.step_hyperparameter["name"]), + self.res.get( + "digits_iterative_n_iter", self.step_hyperparameter["value"] + ), ) def test_default_digits_multilabel(self): @@ -176,15 +181,16 @@ def test_default_digits_multilabel(self): for _ in range(2): predictions, targets, _ = _test_classifier( - classifier=self.module, dataset='digits', make_multilabel=True + classifier=self.module, dataset="digits", make_multilabel=True ) score = sklearn.metrics.precision_score( - targets, predictions, average='macro', zero_division=0 + targets, predictions, average="macro", zero_division=0 ) self.assertAlmostEqual( - self.res["default_digits_multilabel"], score, - places=self.res.get("default_digits_multilabel_places", 7) + self.res["default_digits_multilabel"], + score, + places=self.res.get("default_digits_multilabel_places", 7), ) def test_default_digits_multilabel_predict_proba(self): @@ -196,15 +202,15 @@ def test_default_digits_multilabel_predict_proba(self): return for i in range(2): - predictions, targets = \ - _test_classifier_predict_proba(classifier=self.module, - make_multilabel=True) + predictions, targets = _test_classifier_predict_proba( + classifier=self.module, make_multilabel=True + ) self.assertEqual(predictions.shape, ((50, 3))) - self.assertAlmostEqual(self.res["default_digits_multilabel_proba"], - sklearn.metrics.roc_auc_score( - targets, predictions, average='macro'), - places=self.res.get( - "default_digits_multilabel_proba_places", 7)) + self.assertAlmostEqual( + self.res["default_digits_multilabel_proba"], + sklearn.metrics.roc_auc_score(targets, predictions, average="macro"), + places=self.res.get("default_digits_multilabel_proba_places", 7), + ) def test_target_algorithm_multioutput_multiclass_support(self): @@ -218,42 +224,66 @@ def test_target_algorithm_multioutput_multiclass_support(self): X = np.random.random((10, 10)) y = np.random.randint(0, 1, size=(10, 10)) self.assertRaisesRegex( - ValueError, - 'bad input shape \\(10, 10\\)', - cls.fit, - X, - y + ValueError, "bad input shape \\(10, 10\\)", cls.fit, X, y ) else: return def test_module_idempotent(self): - """ Fitting twice with the same config gives the same model params. + """Fitting twice with the same config gives the same model params. - This is only valid when the random_state passed is an int. If a - RandomState object is passed then repeated calls to fit will have - different results. See the section on "Controlling Randomness" in the - sklearn docs. + This is only valid when the random_state passed is an int. If a + RandomState object is passed then repeated calls to fit will have + different results. 
See the section on "Controlling Randomness" in the + sklearn docs. - https://scikit-learn.org/0.24/common_pitfalls.html#controlling-randomness + https://scikit-learn.org/0.24/common_pitfalls.html#controlling-randomness """ if self.__class__ == BaseClassificationComponentTest: return classifier_cls = self.module - X = np.array([ - [0, 0], [0, 1], [1, 0], [1, 1], - [0, 0], [0, 1], [1, 0], [1, 1], - [0, 0], [0, 1], [1, 0], [1, 1], - [0, 0], [0, 1], [1, 0], [1, 1], - ]) - y = np.array([ - 0, 1, 1, 0, - 0, 1, 1, 0, - 0, 1, 1, 0, - 0, 1, 1, 0, - ]) + X = np.array( + [ + [0, 0], + [0, 1], + [1, 0], + [1, 1], + [0, 0], + [0, 1], + [1, 0], + [1, 1], + [0, 0], + [0, 1], + [1, 0], + [1, 1], + [0, 0], + [0, 1], + [1, 0], + [1, 1], + ] + ) + y = np.array( + [ + 0, + 1, + 1, + 0, + 0, + 1, + 1, + 0, + 0, + 1, + 1, + 0, + 0, + 1, + 1, + 0, + ] + ) # There are certain errors we ignore so we wrap this in a function def fitted_params(model) -> Optional[Dict]: @@ -268,12 +298,18 @@ def is_QDA_error(err): # We are okay if the BaseClassifier in AdaBoostClassifier is worse # than random so no ensemble can be fit def is_AdaBoostClassifier_error(err): - return ("BaseClassifier in AdaBoostClassifier ensemble is worse" - + " than random, ensemble can not be fit." in err.args[0]) + return ( + "BaseClassifier in AdaBoostClassifier ensemble is worse" + + " than random, ensemble can not be fit." + in err.args[0] + ) def is_unset_param_raw_predictions_val_error(err): - return ("local variable 'raw_predictions_val' referenced before" - + " assignment" in err.args[0]) + return ( + "local variable 'raw_predictions_val' referenced before" + + " assignment" + in err.args[0] + ) try: with ignore_warnings(classifier_warnings): @@ -288,7 +324,7 @@ def is_unset_param_raw_predictions_val_error(err): return model.estimator.get_params() # We ignore certain keys when comparing - param_keys_ignored = ['base_estimator'] + param_keys_ignored = ["base_estimator"] # We use the default config + sampled ones configuration_space = classifier_cls.get_hyperparameter_search_space() @@ -302,12 +338,12 @@ def is_unset_param_raw_predictions_val_error(err): # Get the parameters on the first and second fit with config params params_first = fitted_params(classifier) - if hasattr(classifier.estimator, 'random_state'): + if hasattr(classifier.estimator, "random_state"): rs_1 = classifier.random_state rs_estimator_1 = classifier.estimator.random_state params_second = fitted_params(classifier) - if hasattr(classifier.estimator, 'random_state'): + if hasattr(classifier.estimator, "random_state"): rs_2 = classifier.random_state rs_estimator_2 = classifier.estimator.random_state @@ -322,10 +358,13 @@ def is_unset_param_raw_predictions_val_error(err): del params[key] # They should have equal parameters - self.assertEqual(params_first, params_second, - f"Failed with model args {model_args}") - if hasattr(classifier.estimator, 'random_state'): - assert all([ - seed == random_state - for random_state in [rs_1, rs_estimator_1, rs_2, rs_estimator_2] - ]) + self.assertEqual( + params_first, params_second, f"Failed with model args {model_args}" + ) + if hasattr(classifier.estimator, "random_state"): + assert all( + [ + seed == random_state + for random_state in [rs_1, rs_estimator_1, rs_2, rs_estimator_2] + ] + ) diff --git a/test/test_pipeline/components/classification/test_bernoulli_nb.py b/test/test_pipeline/components/classification/test_bernoulli_nb.py index 8384119393..2def3a385f 100644 --- a/test/test_pipeline/components/classification/test_bernoulli_nb.py 
+++ b/test/test_pipeline/components/classification/test_bernoulli_nb.py @@ -1,7 +1,6 @@ import sklearn.naive_bayes -from autosklearn.pipeline.components.classification.bernoulli_nb import \ - BernoulliNB +from autosklearn.pipeline.components.classification.bernoulli_nb import BernoulliNB from .test_base import BaseClassificationComponentTest diff --git a/test/test_pipeline/components/classification/test_decision_tree.py b/test/test_pipeline/components/classification/test_decision_tree.py index e32a6536c7..546040e645 100644 --- a/test/test_pipeline/components/classification/test_decision_tree.py +++ b/test/test_pipeline/components/classification/test_decision_tree.py @@ -1,7 +1,6 @@ import sklearn.tree -from autosklearn.pipeline.components.classification.decision_tree import \ - DecisionTree +from autosklearn.pipeline.components.classification.decision_tree import DecisionTree from .test_base import BaseClassificationComponentTest diff --git a/test/test_pipeline/components/classification/test_extra_trees.py b/test/test_pipeline/components/classification/test_extra_trees.py index e7b1935db0..213bfbd916 100644 --- a/test/test_pipeline/components/classification/test_extra_trees.py +++ b/test/test_pipeline/components/classification/test_extra_trees.py @@ -1,6 +1,8 @@ import sklearn.ensemble -from autosklearn.pipeline.components.classification.extra_trees import ExtraTreesClassifier +from autosklearn.pipeline.components.classification.extra_trees import ( + ExtraTreesClassifier, +) from .test_base import BaseClassificationComponentTest @@ -12,12 +14,12 @@ class ExtraTreesComponentTest(BaseClassificationComponentTest): res = dict() res["default_iris"] = 0.96 res["iris_n_calls"] = 9 - res["default_iris_iterative"] = res['default_iris'] + res["default_iris_iterative"] = res["default_iris"] res["default_iris_proba"] = 0.10053485167017469 res["default_iris_sparse"] = 0.74 res["default_digits"] = 0.9216757741347905 res["digits_n_calls"] = 9 - res["default_digits_iterative"] = res['default_digits'] + res["default_digits_iterative"] = res["default_digits"] res["default_digits_iterative_places"] = 3 res["default_digits_binary"] = 0.994535519125683 res["default_digits_multilabel"] = 0.9983621593291405 @@ -26,6 +28,6 @@ class ExtraTreesComponentTest(BaseClassificationComponentTest): sk_mod = sklearn.ensemble.ExtraTreesClassifier module = ExtraTreesClassifier step_hyperparameter = { - 'name': 'n_estimators', - 'value': module.get_max_iter(), + "name": "n_estimators", + "value": module.get_max_iter(), } diff --git a/test/test_pipeline/components/classification/test_gaussian_nb.py b/test/test_pipeline/components/classification/test_gaussian_nb.py index ea5ce7cc5b..2f813b4293 100644 --- a/test/test_pipeline/components/classification/test_gaussian_nb.py +++ b/test/test_pipeline/components/classification/test_gaussian_nb.py @@ -1,7 +1,6 @@ import sklearn.naive_bayes -from autosklearn.pipeline.components.classification.gaussian_nb import \ - GaussianNB +from autosklearn.pipeline.components.classification.gaussian_nb import GaussianNB from .test_base import BaseClassificationComponentTest diff --git a/test/test_pipeline/components/classification/test_gradient_boosting.py b/test/test_pipeline/components/classification/test_gradient_boosting.py index efa3a3cca8..4bfadfa74c 100644 --- a/test/test_pipeline/components/classification/test_gradient_boosting.py +++ b/test/test_pipeline/components/classification/test_gradient_boosting.py @@ -1,7 +1,8 @@ import sklearn.ensemble -from 
autosklearn.pipeline.components.classification.gradient_boosting import \ - GradientBoostingClassifier +from autosklearn.pipeline.components.classification.gradient_boosting import ( + GradientBoostingClassifier, +) from .test_base import BaseClassificationComponentTest @@ -24,6 +25,6 @@ class GradientBoostingComponentTest(BaseClassificationComponentTest): sk_mod = sklearn.ensemble.ExtraTreesClassifier module = GradientBoostingClassifier step_hyperparameter = { - 'name': 'max_iter', - 'value': module.get_max_iter(), + "name": "max_iter", + "value": module.get_max_iter(), } diff --git a/test/test_pipeline/components/classification/test_k_nearest_neighbor.py b/test/test_pipeline/components/classification/test_k_nearest_neighbor.py index 8209e2a674..d09512d07d 100644 --- a/test/test_pipeline/components/classification/test_k_nearest_neighbor.py +++ b/test/test_pipeline/components/classification/test_k_nearest_neighbor.py @@ -1,7 +1,8 @@ import sklearn.neighbors -from autosklearn.pipeline.components.classification.k_nearest_neighbors import \ - KNearestNeighborsClassifier +from autosklearn.pipeline.components.classification.k_nearest_neighbors import ( + KNearestNeighborsClassifier, +) from .test_base import BaseClassificationComponentTest diff --git a/test/test_pipeline/components/classification/test_liblinear.py b/test/test_pipeline/components/classification/test_liblinear.py index bb2d2a1894..1aec8e227e 100644 --- a/test/test_pipeline/components/classification/test_liblinear.py +++ b/test/test_pipeline/components/classification/test_liblinear.py @@ -1,7 +1,6 @@ import sklearn.svm -from autosklearn.pipeline.components.classification.liblinear_svc import \ - LibLinear_SVC +from autosklearn.pipeline.components.classification.liblinear_svc import LibLinear_SVC from .test_base import BaseClassificationComponentTest diff --git a/test/test_pipeline/components/classification/test_libsvm_svc.py b/test/test_pipeline/components/classification/test_libsvm_svc.py index dcab429fc1..6fe95f5b62 100644 --- a/test/test_pipeline/components/classification/test_libsvm_svc.py +++ b/test/test_pipeline/components/classification/test_libsvm_svc.py @@ -2,8 +2,7 @@ import sklearn.svm from autosklearn.pipeline.components.classification.libsvm_svc import LibSVM_SVC -from autosklearn.pipeline.util import get_dataset, \ - _test_classifier_predict_proba +from autosklearn.pipeline.util import _test_classifier_predict_proba, get_dataset from .test_base import BaseClassificationComponentTest @@ -30,22 +29,23 @@ def test_default_configuration_predict_proba_individual(self): # Leave this additional test here for i in range(2): predictions, targets = _test_classifier_predict_proba( - LibSVM_SVC, sparse=True, dataset='digits', - train_size_maximum=500) - self.assertAlmostEqual(5.273502056835706, - sklearn.metrics.log_loss(targets, - predictions)) + LibSVM_SVC, sparse=True, dataset="digits", train_size_maximum=500 + ) + self.assertAlmostEqual( + 5.273502056835706, sklearn.metrics.log_loss(targets, predictions) + ) for i in range(2): predictions, targets = _test_classifier_predict_proba( - LibSVM_SVC, sparse=True, dataset='iris') - self.assertAlmostEqual(0.8408320837510618, - sklearn.metrics.log_loss(targets, - predictions)) + LibSVM_SVC, sparse=True, dataset="iris" + ) + self.assertAlmostEqual( + 0.8408320837510618, sklearn.metrics.log_loss(targets, predictions) + ) # 2 class for i in range(2): - X_train, Y_train, X_test, Y_test = get_dataset(dataset='iris') + X_train, Y_train, X_test, Y_test = get_dataset(dataset="iris") 
remove_training_data = Y_train == 2 remove_test_data = Y_test == 2 X_train = X_train[~remove_training_data] @@ -57,11 +57,19 @@ def test_default_configuration_predict_proba_individual(self): configuration_space = LibSVM_SVC.get_hyperparameter_search_space() default = configuration_space.get_default_configuration() - cls = LibSVM_SVC(random_state=1, **{hp_name: default[hp_name] - for hp_name in default - if default[hp_name] is not None}) + cls = LibSVM_SVC( + random_state=1, + **{ + hp_name: default[hp_name] + for hp_name in default + if default[hp_name] is not None + }, + ) cls = cls.fit(X_train, Y_train) prediction = cls.predict_proba(X_test) - self.assertAlmostEqual(sklearn.metrics.log_loss(Y_test, prediction), - 0.6927962762794081, places=4) + self.assertAlmostEqual( + sklearn.metrics.log_loss(Y_test, prediction), + 0.6927962762794081, + places=4, + ) diff --git a/test/test_pipeline/components/classification/test_mlp.py b/test/test_pipeline/components/classification/test_mlp.py index b8d559b1bc..e1c4286d83 100644 --- a/test/test_pipeline/components/classification/test_mlp.py +++ b/test/test_pipeline/components/classification/test_mlp.py @@ -43,6 +43,6 @@ class MLPComponentTest(BaseClassificationComponentTest): sk_mod = sklearn.neural_network.MLPClassifier module = MLPClassifier step_hyperparameter = { - 'name': 'n_iter_', - 'value': module.get_max_iter(), + "name": "n_iter_", + "value": module.get_max_iter(), } diff --git a/test/test_pipeline/components/classification/test_multinomial_nb.py b/test/test_pipeline/components/classification/test_multinomial_nb.py index 2c982c41ef..c82b938679 100644 --- a/test/test_pipeline/components/classification/test_multinomial_nb.py +++ b/test/test_pipeline/components/classification/test_multinomial_nb.py @@ -1,10 +1,8 @@ import numpy as np - import sklearn.naive_bayes import sklearn.preprocessing -from autosklearn.pipeline.components.classification.multinomial_nb import \ - MultinomialNB +from autosklearn.pipeline.components.classification.multinomial_nb import MultinomialNB from autosklearn.pipeline.util import get_dataset from .test_base import BaseClassificationComponentTest @@ -32,17 +30,21 @@ class MultinomialNBComponentTest(BaseClassificationComponentTest): def test_default_configuration_negative_values(self): # Custon preprocessing test to check if clipping to zero works - X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits') + X_train, Y_train, X_test, Y_test = get_dataset(dataset="digits") ss = sklearn.preprocessing.StandardScaler() X_train = ss.fit_transform(X_train) configuration_space = MultinomialNB.get_hyperparameter_search_space() default = configuration_space.get_default_configuration() - cls = MultinomialNB(random_state=1, **{hp_name: default[hp_name] - for hp_name in default - if default[hp_name] is not None}) + cls = MultinomialNB( + random_state=1, + **{ + hp_name: default[hp_name] + for hp_name in default + if default[hp_name] is not None + }, + ) cls = cls.fit(X_train, Y_train) prediction = cls.predict(X_test) - self.assertAlmostEqual(np.nanmean(prediction == Y_test), - 0.88888888888888884) + self.assertAlmostEqual(np.nanmean(prediction == Y_test), 0.88888888888888884) diff --git a/test/test_pipeline/components/classification/test_passive_aggressive.py b/test/test_pipeline/components/classification/test_passive_aggressive.py index d904f9e569..b83dbaf120 100644 --- a/test/test_pipeline/components/classification/test_passive_aggressive.py +++ b/test/test_pipeline/components/classification/test_passive_aggressive.py @@ 
-1,7 +1,8 @@ import sklearn.linear_model -from autosklearn.pipeline.components.classification.passive_aggressive import \ - PassiveAggressive +from autosklearn.pipeline.components.classification.passive_aggressive import ( + PassiveAggressive, +) from .test_base import BaseClassificationComponentTest @@ -13,13 +14,13 @@ class PassiveAggressiveComponentTest(BaseClassificationComponentTest): res = dict() res["default_iris"] = 0.98 res["iris_n_calls"] = 6 - res["default_iris_iterative"] = res['default_iris'] + res["default_iris_iterative"] = res["default_iris"] res["iris_iterative_n_iter"] = 64 res["default_iris_proba"] = 0.27840521921952033 res["default_iris_sparse"] = 0.48 res["default_digits"] = 0.9162112932604736 res["digits_n_calls"] = 6 - res["default_digits_iterative"] = res['default_digits'] + res["default_digits_iterative"] = res["default_digits"] res["digits_iterative_n_iter"] = 64 res["default_digits_binary"] = 0.99210686095932 res["default_digits_multilabel"] = 0.910908768565592 @@ -29,6 +30,6 @@ class PassiveAggressiveComponentTest(BaseClassificationComponentTest): module = PassiveAggressive step_hyperparameter = { - 'name': 'max_iter', - 'value': module.get_max_iter(), + "name": "max_iter", + "value": module.get_max_iter(), } diff --git a/test/test_pipeline/components/classification/test_random_forest.py b/test/test_pipeline/components/classification/test_random_forest.py index 8e2c1136d3..f96869c270 100644 --- a/test/test_pipeline/components/classification/test_random_forest.py +++ b/test/test_pipeline/components/classification/test_random_forest.py @@ -1,7 +1,6 @@ import sklearn.ensemble -from autosklearn.pipeline.components.classification.random_forest import \ - RandomForest +from autosklearn.pipeline.components.classification.random_forest import RandomForest from .test_base import BaseClassificationComponentTest @@ -13,12 +12,12 @@ class RandomForestComponentTest(BaseClassificationComponentTest): res = dict() res["default_iris"] = 0.96 res["iris_n_calls"] = 9 - res["default_iris_iterative"] = res['default_iris'] + res["default_iris_iterative"] = res["default_iris"] res["default_iris_proba"] = 0.0996785324703419 res["default_iris_sparse"] = 0.85999999999999999 res["default_digits"] = 0.8998178506375227 res["digits_n_calls"] = 9 - res["default_digits_iterative"] = res['default_digits'] + res["default_digits_iterative"] = res["default_digits"] res["default_digits_binary"] = 0.9896782027929569 res["default_digits_multilabel"] = 0.9973653110879388 res["default_digits_multilabel_proba"] = 0.9965660960196189 @@ -26,6 +25,6 @@ class RandomForestComponentTest(BaseClassificationComponentTest): sk_mod = sklearn.ensemble.RandomForestClassifier module = RandomForest step_hyperparameter = { - 'name': 'n_estimators', - 'value': module.get_max_iter(), + "name": "n_estimators", + "value": module.get_max_iter(), } diff --git a/test/test_pipeline/components/classification/test_sgd.py b/test/test_pipeline/components/classification/test_sgd.py index defe8af81d..8f1d7821e1 100644 --- a/test/test_pipeline/components/classification/test_sgd.py +++ b/test/test_pipeline/components/classification/test_sgd.py @@ -1,6 +1,7 @@ import sklearn.linear_model from autosklearn.pipeline.components.classification.sgd import SGD + from .test_base import BaseClassificationComponentTest @@ -11,12 +12,12 @@ class SGDComponentTest(BaseClassificationComponentTest): res = dict() res["default_iris"] = 0.69999999999999996 res["iris_n_calls"] = 9 - res["default_iris_iterative"] = res['default_iris'] + 
res["default_iris_iterative"] = res["default_iris"] res["default_iris_proba"] = 0.5996114465819011 res["default_iris_sparse"] = 0.54 res["default_digits"] = 0.9198542805100182 res["digits_n_calls"] = 7 - res["default_digits_iterative"] = res['default_digits'] + res["default_digits_iterative"] = res["default_digits"] res["default_digits_binary"] = 0.9951426836672739 res["default_digits_multilabel"] = -1 res["default_digits_multilabel_proba"] = -1 diff --git a/test/test_pipeline/components/data_preprocessing/__init__.py b/test/test_pipeline/components/data_preprocessing/__init__.py index 8f0ce6cb7c..92bf78f389 100644 --- a/test/test_pipeline/components/data_preprocessing/__init__.py +++ b/test/test_pipeline/components/data_preprocessing/__init__.py @@ -1 +1 @@ -__author__ = 'feurerm' +__author__ = "feurerm" diff --git a/test/test_pipeline/components/data_preprocessing/test_balancing.py b/test/test_pipeline/components/data_preprocessing/test_balancing.py index 268a8ea542..cf8dc103b8 100644 --- a/test/test_pipeline/components/data_preprocessing/test_balancing.py +++ b/test/test_pipeline/components/data_preprocessing/test_balancing.py @@ -1,4 +1,4 @@ -__author__ = 'feurerm' +__author__ = "feurerm" import copy import unittest @@ -7,86 +7,111 @@ import sklearn.datasets import sklearn.metrics -from autosklearn.pipeline.components.data_preprocessing.balancing.balancing \ - import Balancing from autosklearn.pipeline.classification import SimpleClassificationPipeline from autosklearn.pipeline.components.classification.adaboost import AdaboostClassifier from autosklearn.pipeline.components.classification.decision_tree import DecisionTree -from autosklearn.pipeline.components.classification.extra_trees import ExtraTreesClassifier -from autosklearn.pipeline.components.classification.random_forest import RandomForest +from autosklearn.pipeline.components.classification.extra_trees import ( + ExtraTreesClassifier, +) +from autosklearn.pipeline.components.classification.gradient_boosting import ( + GradientBoostingClassifier, +) from autosklearn.pipeline.components.classification.liblinear_svc import LibLinear_SVC from autosklearn.pipeline.components.classification.libsvm_svc import LibSVM_SVC +from autosklearn.pipeline.components.classification.passive_aggressive import ( + PassiveAggressive, +) +from autosklearn.pipeline.components.classification.random_forest import RandomForest from autosklearn.pipeline.components.classification.sgd import SGD -from autosklearn.pipeline.components.classification.gradient_boosting \ - import GradientBoostingClassifier -from autosklearn.pipeline.components.classification.passive_aggressive import PassiveAggressive -from autosklearn.pipeline.components.feature_preprocessing\ - .extra_trees_preproc_for_classification import ExtraTreesPreprocessorClassification -from autosklearn.pipeline.components.feature_preprocessing.liblinear_svc_preprocessor import \ - LibLinear_Preprocessor +from autosklearn.pipeline.components.data_preprocessing.balancing.balancing import ( + Balancing, +) +from autosklearn.pipeline.components.feature_preprocessing.extra_trees_preproc_for_classification import ( # noqa: E501 + ExtraTreesPreprocessorClassification, +) +from autosklearn.pipeline.components.feature_preprocessing.liblinear_svc_preprocessor import ( # noqa: E501 + LibLinear_Preprocessor, +) class BalancingComponentTest(unittest.TestCase): def test_balancing_get_weights_treed_single_label(self): Y = np.array([0] * 80 + [1] * 20) - balancing = Balancing(strategy='weighting') - init_params, 
fit_params = balancing.get_weights( - Y, 'adaboost', None, None, None) + balancing = Balancing(strategy="weighting") + init_params, fit_params = balancing.get_weights(Y, "adaboost", None, None, None) self.assertAlmostEqual( - np.mean(fit_params['classifier:sample_weight']), 1, + np.mean(fit_params["classifier:sample_weight"]), + 1, ) np.testing.assert_allclose( - fit_params['classifier:sample_weight'], + fit_params["classifier:sample_weight"], np.array([0.625] * 80 + [2.5] * 20), ) def test_balancing_get_weights_treed_multilabel(self): - Y = np.array([[0, 0, 0]] * 100 + [[1, 0, 0]] * 100 + [[0, 1, 0]] * 100 + - [[1, 1, 0]] * 100 + [[0, 0, 1]] * 100 + [[1, 0, 1]] * 10) - balancing = Balancing(strategy='weighting') - init_params, fit_params = balancing.get_weights( - Y, 'adaboost', None, None, None) - print(fit_params['classifier:sample_weight']) + Y = np.array( + [[0, 0, 0]] * 100 + + [[1, 0, 0]] * 100 + + [[0, 1, 0]] * 100 + + [[1, 1, 0]] * 100 + + [[0, 0, 1]] * 100 + + [[1, 0, 1]] * 10 + ) + balancing = Balancing(strategy="weighting") + init_params, fit_params = balancing.get_weights(Y, "adaboost", None, None, None) + print(fit_params["classifier:sample_weight"]) self.assertAlmostEqual( - np.mean(fit_params['classifier:sample_weight']), 1, + np.mean(fit_params["classifier:sample_weight"]), + 1, ) np.testing.assert_allclose( - fit_params['classifier:sample_weight'], + fit_params["classifier:sample_weight"], np.array([0.85] * 500 + [8.5] * 10), ) def test_balancing_get_weights_svm_sgd(self): Y = np.array([0] * 80 + [1] * 20) - balancing = Balancing(strategy='weighting') + balancing = Balancing(strategy="weighting") init_params, fit_params = balancing.get_weights( - Y, 'libsvm_svc', None, None, None) - self.assertEqual(("classifier:class_weight", "balanced"), - list(init_params.items())[0]) + Y, "libsvm_svc", None, None, None + ) + self.assertEqual( + ("classifier:class_weight", "balanced"), list(init_params.items())[0] + ) init_params, fit_params = balancing.get_weights( - Y, None, 'liblinear_svc_preprocessor', None, None) - self.assertEqual(("feature_preprocessor:class_weight", "balanced"), - list(init_params.items())[0]) + Y, None, "liblinear_svc_preprocessor", None, None + ) + self.assertEqual( + ("feature_preprocessor:class_weight", "balanced"), + list(init_params.items())[0], + ) def test_weighting_effect(self): data = sklearn.datasets.make_classification( - n_samples=200, n_features=10, n_redundant=2, n_informative=2, - n_repeated=2, n_clusters_per_class=2, weights=[0.8, 0.2], - random_state=1) + n_samples=200, + n_features=10, + n_redundant=2, + n_informative=2, + n_repeated=2, + n_clusters_per_class=2, + weights=[0.8, 0.2], + random_state=1, + ) for name, clf, acc_no_weighting, acc_weighting, places in [ - ('adaboost', AdaboostClassifier, 0.810, 0.735, 3), - ('decision_tree', DecisionTree, 0.780, 0.643, 3), - ('extra_trees', ExtraTreesClassifier, 0.78, 0.8, 3), - ('random_forest', RandomForest, 0.75, 0.789, 3), - ('libsvm_svc', LibSVM_SVC, 0.769, 0.72, 3), - ('liblinear_svc', LibLinear_SVC, 0.762, 0.735, 3), - ('passive_aggressive', PassiveAggressive, 0.16, 0.222, 3), - ('sgd', SGD, 0.818, 0.567, 2), - ('gradient_boosting', GradientBoostingClassifier, 0.666, 0.682, 2) - ]: + ("adaboost", AdaboostClassifier, 0.810, 0.735, 3), + ("decision_tree", DecisionTree, 0.780, 0.643, 3), + ("extra_trees", ExtraTreesClassifier, 0.78, 0.8, 3), + ("random_forest", RandomForest, 0.75, 0.789, 3), + ("libsvm_svc", LibSVM_SVC, 0.769, 0.72, 3), + ("liblinear_svc", LibLinear_SVC, 0.762, 0.735, 3), + 
("passive_aggressive", PassiveAggressive, 0.16, 0.222, 3), + ("sgd", SGD, 0.818, 0.567, 2), + ("gradient_boosting", GradientBoostingClassifier, 0.666, 0.682, 2), + ]: for strategy, acc in [ - ('none', acc_no_weighting), - ('weighting', acc_weighting) + ("none", acc_no_weighting), + ("weighting", acc_weighting), ]: # Fit data_ = copy.copy(data) @@ -98,23 +123,25 @@ def test_weighting_effect(self): model_args = { "random_state": 1, "include": { - 'classifier': [name], - 'feature_preprocessor': ['no_preprocessing'] - } + "classifier": [name], + "feature_preprocessor": ["no_preprocessing"], + }, } classifier = SimpleClassificationPipeline(**model_args) cs = classifier.get_hyperparameter_search_space() default = cs.get_default_configuration() - default._values['balancing:strategy'] = strategy + default._values["balancing:strategy"] = strategy classifier = SimpleClassificationPipeline(config=default, **model_args) classifier.fit(X_train, Y_train) predictions1 = classifier.predict(X_test) self.assertAlmostEqual( - sklearn.metrics.f1_score(predictions1, Y_test), acc, - places=places, msg=(name, strategy) + sklearn.metrics.f1_score(predictions1, Y_test), + acc, + places=places, + msg=(name, strategy), ) # fit_transformer and fit_estimator @@ -130,39 +157,53 @@ def test_weighting_effect(self): predictions2 = classifier.predict(X_test) np.testing.assert_allclose( - predictions1, predictions2, - err_msg=f"name = {name}, strategy = {strategy}" + predictions1, + predictions2, + err_msg=f"name = {name}, strategy = {strategy}", ) self.assertAlmostEqual( - sklearn.metrics.f1_score(predictions2, Y_test), acc, - places=places, msg=(name, strategy) + sklearn.metrics.f1_score(predictions2, Y_test), + acc, + places=places, + msg=(name, strategy), ) - for name, pre, acc_no_weighting, acc_weighting in \ - [('extra_trees_preproc_for_classification', - ExtraTreesPreprocessorClassification, 0.810, 0.590), - ('liblinear_svc_preprocessor', LibLinear_Preprocessor, - 0.837, 0.562)]: - for strategy, acc in [('none', acc_no_weighting), - ('weighting', acc_weighting)]: + for name, pre, acc_no_weighting, acc_weighting in [ + ( + "extra_trees_preproc_for_classification", + ExtraTreesPreprocessorClassification, + 0.810, + 0.590, + ), + ("liblinear_svc_preprocessor", LibLinear_Preprocessor, 0.837, 0.562), + ]: + for strategy, acc in [ + ("none", acc_no_weighting), + ("weighting", acc_weighting), + ]: data_ = copy.copy(data) X_train = data_[0][:100] Y_train = data_[1][:100] X_test = data_[0][100:] Y_test = data_[1][100:] - include = {'classifier': ['sgd'], 'feature_preprocessor': [name]} + include = {"classifier": ["sgd"], "feature_preprocessor": [name]} - classifier = SimpleClassificationPipeline(random_state=1, include=include) + classifier = SimpleClassificationPipeline( + random_state=1, include=include + ) cs = classifier.get_hyperparameter_search_space() default = cs.get_default_configuration() - default._values['balancing:strategy'] = strategy + default._values["balancing:strategy"] = strategy classifier.set_hyperparameters(default) predictor = classifier.fit(X_train, Y_train) predictions = predictor.predict(X_test) self.assertAlmostEqual( - sklearn.metrics.f1_score(predictions, Y_test), acc, - places=3, msg=(name, strategy)) + sklearn.metrics.f1_score(predictions, Y_test), + acc, + places=3, + msg=(name, strategy), + ) # fit_transformer and fit_estimator data_ = copy.copy(data) @@ -171,11 +212,13 @@ def test_weighting_effect(self): X_test = data_[0][100:] Y_test = data_[1][100:] - default._values['balancing:strategy'] = 
strategy - classifier = SimpleClassificationPipeline(default, random_state=1, include=include) + default._values["balancing:strategy"] = strategy + classifier = SimpleClassificationPipeline( + default, random_state=1, include=include + ) Xt, fit_params = classifier.fit_transformer(X_train, Y_train) classifier.fit_estimator(Xt, Y_train, **fit_params) predictions = classifier.predict(X_test) self.assertAlmostEqual( - sklearn.metrics.f1_score(predictions, Y_test), acc, - places=3) + sklearn.metrics.f1_score(predictions, Y_test), acc, places=3 + ) diff --git a/test/test_pipeline/components/data_preprocessing/test_categorical_imputation.py b/test/test_pipeline/components/data_preprocessing/test_categorical_imputation.py index 2767093179..d50e8cf842 100644 --- a/test/test_pipeline/components/data_preprocessing/test_categorical_imputation.py +++ b/test/test_pipeline/components/data_preprocessing/test_categorical_imputation.py @@ -1,11 +1,11 @@ import numpy as np -from scipy import sparse - import pandas as pd import pytest +from scipy import sparse -from autosklearn.pipeline.components.data_preprocessing.imputation.categorical_imputation\ - import CategoricalImputation +from autosklearn.pipeline.components.data_preprocessing.imputation.categorical_imputation import ( # noqa: E501 + CategoricalImputation, +) @pytest.fixture @@ -14,15 +14,15 @@ def input_data_imputation(request): X = np.array(np.random.randint(3, 10, size=size), dtype=float) mask = np.logical_not(np.random.randint(0, 5, size=size), dtype=bool) X[mask] = np.nan - if request.param == 'numpy': + if request.param == "numpy": pass - elif request.param == 'pandas': + elif request.param == "pandas": X = pd.DataFrame(X) return X, mask -@pytest.mark.parametrize('input_data_imputation', ('numpy', 'pandas'), indirect=True) -@pytest.mark.parametrize('categorical', (True, False)) +@pytest.mark.parametrize("input_data_imputation", ("numpy", "pandas"), indirect=True) +@pytest.mark.parametrize("categorical", (True, False)) def test_default_imputation(input_data_imputation, categorical): """ Makes sure that imputation works for both numerical and categorical data. @@ -30,8 +30,8 @@ def test_default_imputation(input_data_imputation, categorical): """ X, mask = input_data_imputation if categorical: - imputation_value = 'missing_value' - X = X.astype('str').astype('object') + imputation_value = "missing_value" + X = X.astype("str").astype("object") X[mask] = np.nan else: imputation_value = min(np.unique(X)) - 1 @@ -42,15 +42,15 @@ def test_default_imputation(input_data_imputation, categorical): assert np.array_equal(Y != imputation_value, ~mask) -@pytest.mark.parametrize('format_type', ('numpy', 'pandas')) +@pytest.mark.parametrize("format_type", ("numpy", "pandas")) def test_nonzero_numerical_imputation(format_type): # First try with an array with 0 as only valid category. 
The imputation should # happen with -1 X = np.full(fill_value=np.nan, shape=(10, 10)) X[0, :] = 0 - if 'pandas' in format_type: + if "pandas" in format_type: X = pd.DataFrame(X) - elif 'numpy' in format_type: + elif "numpy" in format_type: pass else: pytest.fail(format_type) @@ -61,13 +61,13 @@ def test_nonzero_numerical_imputation(format_type): X = np.full(fill_value=np.nan, shape=(10, 10)) X[0, :] = 0 X[1, :] = -1 - if 'pandas' in format_type: + if "pandas" in format_type: X = pd.DataFrame(X) Y = CategoricalImputation().fit_transform(X.copy()) np.testing.assert_equal(np.nan_to_num(X, nan=-2, copy=True), Y) -@pytest.mark.parametrize('input_data_imputation', ('numpy'), indirect=True) +@pytest.mark.parametrize("input_data_imputation", ("numpy"), indirect=True) def test_default_sparse(input_data_imputation): X, mask = input_data_imputation X = sparse.csr_matrix(X) diff --git a/test/test_pipeline/components/data_preprocessing/test_category_shift.py b/test/test_pipeline/components/data_preprocessing/test_category_shift.py index d49e6a84f0..ce637f50d4 100644 --- a/test/test_pipeline/components/data_preprocessing/test_category_shift.py +++ b/test/test_pipeline/components/data_preprocessing/test_category_shift.py @@ -1,19 +1,21 @@ import unittest + import numpy as np import scipy.sparse -from autosklearn.pipeline.components.data_preprocessing.category_shift.\ - category_shift import CategoryShift +from autosklearn.pipeline.components.data_preprocessing.category_shift.category_shift import ( # noqa: E501 + CategoryShift, +) class CategoryShiftTest(unittest.TestCase): - def test_data_type_consistency(self): X = np.random.randint(0, 255, (3, 4)) Y = CategoryShift().fit_transform(X) self.assertFalse(scipy.sparse.issparse(Y)) X = scipy.sparse.csc_matrix( - ([1, 2, 0, 4], ([0, 1, 2, 1], [3, 2, 1, 0])), shape=(3, 4)) + ([1, 2, 0, 4], ([0, 1, 2, 1], [3, 2, 1, 0])), shape=(3, 4) + ) Y = CategoryShift().fit_transform(X) self.assertTrue(scipy.sparse.issparse(Y)) diff --git a/test/test_pipeline/components/data_preprocessing/test_data_preprocessing.py b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing.py index 5e6f89ad3a..ac8e9abbe2 100644 --- a/test/test_pipeline/components/data_preprocessing/test_data_preprocessing.py +++ b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing.py @@ -1,13 +1,14 @@ import unittest + import numpy as np from scipy import sparse -from autosklearn.pipeline.components.data_preprocessing.feature_type \ - import FeatTypeSplit +from autosklearn.pipeline.components.data_preprocessing.feature_type import ( + FeatTypeSplit, +) class PreprocessingPipelineTest(unittest.TestCase): - def do_a_fit_transform(self, sparse_input): # X will be the input and Y is what we expect after transform. categ_feat stores # indicators of feature type (True if categorical, False if numerical) @@ -21,58 +22,57 @@ def do_a_fit_transform(self, sparse_input): # This feature should be normalized by having its mean subtracted from all # elements and by having them divided by the standard deviation. categ_feat.append(False) - nf = np.array([1., 2., 3.]).reshape(3, 1) # mean = 2. - sdev = np.sqrt(2. / 3.) - shift = 0 if sparse_input else 2. # if sparse_input, there is no mean subtraction + nf = np.array([1.0, 2.0, 3.0]).reshape(3, 1) # mean = 2. 
+ sdev = np.sqrt(2.0 / 3.0) + shift = ( + 0 if sparse_input else 2.0 + ) # if sparse_input, there is no mean subtraction nft = (nf - shift) / sdev X.append(nf) Y.append(nft) # Feature 3 (numerical): - # This feature has a missing value that should be imputed by the mean of the other - # values (2.). This feature should also be normalized as in the previous feature. + # This feature has a missing value that should be imputed by the mean of the + # other values (2.). + # This feature should also be normalized as in the previous feature. categ_feat.append(False) - X.append(np.array([1., np.nan, 3.]).reshape(3, 1)) + X.append(np.array([1.0, np.nan, 3.0]).reshape(3, 1)) Y.append(nft.copy()) # Feature 4 (categorical) # This feature should be one hot encoded. categ_feat.append(True) X.append(np.array([1, 3, 2]).reshape(3, 1)) - Y.append(np.array([ - [1, 0, 0], - [0, 0, 1], - [0, 1, 0]])) + Y.append(np.array([[1, 0, 0], [0, 0, 1], [0, 1, 0]])) # Feature 5 (categorical) # This feature should be one hot encoded. (A discontinuous category set or # a category 0 shouldn't be problems.) categ_feat.append(True) X.append(np.array([2, 1, 9]).reshape(3, 1)) - Y.append(np.array([ - [0, 1, 0], - [1, 0, 0], - [0, 0, 1]])) + Y.append(np.array([[0, 1, 0], [1, 0, 0], [0, 0, 1]])) # Feature 6 (categorical) # This feature should be one hot encoded. The missing value gets imputed as # a category on its own. categ_feat.append(True) X.append(np.array([1, 1, np.nan]).reshape(3, 1)) - Y.append(np.array([ - [0, 1], - [0, 1], - [1, 0]])) + Y.append(np.array([[0, 1], [0, 1], [1, 0]])) # Combine datasets and shuffle columns: n_feats = len(categ_feat) random_order = np.random.choice(np.arange(n_feats), size=n_feats, replace=False) # Shuffle X according to random_order X = np.array(X)[random_order] X_comb = np.hstack(X) - # Shuffle Y according to random_order and reorder it as the PreprocessingPipeline - # does (i.e. categorical features come first in Y). + # Shuffle Y according to random_order and reorder it as the + # PreprocessingPipeline does (i.e. categorical features come first in Y). 
- categ_feat = {i: 'categorical' if categ_feat[order] else 'numerical' - for i, order in enumerate(random_order)} - cat_to_left_order = [index for col, index in sorted( - [(col_type, i) for i, col_type in categ_feat.items()] - )] + categ_feat = { + i: "categorical" if categ_feat[order] else "numerical" + for i, order in enumerate(random_order) + } + cat_to_left_order = [ + index + for col, index in sorted( + [(col_type, i) for i, col_type in categ_feat.items()] + ) + ] # Sort so that Y Matches the random ordering Y = [Y[n] for n in random_order] # Then move the categorical columns to the left @@ -101,15 +101,21 @@ def test_fit_transform_sparse(self): def test_string_categories(self): # Numerical dataset (as used in NumericalPreprocessingPipelineTest) - X_num = np.array([ - [3.14, 1., 1.], # noqa : matrix legibility - [3.14, 2., np.nan], # noqa : matrix legibility - [3.14, 3., 3.]]) # noqa : matrix legibility + X_num = np.array( + [ + [3.14, 1.0, 1.0], # noqa : matrix legibility + [3.14, 2.0, np.nan], # noqa : matrix legibility + [3.14, 3.0, 3.0], + ] + ) # noqa : matrix legibility # Categorical string dataset - X_cat = np.array([ - ['red', 'medium', 'small'], - ['blue', 'short', 'big'], - ['white', 'tall', np.nan]]) + X_cat = np.array( + [ + ["red", "medium", "small"], + ["blue", "short", "big"], + ["white", "tall", np.nan], + ] + ) # Combined dataset with shuffled columns: X_comb = np.hstack((X_num, X_cat)) categ_feat = [False] * 3 + [True] * 3 @@ -118,6 +124,8 @@ def test_string_categories(self): categ_feat = [categ_feat[order] for order in random_order] # Strings are not allowed, therefore: with self.assertRaises(ValueError): - categ_feat = {i: 'categorical' if feat else 'numerical' - for i, feat in enumerate(categ_feat)} + categ_feat = { + i: "categorical" if feat else "numerical" + for i, feat in enumerate(categ_feat) + } FeatTypeSplit(feat_type=categ_feat).fit_transform(X_comb) diff --git a/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_categorical.py b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_categorical.py index dbffe26f51..1d693eb150 100644 --- a/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_categorical.py +++ b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_categorical.py @@ -1,34 +1,35 @@ import unittest -import numpy as np -from scipy import sparse +import numpy as np import pytest +from scipy import sparse -from autosklearn.pipeline.components.data_preprocessing.feature_type_categorical \ - import CategoricalPreprocessingPipeline +from autosklearn.pipeline.components.data_preprocessing.feature_type_categorical import ( # noqa: E501 + CategoricalPreprocessingPipeline, +) class CategoricalPreprocessingPipelineTest(unittest.TestCase): - def test_data_type_consistency(self): X = np.random.randint(3, 6, (3, 4)) Y = CategoricalPreprocessingPipeline().fit_transform(X) self.assertFalse(sparse.issparse(Y)) X = sparse.csc_matrix( - ([3, 6, 4, 5], ([0, 1, 2, 1], [3, 2, 1, 0])), shape=(3, 4)) + ([3, 6, 4, 5], ([0, 1, 2, 1], [3, 2, 1, 0])), shape=(3, 4) + ) Y = CategoricalPreprocessingPipeline().fit_transform(X) self.assertTrue(sparse.issparse(Y)) def test_fit_transform(self): - X = np.array([ - [1, 2, 1], - [3, 1, 1], - [2, 9, np.nan]]) - Y = np.array([ - [1, 0, 0, 0, 1, 0, 0, 1], - [0, 0, 1, 1, 0, 0, 0, 1], - [0, 1, 0, 0, 0, 1, 1, 0]]) + X = np.array([[1, 2, 1], [3, 1, 1], [2, 9, np.nan]]) + Y = np.array( + [ + [1, 0, 0, 0, 1, 0, 0, 1], + [0, 0, 1, 1, 0, 0, 0, 1], + [0, 1, 0, 0, 
0, 1, 1, 0], + ] + ) # dense input # Notice the X.copy() here as the imputation # is in place to save resources @@ -41,30 +42,30 @@ def test_fit_transform(self): np.testing.assert_array_equal(Yt, Y) def test_transform(self): - X1 = np.array([ - [1, 2, 0], - [3, 0, 0], - [2, 9, np.nan]]) - Y1 = np.array([ - [1, 0, 0, 0, 1, 0, 0, 1], - [0, 0, 1, 1, 0, 0, 0, 1], - [0, 1, 0, 0, 0, 1, 1, 0]]) - X2 = np.array([ - [2, 2, 1], - [3, 0, 0], - [2, np.nan, np.nan]]) - Y2 = np.array([ - [0, 1, 0, 0, 1, 0, 0, 0], - [0, 0, 1, 1, 0, 0, 0, 1], - [0, 1, 0, 0, 0, 0, 1, 0]]) - X3 = np.array([ - [3, np.nan, 0], - [3, 9, np.nan], - [2, 2, 5]]) - Y3 = np.array([ - [0, 0, 1, 0, 0, 0, 0, 1], - [0, 0, 1, 0, 0, 1, 1, 0], - [0, 1, 0, 0, 1, 0, 0, 0]]) + X1 = np.array([[1, 2, 0], [3, 0, 0], [2, 9, np.nan]]) + Y1 = np.array( + [ + [1, 0, 0, 0, 1, 0, 0, 1], + [0, 0, 1, 1, 0, 0, 0, 1], + [0, 1, 0, 0, 0, 1, 1, 0], + ] + ) + X2 = np.array([[2, 2, 1], [3, 0, 0], [2, np.nan, np.nan]]) + Y2 = np.array( + [ + [0, 1, 0, 0, 1, 0, 0, 0], + [0, 0, 1, 1, 0, 0, 0, 1], + [0, 1, 0, 0, 0, 0, 1, 0], + ] + ) + X3 = np.array([[3, np.nan, 0], [3, 9, np.nan], [2, 2, 5]]) + Y3 = np.array( + [ + [0, 0, 1, 0, 0, 0, 0, 1], + [0, 0, 1, 0, 0, 1, 1, 0], + [0, 1, 0, 0, 1, 0, 0, 0], + ] + ) # "fit" CPPL = CategoricalPreprocessingPipeline() CPPL.fit_transform(X1) @@ -81,13 +82,15 @@ def test_transform(self): def test_transform_with_coalescence(self): # Generates an array with categories 0, 20, 5, 6, 10, and occurences of 60%, # 30%, 19% 0.5% and 0.5% respectively - X = np.vstack(( - np.ones((120, 10)) * 0, - np.ones((60, 10)) * 20, - np.ones((18, 10)) * 5, - np.ones((1, 10)) * 6, - np.ones((1, 10)) * 10, - )) + X = np.vstack( + ( + np.ones((120, 10)) * 0, + np.ones((60, 10)) * 20, + np.ones((18, 10)) * 5, + np.ones((1, 10)) * 6, + np.ones((1, 10)) * 10, + ) + ) for col in range(X.shape[1]): np.random.shuffle(X[:, col]) @@ -100,10 +103,12 @@ def test_transform_with_coalescence(self): Y2t = CPPL.transform(X) np.testing.assert_array_equal(Y1t, Y2t) - @pytest.mark.xfail(reason=( - "Encoding step does not support sparse matrices to convert negative labels to" - " positive ones as it does with non-sparse matrices" - )) + @pytest.mark.xfail( + reason=( + "Encoding step does not support sparse matrices to convert negative labels" + " to positive ones as it does with non-sparse matrices" + ) + ) def test_transform_with_sparse_column_with_negative_labels(self): X = sparse.csr_matrix([[0], [-1]]) CategoricalPreprocessingPipeline().fit_transform(X) diff --git a/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_numerical.py b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_numerical.py index 6a0b9d37fc..5a0a840501 100644 --- a/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_numerical.py +++ b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_numerical.py @@ -1,69 +1,64 @@ import unittest -import numpy as np +import numpy as np from scipy import sparse -from autosklearn.pipeline.components.data_preprocessing.feature_type_numerical \ - import NumericalPreprocessingPipeline +from autosklearn.pipeline.components.data_preprocessing.feature_type_numerical import ( + NumericalPreprocessingPipeline, +) class NumericalPreprocessingPipelineTest(unittest.TestCase): - def test_data_type_consistency(self): X = np.random.rand(3, 4) Y = NumericalPreprocessingPipeline().fit_transform(X) self.assertFalse(sparse.issparse(Y)) X = sparse.csc_matrix( - ([3., 6., 4., 5.], ([0, 1, 2, 1], [3, 
2, 1, 0])), shape=(3, 4)) + ([3.0, 6.0, 4.0, 5.0], ([0, 1, 2, 1], [3, 2, 1, 0])), shape=(3, 4) + ) Y = NumericalPreprocessingPipeline().fit_transform(X) self.assertTrue(sparse.issparse(Y)) def test_fit_transform(self): - X = np.array([ - [3.14, 1., 1.], - [3.14, 2., np.nan], - [3.14, 3., 3.]]) # noqa : matrix legibility + X = np.array( + [[3.14, 1.0, 1.0], [3.14, 2.0, np.nan], [3.14, 3.0, 3.0]] + ) # noqa : matrix legibility # 1st column should be droped due to low variance # The 2nd should be be standardized (default rescaling algorithm) - # The 3rd will get a value imputed by the mean (2.), therefore the transformation - # here will have the same effect as on the the 2nd column + # The 3rd will get a value imputed by the mean (2.), therefore the + # transformation here will have the same effect as on the the 2nd column sdev = np.sqrt(2 / 3) - Y1 = np.array([ - [-1/sdev, -1/sdev], - [ 0., 0.], # noqa : matrix legibility - [ 1/sdev, 1/sdev]]) # noqa : matrix legibility + Y1 = np.array( + [ + [-1 / sdev, -1 / sdev], + [0.0, 0.0], # noqa : matrix legibility + [1 / sdev, 1 / sdev], + ] + ) # noqa : matrix legibility # dense input Yt = NumericalPreprocessingPipeline().fit_transform(X) np.testing.assert_array_almost_equal(Yt, Y1) # sparse input (uses with_mean=False) - Y2 = np.array([ - [1., 1.], - [2., 2.], - [3., 3.]]) / sdev + Y2 = np.array([[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]]) / sdev X_sparse = sparse.csc_matrix(X) Yt = NumericalPreprocessingPipeline().fit_transform(X_sparse) np.testing.assert_array_almost_equal(Yt.todense(), Y2) def test_transform(self): - X1 = np.array([ - [3.14, 1., 1.], - [3.14, 2., np.nan], - [3.14, 3., 3.]]) # noqa : matrix legibility + X1 = np.array( + [[3.14, 1.0, 1.0], [3.14, 2.0, np.nan], [3.14, 3.0, 3.0]] + ) # noqa : matrix legibility sdev = np.sqrt(2 / 3) # fit NPP = NumericalPreprocessingPipeline() NPP.fit_transform(X1) # transform - X2 = np.array([ - [1., 5., 8.], - [2., 6., 9.], - [3., 7., np.nan]]) + X2 = np.array([[1.0, 5.0, 8.0], [2.0, 6.0, 9.0], [3.0, 7.0, np.nan]]) Yt = NPP.transform(X2) # imputation, variance_threshold and rescaling are done using the data already # fitted, therefore: - Y2 = np.array([ - [3/sdev, 6/sdev], - [4/sdev, 7/sdev], - [5/sdev, 0.]]) # noqa : matrix legibility + Y2 = np.array( + [[3 / sdev, 6 / sdev], [4 / sdev, 7 / sdev], [5 / sdev, 0.0]] + ) # noqa : matrix legibility np.testing.assert_array_almost_equal(Yt, Y2) diff --git a/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_text.py b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_text.py index 0e39c5d7e9..0a2e3d5188 100644 --- a/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_text.py +++ b/test/test_pipeline/components/data_preprocessing/test_data_preprocessing_text.py @@ -1,33 +1,57 @@ import unittest + import numpy as np import pandas as pd -from autosklearn.pipeline.components.data_preprocessing.text_encoding.bag_of_word_encoding import \ - BagOfWordEncoder as BOW -from autosklearn.pipeline.components.data_preprocessing.\ - text_encoding.bag_of_word_encoding_distinct import BagOfWordEncoder as BOW_distinct +from autosklearn.pipeline.components.data_preprocessing.text_encoding.bag_of_word_encoding import ( # noqa: E501 + BagOfWordEncoder as BOW, +) +from autosklearn.pipeline.components.data_preprocessing.text_encoding.bag_of_word_encoding_distinct import ( # noqa: E501 + BagOfWordEncoder as BOW_distinct, +) class TextPreprocessingPipelineTest(unittest.TestCase): - def test_fit_transform(self): - X = 
pd.DataFrame({"col1": ["hello world", - "This is a test"], - "col2": ["hello mars", - "This is the second column"]}).astype({"col1": "string", - "col2": "string"}) - BOW_fitted = BOW(ngram_range=1, min_df_choice="min_df_absolute", min_df_absolute=0, - min_df_relative=0, random_state=1).fit(X.copy()) + X = pd.DataFrame( + { + "col1": ["hello world", "This is a test"], + "col2": ["hello mars", "This is the second column"], + } + ).astype({"col1": "string", "col2": "string"}) + BOW_fitted = BOW( + ngram_range=1, + min_df_choice="min_df_absolute", + min_df_absolute=0, + min_df_relative=0, + random_state=1, + ).fit(X.copy()) Yt = BOW_fitted.preprocessor.vocabulary_ - words = sorted(["hello", "world", "this", "is", "test", # "a" is not added, len(...)=1 - "mars", "the", "second", "column"]) # is ignored by CountVectorizer + words = sorted( + [ + "hello", + "world", + "this", + "is", + "test", # "a" is not added, len(...)=1 + "mars", + "the", + "second", + "column", + ] + ) # is ignored by CountVectorizer Y = {key: idx for idx, key in enumerate(words)} np.testing.assert_array_equal(Yt, Y) - BOW_fitted = BOW_distinct(ngram_range=1, min_df_choice="min_df_absolute", min_df_absolute=0, - min_df_relative=0, random_state=1).fit(X.copy()) + BOW_fitted = BOW_distinct( + ngram_range=1, + min_df_choice="min_df_absolute", + min_df_absolute=0, + min_df_relative=0, + random_state=1, + ).fit(X.copy()) for key in BOW_fitted.preprocessor: y = [] @@ -38,58 +62,89 @@ def test_fit_transform(self): np.testing.assert_array_equal(yt, y) def test_transform(self): - X = pd.DataFrame({"col1": ["hello world", - "this is a test"], - "col2": ["hello mars", - "this is the second column"]}).astype({"col1": "string", - "col2": "string"}) - X_t = BOW(ngram_range=1, min_df_choice="min_df_absolute", min_df_absolute=0, - min_df_relative=0, random_state=1).fit_transform(X.copy()) + X = pd.DataFrame( + { + "col1": ["hello world", "this is a test"], + "col2": ["hello mars", "this is the second column"], + } + ).astype({"col1": "string", "col2": "string"}) + X_t = BOW( + ngram_range=1, + min_df_choice="min_df_absolute", + min_df_absolute=0, + min_df_relative=0, + random_state=1, + ).fit_transform(X.copy()) # ['column', 'hello', 'is', 'mars', 'second', 'test', 'the', 'this', 'world'] - y = np.array([[0, 2, 0, 1, 0, 0, 0, 0, 1], - [1, 0, 2, 0, 1, 1, 1, 2, 0]]) + y = np.array([[0, 2, 0, 1, 0, 0, 0, 0, 1], [1, 0, 2, 0, 1, 1, 1, 2, 0]]) np.testing.assert_array_equal(X_t.toarray(), y) - X_t = BOW_distinct(ngram_range=1, min_df_choice="min_df_absolute", min_df_absolute=0, - min_df_relative=0, random_state=1).fit_transform(X.copy()) + X_t = BOW_distinct( + ngram_range=1, + min_df_choice="min_df_absolute", + min_df_absolute=0, + min_df_relative=0, + random_state=1, + ).fit_transform(X.copy()) # 'hello', 'is', 'test', 'this', 'world', # 'column', 'hello', 'is', 'mars', 'second', 'the', 'this' - y = np.array([[1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0], - [0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1]]) + y = np.array( + [[1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0], [0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1]] + ) np.testing.assert_array_equal(X_t.toarray(), y) def test_check_shape(self): - X = pd.DataFrame({"col1": ["hello world", - "this is test"], - "col2": ["test test", - "test test"]}).astype({"col1": "string", - "col2": "string"}) - X_t = BOW(ngram_range=1, min_df_choice="min_df_absolute", min_df_absolute=0, - min_df_relative=0, random_state=1).fit_transform(X.copy()) + X = pd.DataFrame( + { + "col1": ["hello world", "this is test"], + "col2": ["test test", "test 
test"], + } + ).astype({"col1": "string", "col2": "string"}) + X_t = BOW( + ngram_range=1, + min_df_choice="min_df_absolute", + min_df_absolute=0, + min_df_relative=0, + random_state=1, + ).fit_transform(X.copy()) self.assertEqual(X_t.shape, (2, 5)) - X_t = BOW_distinct(ngram_range=1, min_df_choice="min_df_absolute", min_df_absolute=0, - min_df_relative=0, random_state=1).fit_transform(X.copy()) + X_t = BOW_distinct( + ngram_range=1, + min_df_choice="min_df_absolute", + min_df_absolute=0, + min_df_relative=0, + random_state=1, + ).fit_transform(X.copy()) self.assertEqual(X_t.shape, (2, 6)) def test_check_nan(self): - X = pd.DataFrame({"col1": ["hello world", - "this is test", - None], - "col2": ["test test", - "test test", - "test"]}).astype({"col1": "string", - "col2": "string"}) - X_t = BOW(ngram_range=1, min_df_choice="min_df_absolute", min_df_absolute=0, - min_df_relative=0, random_state=1).fit_transform(X.copy()) + X = pd.DataFrame( + { + "col1": ["hello world", "this is test", None], + "col2": ["test test", "test test", "test"], + } + ).astype({"col1": "string", "col2": "string"}) + X_t = BOW( + ngram_range=1, + min_df_choice="min_df_absolute", + min_df_absolute=0, + min_df_relative=0, + random_state=1, + ).fit_transform(X.copy()) self.assertEqual(X_t.shape, (3, 5)) - X_t = BOW_distinct(ngram_range=1, min_df_choice="min_df_absolute", min_df_absolute=0, - min_df_relative=0, random_state=1).fit_transform(X.copy()) + X_t = BOW_distinct( + ngram_range=1, + min_df_choice="min_df_absolute", + min_df_absolute=0, + min_df_relative=0, + random_state=1, + ).fit_transform(X.copy()) self.assertEqual(X_t.shape, (3, 6)) diff --git a/test/test_pipeline/components/data_preprocessing/test_minority_coalescence.py b/test/test_pipeline/components/data_preprocessing/test_minority_coalescence.py index 7b3e35763e..8e73e963ab 100644 --- a/test/test_pipeline/components/data_preprocessing/test_minority_coalescence.py +++ b/test/test_pipeline/components/data_preprocessing/test_minority_coalescence.py @@ -1,23 +1,25 @@ import unittest -import numpy as np +import numpy as np import scipy.sparse -from autosklearn.pipeline.components.data_preprocessing.minority_coalescense\ - .minority_coalescer import MinorityCoalescer -from autosklearn.pipeline.components.data_preprocessing.minority_coalescense\ - .no_coalescense import NoCoalescence +from autosklearn.pipeline.components.data_preprocessing.minority_coalescense.minority_coalescer import ( # noqa: E501 + MinorityCoalescer, +) +from autosklearn.pipeline.components.data_preprocessing.minority_coalescense.no_coalescense import ( # noqa: E501 + NoCoalescence, +) class MinorityCoalescerTest(unittest.TestCase): - def test_data_type_consistency(self): X = np.random.randint(3, 6, (3, 4)) Y = MinorityCoalescer().fit_transform(X) self.assertFalse(scipy.sparse.issparse(Y)) X = scipy.sparse.csc_matrix( - ([3, 6, 4, 5], ([0, 1, 2, 1], [3, 2, 1, 0])), shape=(3, 4)) + ([3, 6, 4, 5], ([0, 1, 2, 1], [3, 2, 1, 0])), shape=(3, 4) + ) Y = MinorityCoalescer().fit_transform(X) self.assertTrue(scipy.sparse.issparse(Y)) diff --git a/test/test_pipeline/components/data_preprocessing/test_numerical_imputation.py b/test/test_pipeline/components/data_preprocessing/test_numerical_imputation.py index 35d9d23a6d..d3354c3730 100644 --- a/test/test_pipeline/components/data_preprocessing/test_numerical_imputation.py +++ b/test/test_pipeline/components/data_preprocessing/test_numerical_imputation.py @@ -1,8 +1,9 @@ from scipy import sparse -from 
autosklearn.pipeline.components.data_preprocessing.imputation.numerical_imputation\ - import NumericalImputation -from autosklearn.pipeline.util import _test_preprocessing, PreprocessingTestCase +from autosklearn.pipeline.components.data_preprocessing.imputation.numerical_imputation import ( # noqa: E501 + NumericalImputation, +) +from autosklearn.pipeline.util import PreprocessingTestCase, _test_preprocessing class NumericalImputationTest(PreprocessingTestCase): @@ -14,13 +15,13 @@ def test_default_configuration(self): self.assertTrue((transformation == original).all()) transformations.append(transformation) if len(transformations) > 1: - self.assertTrue( - (transformations[-1] == transformations[-2]).all()) + self.assertTrue((transformations[-1] == transformations[-2]).all()) def test_default_configuration_sparse_data(self): transformations = [] - transformation, original = _test_preprocessing(NumericalImputation, - make_sparse=True) + transformation, original = _test_preprocessing( + NumericalImputation, make_sparse=True + ) self.assertEqual(transformation.shape, original.shape) self.assertTrue((transformation.data == original.data).all()) self.assertIsInstance(transformation, sparse.csc_matrix) @@ -28,4 +29,5 @@ def test_default_configuration_sparse_data(self): def test_preprocessing_dtype(self): super(NumericalImputationTest, self)._test_preprocessing_dtype( - NumericalImputation, add_NaNs=True) + NumericalImputation, add_NaNs=True + ) diff --git a/test/test_pipeline/components/data_preprocessing/test_one_hot_encoding.py b/test/test_pipeline/components/data_preprocessing/test_one_hot_encoding.py index ba99724964..08d2cadd9e 100644 --- a/test/test_pipeline/components/data_preprocessing/test_one_hot_encoding.py +++ b/test/test_pipeline/components/data_preprocessing/test_one_hot_encoding.py @@ -3,10 +3,12 @@ import numpy as np from scipy import sparse -from autosklearn.pipeline.components.data_preprocessing.categorical_encoding.\ - one_hot_encoding import OneHotEncoder -from autosklearn.pipeline.components.data_preprocessing.categorical_encoding.\ - no_encoding import NoEncoding +from autosklearn.pipeline.components.data_preprocessing.categorical_encoding.no_encoding import ( # noqa: E501 + NoEncoding, +) +from autosklearn.pipeline.components.data_preprocessing.categorical_encoding.one_hot_encoding import ( # noqa: E501 + OneHotEncoder, +) from autosklearn.pipeline.util import _test_preprocessing @@ -18,7 +20,6 @@ def create_X(instances=1000, n_feats=10, categs_per_feat=5, seed=0): class OneHotEncoderTest(unittest.TestCase): - def setUp(self): self.X_train = create_X() @@ -28,7 +29,8 @@ def test_data_type_consistency(self): self.assertFalse(sparse.issparse(Y)) X = sparse.csc_matrix( - ([3, 6, 4, 5], ([0, 1, 2, 1], [3, 2, 1, 0])), shape=(3, 4)) + ([3, 6, 4, 5], ([0, 1, 2, 1], [3, 2, 1, 0])), shape=(3, 4) + ) Y = OneHotEncoder().fit_transform(X) self.assertTrue(sparse.issparse(Y)) @@ -54,8 +56,7 @@ def test_default_configuration_no_encoding(self): self.assertTrue((transformation == original).all()) transformations.append(transformation) if len(transformations) > 1: - self.assertTrue( - (transformations[-1] == transformations[-2]).all()) + self.assertTrue((transformations[-1] == transformations[-2]).all()) def test_default_configuration_sparse_data(self): transformations = [] @@ -74,17 +75,18 @@ def test_default_configuration_sparse_data(self): transformations.append(Xt) if len(transformations) > 1: self.assertEqual( - (transformations[-1] != transformations[-2]).count_nonzero(), 0) + 
(transformations[-1] != transformations[-2]).count_nonzero(), 0 + ) def test_default_configuration_sparse_no_encoding(self): transformations = [] for i in range(2): - transformation, original = _test_preprocessing(NoEncoding, - make_sparse=True) + transformation, original = _test_preprocessing(NoEncoding, make_sparse=True) self.assertEqual(transformation.shape, original.shape) self.assertTrue((transformation.todense() == original.todense()).all()) transformations.append(transformation) if len(transformations) > 1: self.assertEqual( - (transformations[-1] != transformations[-2]).count_nonzero(), 0) + (transformations[-1] != transformations[-2]).count_nonzero(), 0 + ) diff --git a/test/test_pipeline/components/data_preprocessing/test_scaling.py b/test/test_pipeline/components/data_preprocessing/test_scaling.py index f800930dda..7f8249e3f1 100644 --- a/test/test_pipeline/components/data_preprocessing/test_scaling.py +++ b/test/test_pipeline/components/data_preprocessing/test_scaling.py @@ -12,13 +12,14 @@ def _test_helper(self, Preprocessor, dataset=None, make_sparse=False): X_train, Y_train, X_test, Y_test = get_dataset( dataset=dataset, make_sparse=make_sparse, - ) + ) - dataset_properties = {'sparse': make_sparse} + dataset_properties = {"sparse": make_sparse} original_X_train = X_train.copy() - configuration_space = Preprocessor(dataset_properties).\ - get_hyperparameter_search_space(dataset_properties) + configuration_space = Preprocessor( + dataset_properties + ).get_hyperparameter_search_space(dataset_properties) default = configuration_space.get_default_configuration() preprocessor = Preprocessor(dataset_properties, random_state=1) @@ -28,31 +29,31 @@ def _test_helper(self, Preprocessor, dataset=None, make_sparse=False): return transformer.transform(X_train), original_X_train def test_boston_is_not_scaled(self): - data = sklearn.datasets.load_boston()['data'] + data = sklearn.datasets.load_boston()["data"] self.assertGreaterEqual(np.max(data), 100) def test_default_configuration(self): transformations = [] for i in range(2): - transformation, original = self._test_helper(RescalingChoice, - dataset='boston') + transformation, original = self._test_helper( + RescalingChoice, dataset="boston" + ) # The maximum is around 1.95 for the transformed array... 
self.assertAlmostEqual(np.mean(transformation), 0, places=5) self.assertAlmostEqual(np.std(transformation), 1, places=5) self.assertFalse((original == transformation).all()) transformations.append(transformation) if len(transformations) > 1: - self.assertTrue( - (transformations[-1] == transformations[-2]).all()) + self.assertTrue((transformations[-1] == transformations[-2]).all()) def test_default_configuration_with_sparse_data(self): - preprocessing = self._test_helper(RescalingChoice, dataset='boston', - make_sparse=True) + preprocessing = self._test_helper( + RescalingChoice, dataset="boston", make_sparse=True + ) transformation, original = preprocessing self.assertEqual(original.getnnz(), transformation.getnnz()) self.assertTrue(~np.allclose(original.data, transformation.data)) @unittest.skip("Does not work at the moment.") def test_preprocessing_dtype(self): - super(ScalingComponentTest, self)._test_helper( - RescalingChoice) + super(ScalingComponentTest, self)._test_helper(RescalingChoice) diff --git a/test/test_pipeline/components/data_preprocessing/test_variance_threshold.py b/test/test_pipeline/components/data_preprocessing/test_variance_threshold.py index 4da441828d..a9ba4083ca 100644 --- a/test/test_pipeline/components/data_preprocessing/test_variance_threshold.py +++ b/test/test_pipeline/components/data_preprocessing/test_variance_threshold.py @@ -1,8 +1,9 @@ from scipy import sparse -from autosklearn.pipeline.components.data_preprocessing.variance_threshold.variance_threshold \ - import VarianceThreshold -from autosklearn.pipeline.util import _test_preprocessing, PreprocessingTestCase +from autosklearn.pipeline.components.data_preprocessing.variance_threshold.variance_threshold import ( # noqa: E501 + VarianceThreshold, +) +from autosklearn.pipeline.util import PreprocessingTestCase, _test_preprocessing class VarianceThresholdTest(PreprocessingTestCase): @@ -14,13 +15,13 @@ def test_default_configuration(self): self.assertTrue((transformation == original).all()) transformations.append(transformation) if len(transformations) > 1: - self.assertTrue( - (transformations[-1] == transformations[-2]).all()) + self.assertTrue((transformations[-1] == transformations[-2]).all()) def test_default_configuration_sparse_data(self): transformations = [] - transformation, original = _test_preprocessing(VarianceThreshold, - make_sparse=True) + transformation, original = _test_preprocessing( + VarianceThreshold, make_sparse=True + ) self.assertEqual(transformation.shape, (100, 3)) self.assertTrue((transformation.toarray() == original.toarray()[:, 1:]).all()) self.assertIsInstance(transformation, sparse.csr_matrix) diff --git a/test/test_pipeline/components/dummy_components/dummy_component_1.py b/test/test_pipeline/components/dummy_components/dummy_component_1.py index 06074db983..0af3466787 100644 --- a/test/test_pipeline/components/dummy_components/dummy_component_1.py +++ b/test/test_pipeline/components/dummy_components/dummy_component_1.py @@ -5,7 +5,7 @@ # Add the parent directory to the path to import the parent component this_directory = os.path.dirname(os.path.abspath(__file__)) -parent_directory = os.path.abspath(os.path.join(this_directory, '..')) +parent_directory = os.path.abspath(os.path.join(this_directory, "..")) sys.path.append(parent_directory) diff --git a/test/test_pipeline/components/dummy_components/dummy_component_2.py b/test/test_pipeline/components/dummy_components/dummy_component_2.py index 9b67230e4c..f941dcdb40 100644 --- 
a/test/test_pipeline/components/dummy_components/dummy_component_2.py +++ b/test/test_pipeline/components/dummy_components/dummy_component_2.py @@ -6,7 +6,7 @@ # Add the parent directory to the path to import the parent component as # dummy_components.dummy_component_2.DummyComponent1 this_directory = os.path.dirname(os.path.abspath(__file__)) -parent_directory = os.path.abspath(os.path.join(this_directory, '..')) +parent_directory = os.path.abspath(os.path.join(this_directory, "..")) sys.path.append(parent_directory) diff --git a/test/test_pipeline/components/dummy_components/dummy_component_import.py b/test/test_pipeline/components/dummy_components/dummy_component_import.py index f7981a40a3..a4cb764215 100644 --- a/test/test_pipeline/components/dummy_components/dummy_component_import.py +++ b/test/test_pipeline/components/dummy_components/dummy_component_import.py @@ -1,2 +1,4 @@ -from autosklearn.pipeline.components.base import find_components # noqa -from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm # noqa +from autosklearn.pipeline.components.base import find_components # noqa +from autosklearn.pipeline.components.base import ( # noqa + AutoSklearnClassificationAlgorithm, +) diff --git a/test/test_pipeline/components/feature_preprocessing/__init__.py b/test/test_pipeline/components/feature_preprocessing/__init__.py index 8f0ce6cb7c..92bf78f389 100644 --- a/test/test_pipeline/components/feature_preprocessing/__init__.py +++ b/test/test_pipeline/components/feature_preprocessing/__init__.py @@ -1 +1 @@ -__author__ = 'feurerm' +__author__ = "feurerm" diff --git a/test/test_pipeline/components/feature_preprocessing/test_NoPreprocessing.py b/test/test_pipeline/components/feature_preprocessing/test_NoPreprocessing.py index 22811e75bb..440f2fd50d 100644 --- a/test/test_pipeline/components/feature_preprocessing/test_NoPreprocessing.py +++ b/test/test_pipeline/components/feature_preprocessing/test_NoPreprocessing.py @@ -1,7 +1,9 @@ import numpy as np -from autosklearn.pipeline.components.feature_preprocessing.no_preprocessing import NoPreprocessing -from autosklearn.pipeline.util import _test_preprocessing, PreprocessingTestCase +from autosklearn.pipeline.components.feature_preprocessing.no_preprocessing import ( + NoPreprocessing, +) +from autosklearn.pipeline.util import PreprocessingTestCase, _test_preprocessing class NoneComponentTest(PreprocessingTestCase): diff --git a/test/test_pipeline/components/feature_preprocessing/test_choice.py b/test/test_pipeline/components/feature_preprocessing/test_choice.py index 525ec38356..516cf318bf 100644 --- a/test/test_pipeline/components/feature_preprocessing/test_choice.py +++ b/test/test_pipeline/components/feature_preprocessing/test_choice.py @@ -6,27 +6,27 @@ class FeatureProcessingTest(unittest.TestCase): def test_get_available_components(self): # Target type - for target_type, num_values in [('classification', 15), - ('regression', 14)]: - data_properties = {'target_type': target_type} + for target_type, num_values in [("classification", 15), ("regression", 14)]: + data_properties = {"target_type": target_type} - available_components = fp.FeaturePreprocessorChoice(data_properties)\ - .get_available_components(data_properties) + available_components = fp.FeaturePreprocessorChoice( + data_properties + ).get_available_components(data_properties) self.assertEqual(len(available_components), num_values) # Multiclass - data_properties = {'target_type': 'classification', - 'multiclass': True} - available_components = 
fp.FeaturePreprocessorChoice(data_properties) \ - .get_available_components(data_properties) + data_properties = {"target_type": "classification", "multiclass": True} + available_components = fp.FeaturePreprocessorChoice( + data_properties + ).get_available_components(data_properties) self.assertEqual(len(available_components), 15) # Multilabel - data_properties = {'target_type': 'classification', - 'multilabel': True} - available_components = fp.FeaturePreprocessorChoice(data_properties) \ - .get_available_components(data_properties) + data_properties = {"target_type": "classification", "multilabel": True} + available_components = fp.FeaturePreprocessorChoice( + data_properties + ).get_available_components(data_properties) self.assertEqual(len(available_components), 12) diff --git a/test/test_pipeline/components/feature_preprocessing/test_densifier.py b/test/test_pipeline/components/feature_preprocessing/test_densifier.py index 6f02ee0e5b..9831a53e57 100644 --- a/test/test_pipeline/components/feature_preprocessing/test_densifier.py +++ b/test/test_pipeline/components/feature_preprocessing/test_densifier.py @@ -1,7 +1,7 @@ import numpy as np from autosklearn.pipeline.components.feature_preprocessing.densifier import Densifier -from autosklearn.pipeline.util import _test_preprocessing, PreprocessingTestCase +from autosklearn.pipeline.util import PreprocessingTestCase, _test_preprocessing class DensifierComponentTest(PreprocessingTestCase): diff --git a/test/test_pipeline/components/feature_preprocessing/test_extra_trees_classification.py b/test/test_pipeline/components/feature_preprocessing/test_extra_trees_classification.py index 6b69462fec..2db52679c7 100644 --- a/test/test_pipeline/components/feature_preprocessing/test_extra_trees_classification.py +++ b/test/test_pipeline/components/feature_preprocessing/test_extra_trees_classification.py @@ -1,29 +1,36 @@ -from sklearn.linear_model import RidgeClassifier -from autosklearn.pipeline.components.feature_preprocessing.\ - extra_trees_preproc_for_classification import \ - ExtraTreesPreprocessorClassification -from autosklearn.pipeline.util import _test_preprocessing, \ - PreprocessingTestCase, get_dataset import sklearn.metrics +from sklearn.linear_model import RidgeClassifier + +from autosklearn.pipeline.components.feature_preprocessing.extra_trees_preproc_for_classification import ( # noqa: E501 + ExtraTreesPreprocessorClassification, +) +from autosklearn.pipeline.util import ( + PreprocessingTestCase, + _test_preprocessing, + get_dataset, +) class ExtreTreesClassificationComponentTest(PreprocessingTestCase): def test_default_configuration(self): transformation, original = _test_preprocessing( - ExtraTreesPreprocessorClassification) + ExtraTreesPreprocessorClassification + ) self.assertEqual(transformation.shape[0], original.shape[0]) self.assertFalse((transformation == 0).all()) def test_default_configuration_classify(self): for i in range(2): - X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits', - make_sparse=False) - configuration_space = ExtraTreesPreprocessorClassification.\ - get_hyperparameter_search_space() + X_train, Y_train, X_test, Y_test = get_dataset( + dataset="digits", make_sparse=False + ) + configuration_space = ( + ExtraTreesPreprocessorClassification.get_hyperparameter_search_space() + ) default = configuration_space.get_default_configuration() preprocessor = ExtraTreesPreprocessorClassification( - random_state=1, - **{hp_name: default[hp_name] for hp_name in default}) + random_state=1, **{hp_name: 
default[hp_name] for hp_name in default} + ) preprocessor.fit(X_train, Y_train) X_train_trans = preprocessor.transform(X_train) X_test_trans = preprocessor.transform(X_test) @@ -37,14 +44,16 @@ def test_default_configuration_classify(self): def test_default_configuration_classify_sparse(self): for i in range(2): - X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits', - make_sparse=True) - configuration_space = ExtraTreesPreprocessorClassification.\ - get_hyperparameter_search_space() + X_train, Y_train, X_test, Y_test = get_dataset( + dataset="digits", make_sparse=True + ) + configuration_space = ( + ExtraTreesPreprocessorClassification.get_hyperparameter_search_space() + ) default = configuration_space.get_default_configuration() preprocessor = ExtraTreesPreprocessorClassification( - random_state=1, - **{hp_name: default[hp_name] for hp_name in default}) + random_state=1, **{hp_name: default[hp_name] for hp_name in default} + ) preprocessor.fit(X_train, Y_train) X_train_trans = preprocessor.transform(X_train) X_test_trans = preprocessor.transform(X_test) @@ -57,5 +66,6 @@ def test_default_configuration_classify_sparse(self): self.assertAlmostEqual(accuracy, 0.43715846994535518, places=2) def test_preprocessing_dtype(self): - super(ExtreTreesClassificationComponentTest, self).\ - _test_preprocessing_dtype(ExtraTreesPreprocessorClassification) + super(ExtreTreesClassificationComponentTest, self)._test_preprocessing_dtype( + ExtraTreesPreprocessorClassification + ) diff --git a/test/test_pipeline/components/feature_preprocessing/test_extra_trees_regression.py b/test/test_pipeline/components/feature_preprocessing/test_extra_trees_regression.py index b850d5aa99..cd6ae3dd21 100644 --- a/test/test_pipeline/components/feature_preprocessing/test_extra_trees_regression.py +++ b/test/test_pipeline/components/feature_preprocessing/test_extra_trees_regression.py @@ -1,29 +1,34 @@ -from sklearn.ensemble import ExtraTreesRegressor -from autosklearn.pipeline.components.feature_preprocessing.\ - extra_trees_preproc_for_regression import \ - ExtraTreesPreprocessorRegression -from autosklearn.pipeline.util import _test_preprocessing, \ - PreprocessingTestCase, get_dataset import sklearn.metrics +from sklearn.ensemble import ExtraTreesRegressor + +from autosklearn.pipeline.components.feature_preprocessing.extra_trees_preproc_for_regression import ( # noqa: E501 + ExtraTreesPreprocessorRegression, +) +from autosklearn.pipeline.util import ( + PreprocessingTestCase, + _test_preprocessing, + get_dataset, +) class ExtraTreesRegressionComponentTest(PreprocessingTestCase): def test_default_configuration(self): - transformation, original = _test_preprocessing( - ExtraTreesPreprocessorRegression) + transformation, original = _test_preprocessing(ExtraTreesPreprocessorRegression) self.assertEqual(transformation.shape[0], original.shape[0]) self.assertFalse((transformation == 0).all()) def test_default_configuration_regression(self): for i in range(2): - X_train, Y_train, X_test, Y_test = get_dataset(dataset='boston', - make_sparse=False) - configuration_space = ExtraTreesPreprocessorRegression.\ - get_hyperparameter_search_space() + X_train, Y_train, X_test, Y_test = get_dataset( + dataset="boston", make_sparse=False + ) + configuration_space = ( + ExtraTreesPreprocessorRegression.get_hyperparameter_search_space() + ) default = configuration_space.get_default_configuration() preprocessor = ExtraTreesPreprocessorRegression( - random_state=1, - **{hp_name: default[hp_name] for hp_name in default}) + 
random_state=1, **{hp_name: default[hp_name] for hp_name in default} + ) preprocessor.fit(X_train, Y_train) X_train_trans = preprocessor.transform(X_train) X_test_trans = preprocessor.transform(X_test) @@ -37,14 +42,16 @@ def test_default_configuration_regression(self): def test_default_configuration_classify_sparse(self): for i in range(2): - X_train, Y_train, X_test, Y_test = get_dataset(dataset='boston', - make_sparse=True) - configuration_space = ExtraTreesPreprocessorRegression.\ - get_hyperparameter_search_space() + X_train, Y_train, X_test, Y_test = get_dataset( + dataset="boston", make_sparse=True + ) + configuration_space = ( + ExtraTreesPreprocessorRegression.get_hyperparameter_search_space() + ) default = configuration_space.get_default_configuration() preprocessor = ExtraTreesPreprocessorRegression( - random_state=1, - **{hp_name: default[hp_name] for hp_name in default}) + random_state=1, **{hp_name: default[hp_name] for hp_name in default} + ) preprocessor.fit(X_train, Y_train) X_train_trans = preprocessor.transform(X_train) X_test_trans = preprocessor.transform(X_test) @@ -57,5 +64,6 @@ def test_default_configuration_classify_sparse(self): self.assertAlmostEqual(error, 55.69613978965742, places=2) def test_preprocessing_dtype(self): - super(ExtraTreesRegressionComponentTest, self).\ - _test_preprocessing_dtype(ExtraTreesPreprocessorRegression) + super(ExtraTreesRegressionComponentTest, self)._test_preprocessing_dtype( + ExtraTreesPreprocessorRegression + ) diff --git a/test/test_pipeline/components/feature_preprocessing/test_fast_ica.py b/test/test_pipeline/components/feature_preprocessing/test_fast_ica.py index ae22d65c54..a38097a60e 100644 --- a/test/test_pipeline/components/feature_preprocessing/test_fast_ica.py +++ b/test/test_pipeline/components/feature_preprocessing/test_fast_ica.py @@ -1,28 +1,30 @@ import unittest -from sklearn.linear_model import Ridge -from autosklearn.pipeline.components.feature_preprocessing.fast_ica import \ - FastICA -from autosklearn.pipeline.util import _test_preprocessing, PreprocessingTestCase, \ - get_dataset import sklearn.metrics +from sklearn.linear_model import Ridge + +from autosklearn.pipeline.components.feature_preprocessing.fast_ica import FastICA +from autosklearn.pipeline.util import ( + PreprocessingTestCase, + _test_preprocessing, + get_dataset, +) class FastICAComponentTest(PreprocessingTestCase): def test_default_configuration(self): - transformation, original = _test_preprocessing(FastICA, - dataset="diabetes") + transformation, original = _test_preprocessing(FastICA, dataset="diabetes") self.assertEqual(transformation.shape[0], original.shape[0]) self.assertFalse((transformation == 0).all()) def test_default_configuration_regression(self): for i in range(5): - X_train, Y_train, X_test, Y_test = get_dataset(dataset='diabetes') + X_train, Y_train, X_test, Y_test = get_dataset(dataset="diabetes") configuration_space = FastICA.get_hyperparameter_search_space() default = configuration_space.get_default_configuration() - preprocessor = FastICA(random_state=1, - **{hp_name: default[hp_name] for hp_name in - default}) + preprocessor = FastICA( + random_state=1, **{hp_name: default[hp_name] for hp_name in default} + ) preprocessor.fit(X_train, Y_train) X_train_trans = preprocessor.transform(X_train) X_test_trans = preprocessor.transform(X_test) @@ -36,5 +38,6 @@ def test_default_configuration_regression(self): @unittest.skip("Always returns float64") def test_preprocessing_dtype(self): - super(FastICAComponentTest, - 
self)._test_preprocessing_dtype(FastICA, dataset='diabetes') + super(FastICAComponentTest, self)._test_preprocessing_dtype( + FastICA, dataset="diabetes" + ) diff --git a/test/test_pipeline/components/feature_preprocessing/test_feature_agglomeration.py b/test/test_pipeline/components/feature_preprocessing/test_feature_agglomeration.py index 0cac9426d2..afccd79c31 100644 --- a/test/test_pipeline/components/feature_preprocessing/test_feature_agglomeration.py +++ b/test/test_pipeline/components/feature_preprocessing/test_feature_agglomeration.py @@ -1,9 +1,14 @@ -from sklearn.ensemble import RandomForestClassifier -from autosklearn.pipeline.components.feature_preprocessing.feature_agglomeration import \ - FeatureAgglomeration -from autosklearn.pipeline.util import _test_preprocessing, PreprocessingTestCase, \ - get_dataset import sklearn.metrics +from sklearn.ensemble import RandomForestClassifier + +from autosklearn.pipeline.components.feature_preprocessing.feature_agglomeration import ( # noqa: E501 + FeatureAgglomeration, +) +from autosklearn.pipeline.util import ( + PreprocessingTestCase, + _test_preprocessing, + get_dataset, +) class FeatureAgglomerationComponentTest(PreprocessingTestCase): @@ -14,13 +19,14 @@ def test_default_configuration(self): def test_default_configuration_classify(self): for i in range(3): - X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits', - make_sparse=False) + X_train, Y_train, X_test, Y_test = get_dataset( + dataset="digits", make_sparse=False + ) configuration_space = FeatureAgglomeration.get_hyperparameter_search_space() default = configuration_space.get_default_configuration() - preprocessor = FeatureAgglomeration(random_state=1, - **{hp_name: default[hp_name] for - hp_name in default}) + preprocessor = FeatureAgglomeration( + random_state=1, **{hp_name: default[hp_name] for hp_name in default} + ) preprocessor.fit(X_train, Y_train) X_train_trans = preprocessor.transform(X_train) X_test_trans = preprocessor.transform(X_test) @@ -33,6 +39,6 @@ def test_default_configuration_classify(self): self.assertAlmostEqual(accuracy, 0.8761384335154827) def test_preprocessing_dtype(self): - super(FeatureAgglomerationComponentTest, - self)._test_preprocessing_dtype(FeatureAgglomeration, - test_sparse=False) + super(FeatureAgglomerationComponentTest, self)._test_preprocessing_dtype( + FeatureAgglomeration, test_sparse=False + ) diff --git a/test/test_pipeline/components/feature_preprocessing/test_kernel_pca.py b/test/test_pipeline/components/feature_preprocessing/test_kernel_pca.py index 19b1368a49..2c5a8c865b 100644 --- a/test/test_pipeline/components/feature_preprocessing/test_kernel_pca.py +++ b/test/test_pipeline/components/feature_preprocessing/test_kernel_pca.py @@ -1,38 +1,46 @@ import unittest -from sklearn.linear_model import RidgeClassifier -from autosklearn.pipeline.components.feature_preprocessing.kernel_pca import \ - KernelPCA -from autosklearn.pipeline.util import _test_preprocessing, PreprocessingTestCase, \ - get_dataset import sklearn.metrics +from sklearn.linear_model import RidgeClassifier + +from autosklearn.pipeline.components.feature_preprocessing.kernel_pca import KernelPCA +from autosklearn.pipeline.util import ( + PreprocessingTestCase, + _test_preprocessing, + get_dataset, +) class KernelPCAComponentTest(PreprocessingTestCase): def test_default_configuration(self): - transformation, original = _test_preprocessing(KernelPCA, - dataset='digits', - train_size_maximum=2000) + transformation, original = _test_preprocessing( + KernelPCA, 
dataset="digits", train_size_maximum=2000 + ) self.assertEqual(transformation.shape[0], original.shape[0]) self.assertFalse((transformation == 0).all()) def test_default_configuration_sparse(self): - transformation, original = _test_preprocessing(KernelPCA, - make_sparse=True, - dataset='digits') + transformation, original = _test_preprocessing( + KernelPCA, make_sparse=True, dataset="digits" + ) self.assertEqual(transformation.shape[0], original.shape[0]) self.assertFalse((transformation == 0).all()) def test_default_configuration_classify(self): for i in range(5): - X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits', - make_sparse=False, - train_size_maximum=1000) + X_train, Y_train, X_test, Y_test = get_dataset( + dataset="digits", make_sparse=False, train_size_maximum=1000 + ) configuration_space = KernelPCA.get_hyperparameter_search_space() default = configuration_space.get_default_configuration() - preprocessor = KernelPCA(random_state=1, - **{hp_name: default[hp_name] for hp_name in - default if default[hp_name] is not None}) + preprocessor = KernelPCA( + random_state=1, + **{ + hp_name: default[hp_name] + for hp_name in default + if default[hp_name] is not None + }, + ) preprocessor.fit(X_train, Y_train) X_train_trans = preprocessor.transform(X_train) X_test_trans = preprocessor.transform(X_test) @@ -46,5 +54,4 @@ def test_default_configuration_classify(self): @unittest.skip("Always returns float64") def test_preprocessing_dtype(self): - super(KernelPCAComponentTest, - self)._test_preprocessing_dtype(KernelPCA) + super(KernelPCAComponentTest, self)._test_preprocessing_dtype(KernelPCA) diff --git a/test/test_pipeline/components/feature_preprocessing/test_kitchen_sinks.py b/test/test_pipeline/components/feature_preprocessing/test_kitchen_sinks.py index c94e6f9a55..16ef41198d 100644 --- a/test/test_pipeline/components/feature_preprocessing/test_kitchen_sinks.py +++ b/test/test_pipeline/components/feature_preprocessing/test_kitchen_sinks.py @@ -1,7 +1,9 @@ import unittest -from autosklearn.pipeline.components.feature_preprocessing.kitchen_sinks import RandomKitchenSinks -from autosklearn.pipeline.util import _test_preprocessing, PreprocessingTestCase +from autosklearn.pipeline.components.feature_preprocessing.kitchen_sinks import ( + RandomKitchenSinks, +) +from autosklearn.pipeline.util import PreprocessingTestCase, _test_preprocessing class KitchenSinkComponent(PreprocessingTestCase): @@ -13,5 +15,4 @@ def test_default_configuration(self): @unittest.skip("Right now, the RBFSampler returns a float64 array!") def test_preprocessing_dtype(self): - super(KitchenSinkComponent, - self)._test_preprocessing_dtype(RandomKitchenSinks) + super(KitchenSinkComponent, self)._test_preprocessing_dtype(RandomKitchenSinks) diff --git a/test/test_pipeline/components/feature_preprocessing/test_liblinear.py b/test/test_pipeline/components/feature_preprocessing/test_liblinear.py index 19b56b6eac..0195dfb701 100644 --- a/test/test_pipeline/components/feature_preprocessing/test_liblinear.py +++ b/test/test_pipeline/components/feature_preprocessing/test_liblinear.py @@ -1,15 +1,22 @@ -from sklearn.linear_model import RidgeClassifier -from autosklearn.pipeline.components.feature_preprocessing.liblinear_svc_preprocessor import \ - LibLinear_Preprocessor -from autosklearn.pipeline.util import _test_preprocessing, PreprocessingTestCase, \ - get_dataset import sklearn.metrics +from sklearn.linear_model import RidgeClassifier -from test.test_pipeline.ignored_warnings import ignore_warnings, 
feature_preprocessing_warnings +from autosklearn.pipeline.components.feature_preprocessing.liblinear_svc_preprocessor import ( # noqa: E501 + LibLinear_Preprocessor, +) +from autosklearn.pipeline.util import ( + PreprocessingTestCase, + _test_preprocessing, + get_dataset, +) +from test.test_pipeline.ignored_warnings import ( + feature_preprocessing_warnings, + ignore_warnings, +) -class LiblinearComponentTest(PreprocessingTestCase): +class LiblinearComponentTest(PreprocessingTestCase): def test_default_configuration(self): with ignore_warnings(feature_preprocessing_warnings): transformation, original = _test_preprocessing(LibLinear_Preprocessor) @@ -19,15 +26,21 @@ def test_default_configuration(self): def test_default_configuration_classify(self): for i in range(2): - X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits', - make_sparse=False) - configuration_space = LibLinear_Preprocessor.get_hyperparameter_search_space() + X_train, Y_train, X_test, Y_test = get_dataset( + dataset="digits", make_sparse=False + ) + configuration_space = ( + LibLinear_Preprocessor.get_hyperparameter_search_space() + ) default = configuration_space.get_default_configuration() - preprocessor = LibLinear_Preprocessor(random_state=1, - **{hp_name: default[hp_name] - for hp_name in - default if default[ - hp_name] is not None}) + preprocessor = LibLinear_Preprocessor( + random_state=1, + **{ + hp_name: default[hp_name] + for hp_name in default + if default[hp_name] is not None + }, + ) with ignore_warnings(feature_preprocessing_warnings): preprocessor.fit(X_train, Y_train) diff --git a/test/test_pipeline/components/feature_preprocessing/test_nystroem_sampler.py b/test/test_pipeline/components/feature_preprocessing/test_nystroem_sampler.py index b3db49ebca..d6244c362f 100644 --- a/test/test_pipeline/components/feature_preprocessing/test_nystroem_sampler.py +++ b/test/test_pipeline/components/feature_preprocessing/test_nystroem_sampler.py @@ -3,8 +3,9 @@ import numpy as np import sklearn.preprocessing -from autosklearn.pipeline.components.feature_preprocessing.nystroem_sampler import \ - Nystroem +from autosklearn.pipeline.components.feature_preprocessing.nystroem_sampler import ( + Nystroem, +) from autosklearn.pipeline.util import _test_preprocessing, get_dataset @@ -16,7 +17,7 @@ def test_default_configuration(self): self.assertFalse((transformation == 0).all()) # Custon preprocessing test to check if clipping to zero works - X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits') + X_train, Y_train, X_test, Y_test = get_dataset(dataset="digits") original_X_train = X_train.copy() ss = sklearn.preprocessing.StandardScaler() X_train = ss.fit_transform(X_train) @@ -25,12 +26,15 @@ def test_default_configuration(self): preprocessor = Nystroem( random_state=1, - **{hp_name: default[hp_name] for hp_name in default if default[hp_name] is not None}, - ) + **{ + hp_name: default[hp_name] + for hp_name in default + if default[hp_name] is not None + }, + ) transformer = preprocessor.fit(X_train, Y_train) - transformation, original = transformer.transform( - X_train), original_X_train + transformation, original = transformer.transform(X_train), original_X_train self.assertEqual(transformation.shape[0], original.shape[0]) self.assertEqual(transformation.shape[1], 100) @@ -46,7 +50,7 @@ def _test_preprocessing_dtype(self): preprocessor = Nystroem( random_state=1, **{hp.hyperparameter.name: hp.value for hp in default.values.values()}, - ) + ) preprocessor.fit(X_train) Xt = preprocessor.transform(X_train) 
self.assertEqual(Xt.dtype, np.float32) @@ -59,7 +63,7 @@ def _test_preprocessing_dtype(self): preprocessor = Nystroem( random_state=1, **{hp.hyperparameter.name: hp.value for hp in default.values.values()}, - ) + ) preprocessor.fit(X_train, Y_train) Xt = preprocessor.transform(X_train) self.assertEqual(Xt.dtype, np.float64) @@ -73,7 +77,7 @@ def _test_preprocessing_dtype(self): preprocessor = Nystroem( random_state=1, **{hp.hyperparameter.name: hp.value for hp in default.values.values()}, - ) + ) preprocessor.fit(X_train) Xt = preprocessor.transform(X_train) self.assertEqual(Xt.dtype, np.float32) @@ -86,7 +90,7 @@ def _test_preprocessing_dtype(self): preprocessor = Nystroem( random_state=1, **{hp.hyperparameter.name: hp.value for hp in default.values.values()}, - ) + ) preprocessor.fit(X_train) Xt = preprocessor.transform(X_train) self.assertEqual(Xt.dtype, np.float64) diff --git a/test/test_pipeline/components/feature_preprocessing/test_pca.py b/test/test_pipeline/components/feature_preprocessing/test_pca.py index 02ab8bdd0e..b73da8aa64 100644 --- a/test/test_pipeline/components/feature_preprocessing/test_pca.py +++ b/test/test_pipeline/components/feature_preprocessing/test_pca.py @@ -1,7 +1,7 @@ import numpy as np from autosklearn.pipeline.components.feature_preprocessing.pca import PCA -from autosklearn.pipeline.util import _test_preprocessing, PreprocessingTestCase +from autosklearn.pipeline.util import PreprocessingTestCase, _test_preprocessing class PCAComponentTest(PreprocessingTestCase): @@ -13,9 +13,9 @@ def test_default_configuration(self): self.assertFalse((transformation == original).all()) transformations.append(transformation) if len(transformations) > 1: - np.testing.assert_allclose(transformations[-1], - transformations[-2], rtol=1e-4) + np.testing.assert_allclose( + transformations[-1], transformations[-2], rtol=1e-4 + ) def test_preprocessing_dtype(self): - super(PCAComponentTest, self)._test_preprocessing_dtype(PCA, - test_sparse=False) + super(PCAComponentTest, self)._test_preprocessing_dtype(PCA, test_sparse=False) diff --git a/test/test_pipeline/components/feature_preprocessing/test_polynomial.py b/test/test_pipeline/components/feature_preprocessing/test_polynomial.py index 28f84bc595..3c9e93a49c 100644 --- a/test/test_pipeline/components/feature_preprocessing/test_polynomial.py +++ b/test/test_pipeline/components/feature_preprocessing/test_polynomial.py @@ -1,9 +1,14 @@ -from sklearn.tree import DecisionTreeClassifier -from autosklearn.pipeline.components.feature_preprocessing.polynomial import \ - PolynomialFeatures -from autosklearn.pipeline.util import _test_preprocessing, PreprocessingTestCase, \ - get_dataset import sklearn.metrics +from sklearn.tree import DecisionTreeClassifier + +from autosklearn.pipeline.components.feature_preprocessing.polynomial import ( + PolynomialFeatures, +) +from autosklearn.pipeline.util import ( + PreprocessingTestCase, + _test_preprocessing, + get_dataset, +) class PolynomialFeaturesComponentTest(PreprocessingTestCase): @@ -14,13 +19,14 @@ def test_default_configuration(self): def test_default_configuration_classify(self): for i in range(2): - X_train, Y_train, X_test, Y_test = get_dataset(dataset='breast_cancer', - make_sparse=False) + X_train, Y_train, X_test, Y_test = get_dataset( + dataset="breast_cancer", make_sparse=False + ) configuration_space = PolynomialFeatures.get_hyperparameter_search_space() default = configuration_space.get_default_configuration() - preprocessor = PolynomialFeatures(random_state=1, - **{hp_name: 
default[hp_name] for - hp_name in default}) + preprocessor = PolynomialFeatures( + random_state=1, **{hp_name: default[hp_name] for hp_name in default} + ) preprocessor.fit(X_train, Y_train) X_train_trans = preprocessor.transform(X_train) X_test_trans = preprocessor.transform(X_test) @@ -34,13 +40,14 @@ def test_default_configuration_classify(self): def test_default_configuration_classify_sparse(self): for i in range(2): - X_train, Y_train, X_test, Y_test = get_dataset(dataset='breast_cancer', - make_sparse=True) + X_train, Y_train, X_test, Y_test = get_dataset( + dataset="breast_cancer", make_sparse=True + ) configuration_space = PolynomialFeatures.get_hyperparameter_search_space() default = configuration_space.get_default_configuration() - preprocessor = PolynomialFeatures(random_state=1, - **{hp_name: default[hp_name] for - hp_name in default}) + preprocessor = PolynomialFeatures( + random_state=1, **{hp_name: default[hp_name] for hp_name in default} + ) preprocessor.fit(X_train, Y_train) X_train_trans = preprocessor.transform(X_train) X_test_trans = preprocessor.transform(X_test) @@ -53,6 +60,6 @@ def test_default_configuration_classify_sparse(self): self.assertAlmostEqual(accuracy, 0.8544152744630071, places=2) def test_preprocessing_dtype(self): - super(PolynomialFeaturesComponentTest, - self)._test_preprocessing_dtype(PolynomialFeatures, - test_sparse=False) + super(PolynomialFeaturesComponentTest, self)._test_preprocessing_dtype( + PolynomialFeatures, test_sparse=False + ) diff --git a/test/test_pipeline/components/feature_preprocessing/test_random_trees_embedding.py b/test/test_pipeline/components/feature_preprocessing/test_random_trees_embedding.py index 681c319830..f84675dc1a 100644 --- a/test/test_pipeline/components/feature_preprocessing/test_random_trees_embedding.py +++ b/test/test_pipeline/components/feature_preprocessing/test_random_trees_embedding.py @@ -3,8 +3,9 @@ import numpy as np import scipy.sparse -from autosklearn.pipeline.components.feature_preprocessing.random_trees_embedding import \ - RandomTreesEmbedding +from autosklearn.pipeline.components.feature_preprocessing.random_trees_embedding import ( # noqa: E501 + RandomTreesEmbedding, +) from autosklearn.pipeline.util import _test_preprocessing, get_dataset @@ -26,10 +27,9 @@ def test_preprocessing_dtype(self): configuration_space = RandomTreesEmbedding.get_hyperparameter_search_space() default = configuration_space.get_default_configuration() - preprocessor = RandomTreesEmbedding(random_state=1, - **{hp_name: default[hp_name] for - hp_name in - default}) + preprocessor = RandomTreesEmbedding( + random_state=1, **{hp_name: default[hp_name] for hp_name in default} + ) preprocessor.fit(X_train) Xt = preprocessor.transform(X_train) @@ -40,10 +40,9 @@ def test_preprocessing_dtype(self): X_train = X_train.astype(np.float64) configuration_space = RandomTreesEmbedding.get_hyperparameter_search_space() default = configuration_space.get_default_configuration() - preprocessor = RandomTreesEmbedding(random_state=1, - **{hp_name: default[hp_name] for - hp_name in - default}) + preprocessor = RandomTreesEmbedding( + random_state=1, **{hp_name: default[hp_name] for hp_name in default} + ) preprocessor.fit(X_train, Y_train) Xt = preprocessor.transform(X_train) self.assertEqual(Xt.dtype, np.float64) diff --git a/test/test_pipeline/components/feature_preprocessing/test_select_percentile_classification.py b/test/test_pipeline/components/feature_preprocessing/test_select_percentile_classification.py index d7cde925b0..b177e4f4ba 
100644 --- a/test/test_pipeline/components/feature_preprocessing/test_select_percentile_classification.py +++ b/test/test_pipeline/components/feature_preprocessing/test_select_percentile_classification.py @@ -4,8 +4,9 @@ import scipy.sparse import sklearn.preprocessing -from autosklearn.pipeline.components.feature_preprocessing.select_percentile_classification \ - import SelectPercentileClassification +from autosklearn.pipeline.components.feature_preprocessing.select_percentile_classification import ( # noqa: E501 + SelectPercentileClassification, +) from autosklearn.pipeline.util import _test_preprocessing, get_dataset @@ -13,29 +14,35 @@ class SelectPercentileClassificationTest(unittest.TestCase): def test_default_configuration(self): transformation, original = _test_preprocessing(SelectPercentileClassification) self.assertEqual(transformation.shape[0], original.shape[0]) - self.assertEqual(transformation.shape[1], int(original.shape[1]/2)) + self.assertEqual(transformation.shape[1], int(original.shape[1] / 2)) self.assertFalse((transformation == 0).all()) transformation, original = _test_preprocessing( - SelectPercentileClassification, - make_sparse=True, - ) + SelectPercentileClassification, + make_sparse=True, + ) self.assertTrue(scipy.sparse.issparse(transformation)) self.assertEqual(transformation.shape[0], original.shape[0]) - self.assertEqual(transformation.shape[1], int(original.shape[1]/2)) + self.assertEqual(transformation.shape[1], int(original.shape[1] / 2)) # Custon preprocessing test to check if clipping to zero works - X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits') + X_train, Y_train, X_test, Y_test = get_dataset(dataset="digits") original_X_train = X_train.copy() ss = sklearn.preprocessing.StandardScaler() X_train = ss.fit_transform(X_train) - configuration_space = SelectPercentileClassification.get_hyperparameter_search_space() + configuration_space = ( + SelectPercentileClassification.get_hyperparameter_search_space() + ) default = configuration_space.get_default_configuration() preprocessor = SelectPercentileClassification( - random_state=1, - **{hp_name: default[hp_name] for hp_name in default if default[hp_name] is not None}, - ) + random_state=1, + **{ + hp_name: default[hp_name] + for hp_name in default + if default[hp_name] is not None + }, + ) transformer = preprocessor.fit(X_train, Y_train) transformation, original = transformer.transform(X_train), original_X_train @@ -48,11 +55,13 @@ def test_preprocessing_dtype(self): X_train, Y_train, X_test, Y_test = get_dataset("iris") self.assertEqual(X_train.dtype, np.float32) - configuration_space = SelectPercentileClassification.get_hyperparameter_search_space() + configuration_space = ( + SelectPercentileClassification.get_hyperparameter_search_space() + ) default = configuration_space.get_default_configuration() - preprocessor = SelectPercentileClassification(random_state=1, - **{hp_name: default[hp_name] - for hp_name in default}) + preprocessor = SelectPercentileClassification( + random_state=1, **{hp_name: default[hp_name] for hp_name in default} + ) preprocessor.fit(X_train, Y_train) Xt = preprocessor.transform(X_train) self.assertEqual(Xt.dtype, np.float32) @@ -60,11 +69,13 @@ def test_preprocessing_dtype(self): # np.float64 X_train, Y_train, X_test, Y_test = get_dataset("iris") X_train = X_train.astype(np.float64) - configuration_space = SelectPercentileClassification.get_hyperparameter_search_space() + configuration_space = ( + SelectPercentileClassification.get_hyperparameter_search_space() + 
) default = configuration_space.get_default_configuration() - preprocessor = SelectPercentileClassification(random_state=1, - **{hp_name: default[hp_name] - for hp_name in default}) + preprocessor = SelectPercentileClassification( + random_state=1, **{hp_name: default[hp_name] for hp_name in default} + ) preprocessor.fit(X_train, Y_train) Xt = preprocessor.transform(X_train) self.assertEqual(Xt.dtype, np.float64) @@ -73,11 +84,13 @@ def test_preprocessing_dtype(self): # np.float32 X_train, Y_train, X_test, Y_test = get_dataset("iris", make_sparse=True) self.assertEqual(X_train.dtype, np.float32) - configuration_space = SelectPercentileClassification.get_hyperparameter_search_space() + configuration_space = ( + SelectPercentileClassification.get_hyperparameter_search_space() + ) default = configuration_space.get_default_configuration() - preprocessor = SelectPercentileClassification(random_state=1, - **{hp_name: default[hp_name] - for hp_name in default}) + preprocessor = SelectPercentileClassification( + random_state=1, **{hp_name: default[hp_name] for hp_name in default} + ) preprocessor.fit(X_train, Y_train) Xt = preprocessor.transform(X_train) self.assertEqual(Xt.dtype, np.float32) @@ -85,11 +98,13 @@ def test_preprocessing_dtype(self): # np.float64 X_train, Y_train, X_test, Y_test = get_dataset("iris", make_sparse=True) X_train = X_train.astype(np.float64) - configuration_space = SelectPercentileClassification.get_hyperparameter_search_space() + configuration_space = ( + SelectPercentileClassification.get_hyperparameter_search_space() + ) default = configuration_space.get_default_configuration() - preprocessor = SelectPercentileClassification(random_state=1, - **{hp_name: default[hp_name] - for hp_name in default}) + preprocessor = SelectPercentileClassification( + random_state=1, **{hp_name: default[hp_name] for hp_name in default} + ) preprocessor.fit(X_train, Y_train) Xt = preprocessor.transform(X_train) self.assertEqual(Xt.dtype, np.float64) diff --git a/test/test_pipeline/components/feature_preprocessing/test_select_percentile_regression.py b/test/test_pipeline/components/feature_preprocessing/test_select_percentile_regression.py index a76a15c5a3..0fd335fd83 100644 --- a/test/test_pipeline/components/feature_preprocessing/test_select_percentile_regression.py +++ b/test/test_pipeline/components/feature_preprocessing/test_select_percentile_regression.py @@ -2,8 +2,9 @@ import numpy as np -from autosklearn.pipeline.components.feature_preprocessing.select_percentile_regression \ - import SelectPercentileRegression +from autosklearn.pipeline.components.feature_preprocessing.select_percentile_regression import ( # noqa: E501 + SelectPercentileRegression, +) from autosklearn.pipeline.util import _test_preprocessing, get_dataset @@ -12,9 +13,9 @@ def test_default_configuration(self): transformation, original = _test_preprocessing( dataset="boston", Preprocessor=SelectPercentileRegression, - ) + ) self.assertEqual(transformation.shape[0], original.shape[0]) - self.assertEqual(transformation.shape[1], int(original.shape[1]/2)) + self.assertEqual(transformation.shape[1], int(original.shape[1] / 2)) self.assertFalse((transformation == 0).all()) def test_preprocessing_dtype(self): @@ -23,11 +24,13 @@ def test_preprocessing_dtype(self): X_train, Y_train, X_test, Y_test = get_dataset("iris") self.assertEqual(X_train.dtype, np.float32) - configuration_space = SelectPercentileRegression.get_hyperparameter_search_space() + configuration_space = ( + 
SelectPercentileRegression.get_hyperparameter_search_space() + ) default = configuration_space.get_default_configuration() - preprocessor = SelectPercentileRegression(random_state=1, - **{hp_name: default[hp_name] - for hp_name in default}) + preprocessor = SelectPercentileRegression( + random_state=1, **{hp_name: default[hp_name] for hp_name in default} + ) preprocessor.fit(X_train, Y_train) Xt = preprocessor.transform(X_train) self.assertEqual(Xt.dtype, np.float32) @@ -35,11 +38,13 @@ def test_preprocessing_dtype(self): # np.float64 X_train, Y_train, X_test, Y_test = get_dataset("iris") X_train = X_train.astype(np.float64) - configuration_space = SelectPercentileRegression.get_hyperparameter_search_space() + configuration_space = ( + SelectPercentileRegression.get_hyperparameter_search_space() + ) default = configuration_space.get_default_configuration() - preprocessor = SelectPercentileRegression(random_state=1, - **{hp_name: default[hp_name] - for hp_name in default}) + preprocessor = SelectPercentileRegression( + random_state=1, **{hp_name: default[hp_name] for hp_name in default} + ) preprocessor.fit(X_train, Y_train) Xt = preprocessor.transform(X_train) self.assertEqual(Xt.dtype, np.float64) diff --git a/test/test_pipeline/components/feature_preprocessing/test_select_rates_classification.py b/test/test_pipeline/components/feature_preprocessing/test_select_rates_classification.py index 2497b5174a..2d1c2aaf78 100644 --- a/test/test_pipeline/components/feature_preprocessing/test_select_rates_classification.py +++ b/test/test_pipeline/components/feature_preprocessing/test_select_rates_classification.py @@ -4,8 +4,9 @@ import scipy.sparse import sklearn.preprocessing -from autosklearn.pipeline.components.feature_preprocessing.select_rates_classification import \ - SelectClassificationRates +from autosklearn.pipeline.components.feature_preprocessing.select_rates_classification import ( # noqa: E501 + SelectClassificationRates, +) from autosklearn.pipeline.util import _test_preprocessing, get_dataset @@ -17,27 +18,33 @@ def test_default_configuration(self): self.assertFalse((transformation == 0).all()) transformation, original = _test_preprocessing( - SelectClassificationRates, make_sparse=True) + SelectClassificationRates, make_sparse=True + ) self.assertTrue(scipy.sparse.issparse(transformation)) self.assertEqual(transformation.shape[0], original.shape[0]) self.assertEqual(transformation.shape[1], int(original.shape[1] / 2)) # Custom preprocessing test to check if clipping to zero works - X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits') + X_train, Y_train, X_test, Y_test = get_dataset(dataset="digits") original_X_train = X_train.copy() ss = sklearn.preprocessing.StandardScaler() X_train = ss.fit_transform(X_train) - configuration_space = SelectClassificationRates.get_hyperparameter_search_space() + configuration_space = ( + SelectClassificationRates.get_hyperparameter_search_space() + ) default = configuration_space.get_default_configuration() - preprocessor = SelectClassificationRates(random_state=1, - **{hp_name: default[hp_name] - for hp_name in default - if default[hp_name] is not None}) + preprocessor = SelectClassificationRates( + random_state=1, + **{ + hp_name: default[hp_name] + for hp_name in default + if default[hp_name] is not None + }, + ) transformer = preprocessor.fit(X_train, Y_train) - transformation, original = transformer.transform( - X_train), original_X_train + transformation, original = transformer.transform(X_train), original_X_train 
self.assertEqual(transformation.shape[0], original.shape[0]) # I don't know why it's 52 here and not 32 which would be half of the # number of features. Seems to be related to a runtime warning raised @@ -50,11 +57,13 @@ def test_preprocessing_dtype(self): X_train, Y_train, X_test, Y_test = get_dataset("iris") self.assertEqual(X_train.dtype, np.float32) - configuration_space = SelectClassificationRates.get_hyperparameter_search_space() + configuration_space = ( + SelectClassificationRates.get_hyperparameter_search_space() + ) default = configuration_space.get_default_configuration() - preprocessor = SelectClassificationRates(random_state=1, - **{hp_name: default[hp_name] for hp_name in - default}) + preprocessor = SelectClassificationRates( + random_state=1, **{hp_name: default[hp_name] for hp_name in default} + ) preprocessor.fit(X_train, Y_train) Xt = preprocessor.transform(X_train) self.assertEqual(Xt.dtype, np.float32) @@ -62,11 +71,13 @@ def test_preprocessing_dtype(self): # np.float64 X_train, Y_train, X_test, Y_test = get_dataset("iris") X_train = X_train.astype(np.float64) - configuration_space = SelectClassificationRates.get_hyperparameter_search_space() + configuration_space = ( + SelectClassificationRates.get_hyperparameter_search_space() + ) default = configuration_space.get_default_configuration() - preprocessor = SelectClassificationRates(random_state=1, - **{hp_name: default[hp_name] for hp_name in - default}) + preprocessor = SelectClassificationRates( + random_state=1, **{hp_name: default[hp_name] for hp_name in default} + ) preprocessor.fit(X_train, Y_train) Xt = preprocessor.transform(X_train) self.assertEqual(Xt.dtype, np.float64) @@ -75,11 +86,13 @@ def test_preprocessing_dtype(self): # np.float32 X_train, Y_train, X_test, Y_test = get_dataset("iris", make_sparse=True) self.assertEqual(X_train.dtype, np.float32) - configuration_space = SelectClassificationRates.get_hyperparameter_search_space() + configuration_space = ( + SelectClassificationRates.get_hyperparameter_search_space() + ) default = configuration_space.get_default_configuration() - preprocessor = SelectClassificationRates(random_state=1, - **{hp_name: default[hp_name] for hp_name in - default}) + preprocessor = SelectClassificationRates( + random_state=1, **{hp_name: default[hp_name] for hp_name in default} + ) preprocessor.fit(X_train, Y_train) Xt = preprocessor.transform(X_train) self.assertEqual(Xt.dtype, np.float32) @@ -87,11 +100,13 @@ def test_preprocessing_dtype(self): # np.float64 X_train, Y_train, X_test, Y_test = get_dataset("iris", make_sparse=True) X_train = X_train.astype(np.float64) - configuration_space = SelectClassificationRates.get_hyperparameter_search_space() + configuration_space = ( + SelectClassificationRates.get_hyperparameter_search_space() + ) default = configuration_space.get_default_configuration() - preprocessor = SelectClassificationRates(random_state=1, - **{hp_name: default[hp_name] for hp_name in - default}) + preprocessor = SelectClassificationRates( + random_state=1, **{hp_name: default[hp_name] for hp_name in default} + ) preprocessor.fit(X_train, Y_train) Xt = preprocessor.transform(X_train) self.assertEqual(Xt.dtype, np.float64) diff --git a/test/test_pipeline/components/feature_preprocessing/test_select_rates_regression.py b/test/test_pipeline/components/feature_preprocessing/test_select_rates_regression.py index 573bab32ce..869d7fbee2 100644 --- a/test/test_pipeline/components/feature_preprocessing/test_select_rates_regression.py +++ 
b/test/test_pipeline/components/feature_preprocessing/test_select_rates_regression.py @@ -4,8 +4,9 @@ import scipy.sparse import sklearn.preprocessing -from autosklearn.pipeline.components.feature_preprocessing.select_rates_regression import \ - SelectRegressionRates +from autosklearn.pipeline.components.feature_preprocessing.select_rates_regression import ( # noqa: E501 + SelectRegressionRates, +) from autosklearn.pipeline.util import _test_preprocessing, get_dataset @@ -17,34 +18,38 @@ def test_default_configuration(self): self.assertFalse((transformation == 0).all()) transformation, original = _test_preprocessing( - SelectRegressionRates, make_sparse=True) + SelectRegressionRates, make_sparse=True + ) self.assertTrue(scipy.sparse.issparse(transformation)) self.assertEqual(transformation.shape[0], original.shape[0]) self.assertEqual(transformation.shape[1], int(original.shape[1] / 2)) # Makes sure that the features are reduced, not the number of samples - X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits') + X_train, Y_train, X_test, Y_test = get_dataset(dataset="digits") original_X_train = X_train.copy() ss = sklearn.preprocessing.StandardScaler() X_train = ss.fit_transform(X_train) configuration_space = SelectRegressionRates.get_hyperparameter_search_space() default = configuration_space.get_default_configuration() - preprocessor = SelectRegressionRates(random_state=1, - **{hp_name: default[hp_name] - for hp_name in default - if default[hp_name] is not None}) + preprocessor = SelectRegressionRates( + random_state=1, + **{ + hp_name: default[hp_name] + for hp_name in default + if default[hp_name] is not None + }, + ) transformer = preprocessor.fit(X_train, Y_train) - transformation, original = transformer.transform( - X_train), original_X_train + transformation, original = transformer.transform(X_train), original_X_train self.assertEqual(transformation.shape[0], original.shape[0]) self.assertEqual(transformation.shape[1], 21) def test_default_configuration_regression(self): transformation, original = _test_preprocessing( SelectRegressionRates, - dataset='boston', + dataset="boston", ) self.assertEqual(transformation.shape[0], original.shape[0]) # From 13 to 12 features @@ -57,15 +62,15 @@ def test_preprocessing_dtype_regression(self): X_train, Y_train, X_test, Y_test = get_dataset("boston") self.assertEqual(X_train.dtype, np.float32) - dataset_properties = {'target_type': 'regression'} + dataset_properties = {"target_type": "regression"} configuration_space = SelectRegressionRates.get_hyperparameter_search_space( dataset_properties ) default = configuration_space.get_default_configuration() - preprocessor = SelectRegressionRates(random_state=1, - **{hp_name: default[hp_name] for hp_name in - default}) + preprocessor = SelectRegressionRates( + random_state=1, **{hp_name: default[hp_name] for hp_name in default} + ) preprocessor.fit(X_train, Y_train) Xt = preprocessor.transform(X_train) self.assertEqual(Xt.dtype, np.float32) @@ -77,9 +82,9 @@ def test_preprocessing_dtype_regression(self): dataset_properties ) default = configuration_space.get_default_configuration() - preprocessor = SelectRegressionRates(random_state=1, - **{hp_name: default[hp_name] for hp_name in - default}) + preprocessor = SelectRegressionRates( + random_state=1, **{hp_name: default[hp_name] for hp_name in default} + ) preprocessor.fit(X_train, Y_train) Xt = preprocessor.transform(X_train) self.assertEqual(Xt.dtype, np.float64) diff --git 
a/test/test_pipeline/components/feature_preprocessing/test_truncatedSVD.py b/test/test_pipeline/components/feature_preprocessing/test_truncatedSVD.py index df1f1d2fe6..7e16fa7fa5 100644 --- a/test/test_pipeline/components/feature_preprocessing/test_truncatedSVD.py +++ b/test/test_pipeline/components/feature_preprocessing/test_truncatedSVD.py @@ -1,11 +1,16 @@ import unittest -from sklearn.linear_model import RidgeClassifier -from autosklearn.pipeline.components.feature_preprocessing.truncatedSVD import \ - TruncatedSVD -from autosklearn.pipeline.util import _test_preprocessing, PreprocessingTestCase, \ - get_dataset import sklearn.metrics +from sklearn.linear_model import RidgeClassifier + +from autosklearn.pipeline.components.feature_preprocessing.truncatedSVD import ( + TruncatedSVD, +) +from autosklearn.pipeline.util import ( + PreprocessingTestCase, + _test_preprocessing, + get_dataset, +) class TruncatedSVDComponentTest(PreprocessingTestCase): @@ -16,14 +21,19 @@ def test_default_configuration(self): def test_default_configuration_classify(self): for i in range(2): - X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits', - make_sparse=True) + X_train, Y_train, X_test, Y_test = get_dataset( + dataset="digits", make_sparse=True + ) configuration_space = TruncatedSVD.get_hyperparameter_search_space() default = configuration_space.get_default_configuration() preprocessor = TruncatedSVD( random_state=1, - **{hp_name: default[hp_name] for hp_name in default if default[hp_name] is not None} - ) + **{ + hp_name: default[hp_name] + for hp_name in default + if default[hp_name] is not None + }, + ) preprocessor.fit(X_train, Y_train) X_train_trans = preprocessor.transform(X_train) X_test_trans = preprocessor.transform(X_test) @@ -37,6 +47,6 @@ def test_default_configuration_classify(self): @unittest.skip("Truncated SVD returns np.float64.") def test_preprocessing_dtype(self): - super(TruncatedSVDComponentTest, - self)._test_preprocessing_dtype(TruncatedSVD, - test_sparse=False) + super(TruncatedSVDComponentTest, self)._test_preprocessing_dtype( + TruncatedSVD, test_sparse=False + ) diff --git a/test/test_pipeline/components/regression/test_adaboost.py b/test/test_pipeline/components/regression/test_adaboost.py index c7f199d5ee..b62df4fd9b 100644 --- a/test/test_pipeline/components/regression/test_adaboost.py +++ b/test/test_pipeline/components/regression/test_adaboost.py @@ -1,7 +1,7 @@ import sklearn.ensemble -from autosklearn.pipeline.components.regression.adaboost import \ - AdaboostRegressor +from autosklearn.pipeline.components.regression.adaboost import AdaboostRegressor + from .test_base import BaseRegressionComponentTest diff --git a/test/test_pipeline/components/regression/test_ard_regression.py b/test/test_pipeline/components/regression/test_ard_regression.py index dac8d61349..829bf9b507 100644 --- a/test/test_pipeline/components/regression/test_ard_regression.py +++ b/test/test_pipeline/components/regression/test_ard_regression.py @@ -1,7 +1,7 @@ import sklearn.linear_model -from autosklearn.pipeline.components.regression.ard_regression import \ - ARDRegression +from autosklearn.pipeline.components.regression.ard_regression import ARDRegression + from .test_base import BaseRegressionComponentTest diff --git a/test/test_pipeline/components/regression/test_base.py b/test/test_pipeline/components/regression/test_base.py index 8ffc1d23fe..dcf7770332 100644 --- a/test/test_pipeline/components/regression/test_base.py +++ b/test/test_pipeline/components/regression/test_base.py @@ 
-1,19 +1,17 @@ -from typing import Type, Container +from typing import Container, Type import unittest -import pytest - import numpy as np +import pytest import sklearn.metrics -from autosklearn.pipeline.util import _test_regressor, _test_regressor_iterative_fit -from autosklearn.pipeline.constants import SPARSE +from autosklearn.pipeline.components.regression import RegressorChoice, _regressors from autosklearn.pipeline.components.regression.libsvm_svr import LibSVM_SVR +from autosklearn.pipeline.constants import SPARSE +from autosklearn.pipeline.util import _test_regressor, _test_regressor_iterative_fit -from autosklearn.pipeline.components.regression import _regressors, RegressorChoice - -from test.test_pipeline.ignored_warnings import regressor_warnings, ignore_warnings +from test.test_pipeline.ignored_warnings import ignore_warnings, regressor_warnings class BaseRegressionComponentTest(unittest.TestCase): @@ -37,8 +35,7 @@ def test_default_boston(self): with ignore_warnings(regressor_warnings): predictions, targets, n_calls = _test_regressor( - dataset="boston", - Regressor=self.module + dataset="boston", Regressor=self.module ) score = sklearn.metrics.r2_score(y_true=targets, y_pred=predictions) @@ -70,14 +67,13 @@ def test_default_boston_iterative_fit(self): if self.__class__ == BaseRegressionComponentTest: return - if not hasattr(self.module, 'iterative_fit'): + if not hasattr(self.module, "iterative_fit"): return for i in range(2): with ignore_warnings(regressor_warnings): predictions, targets, regressor = _test_regressor_iterative_fit( - dataset="boston", - Regressor=self.module + dataset="boston", Regressor=self.module ) score = sklearn.metrics.r2_score(targets, predictions) @@ -92,8 +88,8 @@ def test_default_boston_iterative_fit(self): self.assertAlmostEqual(fixture, score, places) if self.step_hyperparameter is not None: - param_name = self.step_hyperparameter['name'] - default = self.step_hyperparameter['value'] + param_name = self.step_hyperparameter["name"] + default = self.step_hyperparameter["value"] value = getattr(regressor.estimator, param_name) expected = self.res.get("boston_iterative_n_iter", default) @@ -110,7 +106,7 @@ def test_default_boston_iterative_sparse_fit(self): if self.__class__ == BaseRegressionComponentTest: return - if not hasattr(self.module, 'iterative_fit'): + if not hasattr(self.module, "iterative_fit"): return if SPARSE not in self.module.get_properties()["input"]: @@ -119,15 +115,13 @@ def test_default_boston_iterative_sparse_fit(self): for i in range(2): with ignore_warnings(regressor_warnings): predictions, targets, _ = _test_regressor_iterative_fit( - dataset="boston", - Regressor=self.module, - sparse=True + dataset="boston", Regressor=self.module, sparse=True ) - self.assertAlmostEqual(self.res["default_boston_iterative_sparse"], - sklearn.metrics.r2_score(targets, - predictions), - places=self.res.get( - "default_boston_iterative_sparse_places", 7)) + self.assertAlmostEqual( + self.res["default_boston_iterative_sparse"], + sklearn.metrics.r2_score(targets, predictions), + places=self.res.get("default_boston_iterative_sparse_places", 7), + ) def test_default_boston_sparse(self): @@ -140,16 +134,14 @@ def test_default_boston_sparse(self): for i in range(2): with ignore_warnings(regressor_warnings): predictions, targets, _ = _test_regressor( - dataset="boston", - Regressor=self.module, - sparse=True + dataset="boston", Regressor=self.module, sparse=True ) - self.assertAlmostEqual(self.res["default_boston_sparse"], - 
sklearn.metrics.r2_score(targets, - predictions), - places=self.res.get( - "default_boston_sparse_places", 7)) + self.assertAlmostEqual( + self.res["default_boston_sparse"], + sklearn.metrics.r2_score(targets, predictions), + places=self.res.get("default_boston_sparse_places", 7), + ) def test_default_diabetes(self): @@ -159,15 +151,14 @@ def test_default_diabetes(self): for i in range(2): with ignore_warnings(regressor_warnings): predictions, targets, n_calls = _test_regressor( - dataset="diabetes", - Regressor=self.module + dataset="diabetes", Regressor=self.module ) - self.assertAlmostEqual(self.res["default_diabetes"], - sklearn.metrics.r2_score(targets, - predictions), - places=self.res.get( - "default_diabetes_places", 7)) + self.assertAlmostEqual( + self.res["default_diabetes"], + sklearn.metrics.r2_score(targets, predictions), + places=self.res.get("default_diabetes_places", 7), + ) if self.res.get("diabetes_n_calls"): self.assertEqual(self.res["diabetes_n_calls"], n_calls) @@ -177,28 +168,27 @@ def test_default_diabetes_iterative_fit(self): if self.__class__ == BaseRegressionComponentTest: return - if not hasattr(self.module, 'iterative_fit'): + if not hasattr(self.module, "iterative_fit"): return for i in range(2): with ignore_warnings(regressor_warnings): predictions, targets, _ = _test_regressor_iterative_fit( - dataset="diabetes", - Regressor=self.module + dataset="diabetes", Regressor=self.module ) - self.assertAlmostEqual(self.res["default_diabetes_iterative"], - sklearn.metrics.r2_score(targets, - predictions), - places=self.res.get( - "default_diabetes_iterative_places", 7)) + self.assertAlmostEqual( + self.res["default_diabetes_iterative"], + sklearn.metrics.r2_score(targets, predictions), + places=self.res.get("default_diabetes_iterative_places", 7), + ) def test_default_diabetes_iterative_sparse_fit(self): if self.__class__ == BaseRegressionComponentTest: return - if not hasattr(self.module, 'iterative_fit'): + if not hasattr(self.module, "iterative_fit"): return if SPARSE not in self.module.get_properties()["input"]: @@ -207,21 +197,21 @@ def test_default_diabetes_iterative_sparse_fit(self): for i in range(2): with ignore_warnings(regressor_warnings): predictions, targets, regressor = _test_regressor_iterative_fit( - dataset="diabetes", - Regressor=self.module, - sparse=True + dataset="diabetes", Regressor=self.module, sparse=True ) - self.assertAlmostEqual(self.res["default_diabetes_iterative_sparse"], - sklearn.metrics.r2_score(targets, - predictions), - places=self.res.get( - "default_diabetes_iterative_sparse_places", 7)) + self.assertAlmostEqual( + self.res["default_diabetes_iterative_sparse"], + sklearn.metrics.r2_score(targets, predictions), + places=self.res.get("default_diabetes_iterative_sparse_places", 7), + ) if self.step_hyperparameter is not None: self.assertEqual( - getattr(regressor.estimator, self.step_hyperparameter['name']), - self.res.get("diabetes_iterative_n_iter", self.step_hyperparameter['value']) + getattr(regressor.estimator, self.step_hyperparameter["name"]), + self.res.get( + "diabetes_iterative_n_iter", self.step_hyperparameter["value"] + ), ) def test_default_diabetes_sparse(self): @@ -235,47 +225,73 @@ def test_default_diabetes_sparse(self): for i in range(2): with ignore_warnings(regressor_warnings): predictions, targets, _ = _test_regressor( - dataset="diabetes", - Regressor=self.module, - sparse=True + dataset="diabetes", Regressor=self.module, sparse=True ) - self.assertAlmostEqual(self.res["default_diabetes_sparse"], - 
sklearn.metrics.r2_score(targets, - predictions), - places=self.res.get( - "default_diabetes_sparse_places", 7)) + self.assertAlmostEqual( + self.res["default_diabetes_sparse"], + sklearn.metrics.r2_score(targets, predictions), + places=self.res.get("default_diabetes_sparse_places", 7), + ) def test_module_idempotent(self): - """ Fitting twice with the same config gives the same model params. + """Fitting twice with the same config gives the same model params. - This is only valid when the random_state passed is an int. If a - RandomState object is passed then repeated calls to fit will have - different results. See the section on "Controlling Randomness" in the - sklearn docs. + This is only valid when the random_state passed is an int. If a + RandomState object is passed then repeated calls to fit will have + different results. See the section on "Controlling Randomness" in the + sklearn docs. - https://scikit-learn.org/0.24/common_pitfalls.html#controlling-randomness + https://scikit-learn.org/0.24/common_pitfalls.html#controlling-randomness """ if self.__class__ == BaseRegressionComponentTest: return regressor_cls = self.module - X = np.array([ - [0.5, 0.5], [0.5, 0.5], [0.5, 0.5], [0.5, 0.5], - [0.5, 0.5], [0.5, 0.5], [0.5, 0.5], [0.5, 0.5], - [0.5, 0.5], [0.5, 0.5], [0.5, 0.5], [0.5, 0.5], - [0.5, 0.5], [0.5, 0.5], [0.5, 0.5], [0.5, 0.5], - ]) - y = np.array([ - 1, 1, 1, 1, - 1, 1, 1, 1, - 1, 1, 1, 1, - 1, 1, 1, 1, - ]) + X = np.array( + [ + [0.5, 0.5], + [0.5, 0.5], + [0.5, 0.5], + [0.5, 0.5], + [0.5, 0.5], + [0.5, 0.5], + [0.5, 0.5], + [0.5, 0.5], + [0.5, 0.5], + [0.5, 0.5], + [0.5, 0.5], + [0.5, 0.5], + [0.5, 0.5], + [0.5, 0.5], + [0.5, 0.5], + [0.5, 0.5], + ] + ) + y = np.array( + [ + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + ] + ) # We ignore certain keys when comparing - param_keys_ignored = ['base_estimator'] + param_keys_ignored = ["base_estimator"] # We use the default config + sampled ones configuration_space = regressor_cls.get_hyperparameter_search_space() @@ -292,14 +308,14 @@ def test_module_idempotent(self): with ignore_warnings(regressor_warnings): params_first = regressor.fit(X.copy(), y.copy()).estimator.get_params() - if hasattr(regressor.estimator, 'random_state'): + if hasattr(regressor.estimator, "random_state"): rs_1 = regressor.random_state rs_estimator_1 = regressor.estimator.random_state with ignore_warnings(regressor_warnings): params_second = regressor.fit(X.copy(), y.copy()).estimator.get_params() - if hasattr(regressor.estimator, 'random_state'): + if hasattr(regressor.estimator, "random_state"): rs_2 = regressor.random_state rs_estimator_2 = regressor.estimator.random_state @@ -310,27 +326,27 @@ def test_module_idempotent(self): del params[key] # They should have equal parameters - self.assertEqual(params_first, params_second, - f"Failed with model args {model_args}") - if ( - hasattr(regressor.estimator, 'random_state') - and not isinstance(regressor, LibSVM_SVR) + self.assertEqual( + params_first, params_second, f"Failed with model args {model_args}" + ) + if hasattr(regressor.estimator, "random_state") and not isinstance( + regressor, LibSVM_SVR ): # sklearn.svm.SVR has it as an attribute but does not use it and # defaults it to None, even if a value is passed in - assert all([ - seed == random_state - for random_state in [rs_1, rs_estimator_1, rs_2, rs_estimator_2] - ]) + assert all( + [ + seed == random_state + for random_state in [rs_1, rs_estimator_1, rs_2, rs_estimator_2] + ] + ) 
@pytest.mark.parametrize("regressor", _regressors.values()) @pytest.mark.parametrize("X", [np.array([[1, 2, 3]] * 20)]) @pytest.mark.parametrize("y", [np.array([1] * 20)]) def test_fit_and_predict_with_1d_targets_as_1d( - regressor: Type[RegressorChoice], - X: np.ndarray, - y: np.ndarray + regressor: Type[RegressorChoice], X: np.ndarray, y: np.ndarray ) -> None: """Test that all pipelines work with 1d target types @@ -371,9 +387,7 @@ def test_fit_and_predict_with_1d_targets_as_1d( @pytest.mark.parametrize("X", [np.array([[1, 2, 3]] * 20)]) @pytest.mark.parametrize("y", [np.array([[1]] * 20)]) def test_fit_and_predict_with_1d_targets_as_2d( - regressor: Type[RegressorChoice], - X: np.ndarray, - y: np.ndarray + regressor: Type[RegressorChoice], X: np.ndarray, y: np.ndarray ) -> None: """Test that all pipelines work with 1d target types when they are wrapped as 2d @@ -412,17 +426,18 @@ def test_fit_and_predict_with_1d_targets_as_2d( assert len(predictions) == len(y) -@pytest.mark.parametrize("regressor", [ - regressor - for regressor in _regressors.values() - if regressor.get_properties()['handles_multilabel'] -]) +@pytest.mark.parametrize( + "regressor", + [ + regressor + for regressor in _regressors.values() + if regressor.get_properties()["handles_multilabel"] + ], +) @pytest.mark.parametrize("X", [np.array([[1, 2, 3]] * 20)]) @pytest.mark.parametrize("y", [np.array([[1, 1, 1]] * 20)]) def test_fit_and_predict_with_2d_targets( - regressor: Type[RegressorChoice], - X: np.ndarray, - y: np.ndarray + regressor: Type[RegressorChoice], X: np.ndarray, y: np.ndarray ) -> None: """Test that all pipelines work with 2d target types diff --git a/test/test_pipeline/components/regression/test_decision_tree.py b/test/test_pipeline/components/regression/test_decision_tree.py index a5d2e53990..942b9db601 100644 --- a/test/test_pipeline/components/regression/test_decision_tree.py +++ b/test/test_pipeline/components/regression/test_decision_tree.py @@ -1,7 +1,7 @@ import sklearn.tree -from autosklearn.pipeline.components.regression.decision_tree import \ - DecisionTree +from autosklearn.pipeline.components.regression.decision_tree import DecisionTree + from .test_base import BaseRegressionComponentTest diff --git a/test/test_pipeline/components/regression/test_extra_trees.py b/test/test_pipeline/components/regression/test_extra_trees.py index 5d6f6d1acf..8d92fa30c8 100644 --- a/test/test_pipeline/components/regression/test_extra_trees.py +++ b/test/test_pipeline/components/regression/test_extra_trees.py @@ -1,7 +1,7 @@ import sklearn.ensemble -from autosklearn.pipeline.components.regression.extra_trees import \ - ExtraTreesRegressor +from autosklearn.pipeline.components.regression.extra_trees import ExtraTreesRegressor + from .test_base import BaseRegressionComponentTest @@ -12,18 +12,18 @@ class ExtraTreesComponentTest(BaseRegressionComponentTest): res = dict() res["default_boston"] = 0.8539264243687228 res["boston_n_calls"] = 9 - res["default_boston_iterative"] = res['default_boston'] + res["default_boston_iterative"] = res["default_boston"] res["default_boston_sparse"] = 0.411211701806908 - res["default_boston_iterative_sparse"] = res['default_boston_sparse'] + res["default_boston_iterative_sparse"] = res["default_boston_sparse"] res["default_diabetes"] = 0.3885150255877827 res["diabetes_n_calls"] = 9 - res["default_diabetes_iterative"] = res['default_diabetes'] + res["default_diabetes_iterative"] = res["default_diabetes"] res["default_diabetes_sparse"] = 0.2422804139169642 - 
res["default_diabetes_iterative_sparse"] = res['default_diabetes_sparse'] + res["default_diabetes_iterative_sparse"] = res["default_diabetes_sparse"] sk_mod = sklearn.ensemble.ExtraTreesRegressor module = ExtraTreesRegressor step_hyperparameter = { - 'name': 'n_estimators', - 'value': module.get_max_iter(), + "name": "n_estimators", + "value": module.get_max_iter(), } diff --git a/test/test_pipeline/components/regression/test_gaussian_process.py b/test/test_pipeline/components/regression/test_gaussian_process.py index d148d416df..0f766e22b1 100644 --- a/test/test_pipeline/components/regression/test_gaussian_process.py +++ b/test/test_pipeline/components/regression/test_gaussian_process.py @@ -1,7 +1,6 @@ import sklearn.gaussian_process -from autosklearn.pipeline.components.regression.gaussian_process import \ - GaussianProcess +from autosklearn.pipeline.components.regression.gaussian_process import GaussianProcess from .test_base import BaseRegressionComponentTest diff --git a/test/test_pipeline/components/regression/test_gradient_boosting.py b/test/test_pipeline/components/regression/test_gradient_boosting.py index 9fcb2cd623..6412fd0598 100644 --- a/test/test_pipeline/components/regression/test_gradient_boosting.py +++ b/test/test_pipeline/components/regression/test_gradient_boosting.py @@ -1,7 +1,8 @@ import sklearn.ensemble -from autosklearn.pipeline.components.regression.gradient_boosting import \ - GradientBoosting +from autosklearn.pipeline.components.regression.gradient_boosting import ( + GradientBoosting, +) from .test_base import BaseRegressionComponentTest diff --git a/test/test_pipeline/components/regression/test_k_nearest_neighbors.py b/test/test_pipeline/components/regression/test_k_nearest_neighbors.py index 40637c3ec8..19d0cf40f5 100644 --- a/test/test_pipeline/components/regression/test_k_nearest_neighbors.py +++ b/test/test_pipeline/components/regression/test_k_nearest_neighbors.py @@ -1,7 +1,9 @@ import sklearn.neighbors -from autosklearn.pipeline.components.regression.k_nearest_neighbors import \ - KNearestNeighborsRegressor +from autosklearn.pipeline.components.regression.k_nearest_neighbors import ( + KNearestNeighborsRegressor, +) + from .test_base import BaseRegressionComponentTest diff --git a/test/test_pipeline/components/regression/test_liblinear_svr.py b/test/test_pipeline/components/regression/test_liblinear_svr.py index 42b73bfba7..37b6552c9b 100644 --- a/test/test_pipeline/components/regression/test_liblinear_svr.py +++ b/test/test_pipeline/components/regression/test_liblinear_svr.py @@ -1,7 +1,7 @@ import sklearn.svm -from autosklearn.pipeline.components.regression.liblinear_svr import \ - LibLinear_SVR +from autosklearn.pipeline.components.regression.liblinear_svr import LibLinear_SVR + from .test_base import BaseRegressionComponentTest diff --git a/test/test_pipeline/components/regression/test_mlp.py b/test/test_pipeline/components/regression/test_mlp.py index c003037c76..9e2a92acac 100644 --- a/test/test_pipeline/components/regression/test_mlp.py +++ b/test/test_pipeline/components/regression/test_mlp.py @@ -64,6 +64,6 @@ class MLPComponentTest(BaseRegressionComponentTest): sk_mod = sklearn.neural_network.MLPRegressor module = MLPRegressor step_hyperparameter = { - 'name': 'n_iter_', - 'value': module.get_max_iter(), + "name": "n_iter_", + "value": module.get_max_iter(), } diff --git a/test/test_pipeline/components/regression/test_random_forests.py b/test/test_pipeline/components/regression/test_random_forests.py index ee6f342a8e..6e1634ff83 100644 --- 
a/test/test_pipeline/components/regression/test_random_forests.py +++ b/test/test_pipeline/components/regression/test_random_forests.py @@ -1,7 +1,7 @@ import sklearn.ensemble -from autosklearn.pipeline.components.regression.random_forest import \ - RandomForest +from autosklearn.pipeline.components.regression.random_forest import RandomForest + from .test_base import BaseRegressionComponentTest @@ -11,18 +11,18 @@ class RandomForestComponentTest(BaseRegressionComponentTest): res = dict() res["default_boston"] = 0.8410063895401654 res["boston_n_calls"] = 9 - res["default_boston_iterative"] = res['default_boston'] + res["default_boston_iterative"] = res["default_boston"] res["default_boston_sparse"] = 0.4194462097407078 - res["default_boston_iterative_sparse"] = res['default_boston_sparse'] + res["default_boston_iterative_sparse"] = res["default_boston_sparse"] res["default_diabetes"] = 0.3496051170409269 res["diabetes_n_calls"] = 9 - res["default_diabetes_iterative"] = res['default_diabetes'] + res["default_diabetes_iterative"] = res["default_diabetes"] res["default_diabetes_sparse"] = 0.2383300978781976 - res["default_diabetes_iterative_sparse"] = res['default_diabetes_sparse'] + res["default_diabetes_iterative_sparse"] = res["default_diabetes_sparse"] sk_mod = sklearn.ensemble.RandomForestRegressor module = RandomForest step_hyperparameter = { - 'name': 'n_estimators', - 'value': module.get_max_iter(), + "name": "n_estimators", + "value": module.get_max_iter(), } diff --git a/test/test_pipeline/components/regression/test_sgd.py b/test/test_pipeline/components/regression/test_sgd.py index df31b3e026..467f3519f1 100644 --- a/test/test_pipeline/components/regression/test_sgd.py +++ b/test/test_pipeline/components/regression/test_sgd.py @@ -1,6 +1,7 @@ import sklearn.linear_model from autosklearn.pipeline.components.regression.sgd import SGD + from .test_base import BaseRegressionComponentTest @@ -10,16 +11,16 @@ class SGDComponentTest(BaseRegressionComponentTest): # Values are extremely bad because the invscaling does not drop the # learning rate aggressively enough! 
res = dict() - res["default_boston"] = -1.1811672998629865e+28 + res["default_boston"] = -1.1811672998629865e28 res["boston_n_calls"] = 6 - res["default_boston_iterative"] = res['default_boston'] - res["default_boston_sparse"] = -1.1518512489347601e+28 - res["default_boston_iterative_sparse"] = res['default_boston_sparse'] + res["default_boston_iterative"] = res["default_boston"] + res["default_boston_sparse"] = -1.1518512489347601e28 + res["default_boston_iterative_sparse"] = res["default_boston_sparse"] res["default_diabetes"] = 0.27420813549185374 res["diabetes_n_calls"] = 10 - res["default_diabetes_iterative"] = res['default_diabetes'] + res["default_diabetes_iterative"] = res["default_diabetes"] res["default_diabetes_sparse"] = 0.034801785011824404 - res["default_diabetes_iterative_sparse"] = res['default_diabetes_sparse'] + res["default_diabetes_iterative_sparse"] = res["default_diabetes_sparse"] sk_mod = sklearn.linear_model.SGDRegressor module = SGD diff --git a/test/test_pipeline/components/regression/test_support_vector_regression.py b/test/test_pipeline/components/regression/test_support_vector_regression.py index 57cde050ed..84bea51da6 100644 --- a/test/test_pipeline/components/regression/test_support_vector_regression.py +++ b/test/test_pipeline/components/regression/test_support_vector_regression.py @@ -1,6 +1,7 @@ import sklearn.linear_model from autosklearn.pipeline.components.regression.libsvm_svr import LibSVM_SVR + from .test_base import BaseRegressionComponentTest diff --git a/test/test_pipeline/components/test_base.py b/test/test_pipeline/components/test_base.py index c53246cc77..1e6ddbbd14 100644 --- a/test/test_pipeline/components/test_base.py +++ b/test/test_pipeline/components/test_base.py @@ -2,20 +2,23 @@ import sys import unittest -from autosklearn.pipeline.components.base import find_components, \ - AutoSklearnClassificationAlgorithm +from autosklearn.pipeline.components.base import ( + AutoSklearnClassificationAlgorithm, + find_components, +) this_dir = os.path.dirname(os.path.abspath(__file__)) sys.path.append(this_dir) class TestBase(unittest.TestCase): - def test_find_components(self): - c = find_components('dummy_components', - os.path.join(this_dir, 'dummy_components'), - AutoSklearnClassificationAlgorithm) - print('COMPONENTS: %s' % repr(c)) + c = find_components( + "dummy_components", + os.path.join(this_dir, "dummy_components"), + AutoSklearnClassificationAlgorithm, + ) + print("COMPONENTS: %s" % repr(c)) self.assertEqual(len(c), 2) - self.assertEqual(c['dummy_component_1'].__name__, 'DummyComponent1') - self.assertEqual(c['dummy_component_2'].__name__, 'DummyComponent2') + self.assertEqual(c["dummy_component_1"].__name__, "DummyComponent1") + self.assertEqual(c["dummy_component_2"].__name__, "DummyComponent2") diff --git a/test/test_pipeline/ignored_warnings.py b/test/test_pipeline/ignored_warnings.py index 5b941281f9..715cacb6ba 100644 --- a/test/test_pipeline/ignored_warnings.py +++ b/test/test_pipeline/ignored_warnings.py @@ -1,106 +1,116 @@ -from contextlib import contextmanager -from typing import List, Iterator, Tuple +from typing import Iterator, List, Tuple, Type import warnings +from contextlib import contextmanager from sklearn.exceptions import ConvergenceWarning - regressor_warnings = [ ( - UserWarning, ( # From QuantileTransformer + UserWarning, + ( # From QuantileTransformer r"n_quantiles \(\d+\) is greater than the total number of samples \(\d+\)\." r" n_quantiles is set to n_samples\." 
- ) + ), ), ( - ConvergenceWarning, ( # From GaussianProcesses + ConvergenceWarning, + ( # From GaussianProcesses r"The optimal value found for dimension \d+ of parameter \w+ is close" r" to the specified (upper|lower) bound .*(Increasing|Decreasing) the bound" r" and calling fit again may find a better value." - ) + ), ), ( - UserWarning, ( # From FastICA - r"n_components is too large: it will be set to \d+" - ) + UserWarning, + (r"n_components is too large: it will be set to \d+"), # From FastICA ), ( - ConvergenceWarning, ( # From SGD - r"Maximum number of iteration reached before convergence\. Consider increasing" - r" max_iter to improve the fit\." - ) + ConvergenceWarning, + ( # From SGD + r"Maximum number of iteration reached before convergence\." + r" Consider increasing max_iter to improve the fit\." + ), ), ( - ConvergenceWarning, ( # From MLP + ConvergenceWarning, + ( # From MLP r"Stochastic Optimizer: Maximum iterations \(\d+\) reached and the" r" optimization hasn't converged yet\." - ) + ), ), ] classifier_warnings = [ ( - UserWarning, ( # From QuantileTransformer + UserWarning, + ( # From QuantileTransformer r"n_quantiles \(\d+\) is greater than the total number of samples \(\d+\)\." r" n_quantiles is set to n_samples\." - ) + ), ), ( - UserWarning, ( # From FastICA - r"n_components is too large: it will be set to \d+" - ) - + UserWarning, + (r"n_components is too large: it will be set to \d+"), # From FastICA ), ( - ConvergenceWarning, ( # From Liblinear + ConvergenceWarning, + ( # From Liblinear r"Liblinear failed to converge, increase the number of iterations\." - ) + ), ), ( - ConvergenceWarning, ( # From SGD - r"Maximum number of iteration reached before convergence\. Consider increasing" - r" max_iter to improve the fit\." - ) + ConvergenceWarning, + ( # From SGD + r"Maximum number of iteration reached before convergence\." + r" Consider increasing max_iter to improve the fit\." + ), ), ( - ConvergenceWarning, ( # From MLP + ConvergenceWarning, + ( # From MLP r"Stochastic Optimizer: Maximum iterations \(\d+\) reached and the" r" optimization hasn't converged yet\." - ) + ), ), ( - ConvergenceWarning, ( # From FastICA + ConvergenceWarning, + ( # From FastICA r"FastICA did not converge\." r" Consider increasing tolerance or the maximum number of iterations\." - ) + ), ), ( - UserWarning, ( # From LDA (Linear Discriminant Analysis) - r"Variables are collinear" - ) + UserWarning, + (r"Variables are collinear"), # From LDA (Linear Discriminant Analysis) ), ( - UserWarning, ( + UserWarning, + ( r"Clustering metrics expects discrete values but received continuous values" r" for label, and multiclass values for target" - ) - ) + ), + ), ] feature_preprocessing_warnings = [ ( - ConvergenceWarning, ( # From liblinear + ConvergenceWarning, + ( # From liblinear r"Liblinear failed to converge, increase the number of iterations."
- ) + ), ) ] -ignored_warnings = regressor_warnings + classifier_warnings + feature_preprocessing_warnings +ignored_warnings = ( + regressor_warnings + classifier_warnings + feature_preprocessing_warnings +) @contextmanager -def ignore_warnings(to_ignore: List[Tuple[Exception, str]] = ignored_warnings) -> Iterator[None]: +def ignore_warnings( + to_ignore: List[Tuple[Type[Warning], str]] = ignored_warnings +) -> Iterator[None]: """A context manager to ignore warnings >>> with ignore_warnings(classifier_warnings): @@ -113,5 +123,5 @@ def ignore_warnings(to_ignore: List[Tuple[Exception, str]] = ignored_warnings) - """ with warnings.catch_warnings(): for category, message in to_ignore: - warnings.filterwarnings('ignore', category=category, message=message) + warnings.filterwarnings("ignore", category=category, message=message) yield diff --git a/test/test_pipeline/implementations/__init__.py b/test/test_pipeline/implementations/__init__.py index 8f0ce6cb7c..92bf78f389 100644 --- a/test/test_pipeline/implementations/__init__.py +++ b/test/test_pipeline/implementations/__init__.py @@ -1 +1 @@ -__author__ = 'feurerm' +__author__ = "feurerm" diff --git a/test/test_pipeline/implementations/test_CategoryShift.py b/test/test_pipeline/implementations/test_CategoryShift.py index 621d9b47cb..1b5e1451e6 100644 --- a/test/test_pipeline/implementations/test_CategoryShift.py +++ b/test/test_pipeline/implementations/test_CategoryShift.py @@ -1,4 +1,5 @@ import unittest + import numpy as np import scipy.sparse @@ -6,7 +7,6 @@ class CategoryShiftTest(unittest.TestCase): - def test_dense(self): X = np.random.randint(0, 255, (3, 4)) Y = CategoryShift().fit_transform(X) @@ -14,7 +14,8 @@ def test_dense(self): def test_sparse(self): X = scipy.sparse.csc_matrix( - ([1, 2, 0, 4], ([0, 1, 2, 1], [3, 2, 1, 0])), shape=(3, 4)) + ([1, 2, 0, 4], ([0, 1, 2, 1], [3, 2, 1, 0])), shape=(3, 4) + ) Y = CategoryShift().fit_transform(X) X.data += 3 self.assertTrue((Y.todense() == X.todense()).all()) @@ -29,6 +30,6 @@ def test_negative(self): CategoryShift().fit_transform(X) def test_string(self): - X = np.array([['a', 'b'], ['c', 'd']]) + X = np.array([["a", "b"], ["c", "d"]]) with self.assertRaises(ValueError): CategoryShift().fit_transform(X) diff --git a/test/test_pipeline/implementations/test_MinorityCoalescer.py b/test/test_pipeline/implementations/test_MinorityCoalescer.py index 73cbf9049a..7bdca8f1aa 100644 --- a/test/test_pipeline/implementations/test_MinorityCoalescer.py +++ b/test/test_pipeline/implementations/test_MinorityCoalescer.py @@ -1,24 +1,25 @@ import unittest -import numpy as np +import numpy as np import scipy.sparse from autosklearn.pipeline.implementations.MinorityCoalescer import MinorityCoalescer class MinorityCoalescerTest(unittest.TestCase): - @property def X1(self): # Generates an array with categories 3, 4, 5, 6, 7 and occurences of 30%, # 30%, 30%, 5% and 5% respectively - X = np.vstack(( - np.ones((30, 10)) * 3, - np.ones((30, 10)) * 4, - np.ones((30, 10)) * 5, - np.ones((5, 10)) * 6, - np.ones((5, 10)) * 7, - )) + X = np.vstack( + ( + np.ones((30, 10)) * 3, + np.ones((30, 10)) * 4, + np.ones((30, 10)) * 5, + np.ones((5, 10)) * 6, + np.ones((5, 10)) * 7, + ) + ) for col in range(X.shape[1]): np.random.shuffle(X[:, col]) return X @@ -27,13 +28,15 @@ def X1(self): def X2(self): # Generates an array with categories 3, 4, 5, 6, 7 and occurences of 5%, # 5%, 5%, 35% and 50% respectively - X = np.vstack(( - np.ones((5, 10)) * 3, - np.ones((5, 10)) * 4, - np.ones((5, 10)) * 5, - np.ones((35, 10)) * 6, - 
np.ones((50, 10)) * 7, - )) + X = np.vstack( + ( + np.ones((5, 10)) * 3, + np.ones((5, 10)) * 4, + np.ones((5, 10)) * 5, + np.ones((35, 10)) * 6, + np.ones((50, 10)) * 7, + ) + ) for col in range(X.shape[1]): np.random.shuffle(X[:, col]) return X @@ -48,7 +51,7 @@ def test_default(self): def test_coalesce_10_percent(self): X = self.X1 - Y = MinorityCoalescer(minimum_fraction=.1).fit_transform(X) + Y = MinorityCoalescer(minimum_fraction=0.1).fit_transform(X) for col in range(Y.shape[1]): hist = np.histogram(Y[:, col], bins=np.arange(1, 7)) np.testing.assert_array_almost_equal(hist[0], [10, 0, 30, 30, 30]) @@ -57,7 +60,7 @@ def test_coalesce_10_percent(self): def test_coalesce_10_percent_sparse(self): X = scipy.sparse.csc_matrix(self.X1) - Y = MinorityCoalescer(minimum_fraction=.1).fit_transform(X) + Y = MinorityCoalescer(minimum_fraction=0.1).fit_transform(X) # Assert no copies were made self.assertEqual(id(X), id(Y)) Y = Y.todense() @@ -75,7 +78,7 @@ def test_transform_after_fit(self): X_fit = self.X1 # Here categories 3, 4, 5 have ocurrence above 10% X_transf = self.X2 # Here it is the opposite, just categs 6 and 7 are above 10% - mc = MinorityCoalescer(minimum_fraction=.1).fit(X_fit) + mc = MinorityCoalescer(minimum_fraction=0.1).fit(X_fit) # transform() should coalesce categories as learned during fit. # Category distribution in X_transf should be irrelevant. diff --git a/test/test_pipeline/implementations/test_SparseOneHotEncoder.py b/test/test_pipeline/implementations/test_SparseOneHotEncoder.py index 731533637b..91f1827c06 100644 --- a/test/test_pipeline/implementations/test_SparseOneHotEncoder.py +++ b/test/test_pipeline/implementations/test_SparseOneHotEncoder.py @@ -1,38 +1,37 @@ import unittest import numpy as np - import scipy.sparse -import sklearn.tree import sklearn.datasets import sklearn.model_selection import sklearn.pipeline +import sklearn.tree from sklearn.impute import SimpleImputer from sklearn.tree import DecisionTreeClassifier -from autosklearn.pipeline.implementations.SparseOneHotEncoder import SparseOneHotEncoder from autosklearn.pipeline.implementations.CategoryShift import CategoryShift +from autosklearn.pipeline.implementations.SparseOneHotEncoder import SparseOneHotEncoder -sparse1 = scipy.sparse.csc_matrix(([3, 2, 1, 1, 2, 3], - ((1, 4, 5, 2, 3, 5), - (0, 0, 0, 1, 1, 1))), shape=(6, 2)) -sparse1_1h = scipy.sparse.csc_matrix(([1, 1, 1, 1, 1, 1], - ((5, 4, 1, 2, 3, 5), - (0, 1, 2, 3, 4, 5))), shape=(6, 6)) +sparse1 = scipy.sparse.csc_matrix( + ([3, 2, 1, 1, 2, 3], ((1, 4, 5, 2, 3, 5), (0, 0, 0, 1, 1, 1))), shape=(6, 2) +) +sparse1_1h = scipy.sparse.csc_matrix( + ([1, 1, 1, 1, 1, 1], ((5, 4, 1, 2, 3, 5), (0, 1, 2, 3, 4, 5))), shape=(6, 6) +) -sparse2 = scipy.sparse.csc_matrix(([2, 1, 0, 0, 0, 0], - ((1, 4, 5, 2, 3, 5), - (0, 0, 0, 1, 1, 1))), shape=(6, 2)) -sparse2_1h = scipy.sparse.csc_matrix(([1, 1, 1, 1, 1, 1], - ((5, 4, 1, 2, 3, 5), - (0, 1, 2, 3, 3, 3))), shape=(6, 4)) +sparse2 = scipy.sparse.csc_matrix( + ([2, 1, 0, 0, 0, 0], ((1, 4, 5, 2, 3, 5), (0, 0, 0, 1, 1, 1))), shape=(6, 2) +) +sparse2_1h = scipy.sparse.csc_matrix( + ([1, 1, 1, 1, 1, 1], ((5, 4, 1, 2, 3, 5), (0, 1, 2, 3, 3, 3))), shape=(6, 4) +) -sparse2_csr = scipy.sparse.csr_matrix(([2, 1, 0, 0, 0, 0], - ((1, 4, 5, 2, 3, 5), - (0, 0, 0, 1, 1, 1))), shape=(6, 2)) -sparse2_csr_1h = scipy.sparse.csr_matrix(([1, 1, 1, 1, 1, 1], - ((5, 4, 1, 2, 3, 5), - (0, 1, 2, 3, 3, 3))), shape=(6, 4)) +sparse2_csr = scipy.sparse.csr_matrix( + ([2, 1, 0, 0, 0, 0], ((1, 4, 5, 2, 3, 5), (0, 0, 0, 1, 1, 1))), shape=(6, 
2) +) +sparse2_csr_1h = scipy.sparse.csr_matrix( + ([1, 1, 1, 1, 1, 1], ((5, 4, 1, 2, 3, 5), (0, 1, 2, 3, 3, 3))), shape=(6, 4) +) class TestSparseOneHotEncoder(unittest.TestCase): @@ -52,8 +51,7 @@ def _fit_then_transform(self, expected, input): transformation = ohe.fit_transform(input) self.assertIsInstance(transformation, scipy.sparse.csr_matrix) np.testing.assert_array_almost_equal( - expected.astype(float), - transformation.todense() + expected.astype(float), transformation.todense() ) self._check_arrays_equal(input, input_copy) @@ -90,23 +88,26 @@ def test_transform_with_unknown_value(self): self.assertEqual(3, np.sum(output)) def test_classification_workflow(self): - X, y = sklearn.datasets.fetch_openml(data_id=24, as_frame=False, return_X_y=True) + X, y = sklearn.datasets.fetch_openml( + data_id=24, as_frame=False, return_X_y=True + ) print(type(X)) - X_train, X_test, y_train, y_test = \ - sklearn.model_selection.train_test_split(X, y, random_state=3, - train_size=0.5, - test_size=0.5) + X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, y, random_state=3, train_size=0.5, test_size=0.5 + ) X_train = scipy.sparse.csc_matrix(X_train) X_test = scipy.sparse.csc_matrix(X_test) - pipeline = sklearn.pipeline.Pipeline(( - ('shift', CategoryShift()), - ('imput', SimpleImputer(strategy='constant', fill_value=2)), - ('ohe', SparseOneHotEncoder()), - ('tree', DecisionTreeClassifier(random_state=1)), - )) + pipeline = sklearn.pipeline.Pipeline( + ( + ("shift", CategoryShift()), + ("imput", SimpleImputer(strategy="constant", fill_value=2)), + ("ohe", SparseOneHotEncoder()), + ("tree", DecisionTreeClassifier(random_state=1)), + ) + ) pipeline.fit(X_train, y_train) pred_train = pipeline.predict(X_train) diff --git a/test/test_pipeline/implementations/test_util.py b/test/test_pipeline/implementations/test_util.py index 06f2a1eb2f..58412e0b0c 100644 --- a/test/test_pipeline/implementations/test_util.py +++ b/test/test_pipeline/implementations/test_util.py @@ -7,19 +7,44 @@ class UtilTest(unittest.TestCase): def test_softmax_binary(self): - df = np.array([-40.00643897, 34.69754581, 23.71181359, -29.89724287, - 27.06071791, -37.78334103, -40.15812461, 40.16139229, - -27.85887801, 42.67404756, -36.89753589, -36.45148009, - 54.68976306, 19.47886562, -49.99821027, -35.70205302, - -40.59639267, 32.96343916, -39.23777841, -37.86535019, - -33.10196906, 26.84144377, -36.8569686]) + df = np.array( + [ + -40.00643897, + 34.69754581, + 23.71181359, + -29.89724287, + 27.06071791, + -37.78334103, + -40.15812461, + 40.16139229, + -27.85887801, + 42.67404756, + -36.89753589, + -36.45148009, + 54.68976306, + 19.47886562, + -49.99821027, + -35.70205302, + -40.59639267, + 32.96343916, + -39.23777841, + -37.86535019, + -33.10196906, + 26.84144377, + -36.8569686, + ] + ) probas = softmax(df) - expected = [[1., 0.] if d < 0. else [0., 1.] for d in df] + expected = [[1.0, 0.0] if d < 0.0 else [0.0, 1.0] for d in df] np.testing.assert_array_almost_equal(expected, probas) def test_softmax(self): - df = np.array([[2.75021367e+10, -8.83772371e-01, -2.20516715e+27], - [-2.10848072e+11, 2.35024444e-01, 5.20106536e+25]]) + df = np.array( + [ + [2.75021367e10, -8.83772371e-01, -2.20516715e27], + [-2.10848072e11, 2.35024444e-01, 5.20106536e25], + ] + ) # With a numerically unstable softmax, the output would be something # like this: # [[ 0. 0. 
nan] @@ -30,6 +55,7 @@ def test_softmax(self): df = np.array([[0.1, 0.6, 0.3], [0.2, 0.3, 0.5]]) probas = softmax(df) - expected = np.array([[0.25838965, 0.42601251, 0.31559783], - [0.28943311, 0.31987306, 0.39069383]]) + expected = np.array( + [[0.25838965, 0.42601251, 0.31559783], [0.28943311, 0.31987306, 0.39069383]] + ) np.testing.assert_array_almost_equal(expected, probas) diff --git a/test/test_pipeline/test_base.py b/test/test_pipeline/test_base.py index 0d40bca0d1..f1efed23b4 100644 --- a/test/test_pipeline/test_base.py +++ b/test/test_pipeline/test_base.py @@ -5,8 +5,8 @@ import autosklearn.pipeline.base import autosklearn.pipeline.components.base -import autosklearn.pipeline.components.feature_preprocessing -import autosklearn.pipeline.components.classification +import autosklearn.pipeline.components.classification as classification +import autosklearn.pipeline.components.feature_preprocessing as feature_preprocessing class BasePipelineMock(autosklearn.pipeline.base.BasePipeline): @@ -17,42 +17,45 @@ def __init__(self): class BaseTest(unittest.TestCase): def test_get_hyperparameter_configuration_space_3choices(self): cs = ConfigSpace.configuration_space.ConfigurationSpace() - dataset_properties = {'target_type': 'classification'} + dataset_properties = {"target_type": "classification"} exclude = {} include = {} - pipeline = [('p0', - autosklearn.pipeline.components.feature_preprocessing - .FeaturePreprocessorChoice(dataset_properties)), - ('p1', - autosklearn.pipeline.components.feature_preprocessing - .FeaturePreprocessorChoice(dataset_properties)), - ('c', autosklearn.pipeline.components.classification - .ClassifierChoice(dataset_properties))] + pipeline = [ + ( + "p0", + feature_preprocessing.FeaturePreprocessorChoice(dataset_properties), + ), + ( + "p1", + feature_preprocessing.FeaturePreprocessorChoice(dataset_properties), + ), + ( + "c", + classification.ClassifierChoice(dataset_properties), + ), + ] base = BasePipelineMock() - cs = base._get_base_search_space(cs, dataset_properties, - exclude, include, pipeline) + cs = base._get_base_search_space( + cs, dataset_properties, exclude, include, pipeline + ) - self.assertEqual(len(cs.get_hyperparameter("p0:__choice__").choices), - 13) - self.assertEqual(len(cs.get_hyperparameter("p1:__choice__").choices), - 15) + self.assertEqual(len(cs.get_hyperparameter("p0:__choice__").choices), 13) + self.assertEqual(len(cs.get_hyperparameter("p1:__choice__").choices), 15) # for clause in sorted([str(clause) for clause in cs.forbidden_clauses]): # print(clause) self.assertEqual(148, len(cs.forbidden_clauses)) cs = ConfigSpace.configuration_space.ConfigurationSpace() - dataset_properties = {'target_type': 'classification', 'signed': True} - include = {'c': ['multinomial_nb']} - cs = base._get_base_search_space(cs, dataset_properties, - exclude, include, pipeline) - self.assertEqual(len(cs.get_hyperparameter("p0:__choice__").choices), - 13) - self.assertEqual(len(cs.get_hyperparameter("p1:__choice__").choices), - 10) - self.assertEqual(len(cs.get_hyperparameter("c:__choice__").choices), - 1) + dataset_properties = {"target_type": "classification", "signed": True} + include = {"c": ["multinomial_nb"]} + cs = base._get_base_search_space( + cs, dataset_properties, exclude, include, pipeline + ) + self.assertEqual(len(cs.get_hyperparameter("p0:__choice__").choices), 13) + self.assertEqual(len(cs.get_hyperparameter("p1:__choice__").choices), 10) + self.assertEqual(len(cs.get_hyperparameter("c:__choice__").choices), 1) # Mostly combinations of p0 
making the data unsigned and p1 not # changing the values of the data points # for clause in sorted([str(clause) for clause in cs.forbidden_clauses]): @@ -60,42 +63,41 @@ def test_get_hyperparameter_configuration_space_3choices(self): self.assertEqual(64, len(cs.forbidden_clauses)) cs = ConfigSpace.configuration_space.ConfigurationSpace() - dataset_properties = {'target_type': 'classification', 'signed': True} + dataset_properties = {"target_type": "classification", "signed": True} include = {} - cs = base._get_base_search_space(cs, dataset_properties, - exclude, include, pipeline) - self.assertEqual(len(cs.get_hyperparameter("p0:__choice__").choices), - 13) - self.assertEqual(len(cs.get_hyperparameter("p1:__choice__").choices), - 15) - self.assertEqual(len(cs.get_hyperparameter("c:__choice__").choices), - 16) + cs = base._get_base_search_space( + cs, dataset_properties, exclude, include, pipeline + ) + self.assertEqual(len(cs.get_hyperparameter("p0:__choice__").choices), 13) + self.assertEqual(len(cs.get_hyperparameter("p1:__choice__").choices), 15) + self.assertEqual(len(cs.get_hyperparameter("c:__choice__").choices), 16) # for clause in sorted([str(clause) for clause in cs.forbidden_clauses]): # print(clause) self.assertEqual(110, len(cs.forbidden_clauses)) cs = ConfigSpace.configuration_space.ConfigurationSpace() - dataset_properties = {'target_type': 'classification', 'sparse': True} - cs = base._get_base_search_space(cs, dataset_properties, - exclude, include, pipeline) - self.assertEqual(len(cs.get_hyperparameter("p0:__choice__").choices), - 12) - self.assertEqual(len(cs.get_hyperparameter("p1:__choice__").choices), - 15) + dataset_properties = {"target_type": "classification", "sparse": True} + cs = base._get_base_search_space( + cs, dataset_properties, exclude, include, pipeline + ) + self.assertEqual(len(cs.get_hyperparameter("p0:__choice__").choices), 12) + self.assertEqual(len(cs.get_hyperparameter("p1:__choice__").choices), 15) # for clause in sorted([str(clause) for clause in cs.forbidden_clauses]): # print(clause) self.assertEqual(419, len(cs.forbidden_clauses)) cs = ConfigSpace.configuration_space.ConfigurationSpace() - dataset_properties = {'target_type': 'classification', - 'sparse': True, 'signed': True} - cs = base._get_base_search_space(cs, dataset_properties, - exclude, include, pipeline) - - self.assertEqual(len(cs.get_hyperparameter("p0:__choice__").choices), - 12) - self.assertEqual(len(cs.get_hyperparameter("p1:__choice__").choices), - 15) + dataset_properties = { + "target_type": "classification", + "sparse": True, + "signed": True, + } + cs = base._get_base_search_space( + cs, dataset_properties, exclude, include, pipeline + ) + + self.assertEqual(len(cs.get_hyperparameter("p0:__choice__").choices), 12) + self.assertEqual(len(cs.get_hyperparameter("p1:__choice__").choices), 15) # Data is guaranteed to be positive in cases like densifier, # extra_trees_preproc, multinomial_nb -> less constraints # for clause in sorted([str(clause) for clause in cs.forbidden_clauses]): @@ -123,52 +125,71 @@ def test_init_params_handling(self): for init_params, expected_init_params in [ ({}, {}), (None, None), - ({'M:key': 'value'}, {'key': 'value'}), + ({"M:key": "value"}, {"key": "value"}), ]: node = unittest.mock.Mock( spec=autosklearn.pipeline.components.base.AutoSklearnComponent ) node.get_hyperparameter_search_space.return_value = cs - node.key = 'value' - base.steps = [('M', node)] - base.set_hyperparameters(cs.sample_configuration(), init_params=init_params) - 
self.assertEqual(node.set_hyperparameters.call_args[1]['init_params'], - expected_init_params) + node.key = "value" + base.steps = [("M", node)] + base.set_hyperparameters( + cs.sample_configuration(), init_params=init_params + ) + self.assertEqual( + node.set_hyperparameters.call_args[1]["init_params"], + expected_init_params, + ) # Check for proper exception raising node = unittest.mock.Mock( spec=autosklearn.pipeline.components.base.AutoSklearnComponent ) node.get_hyperparameter_search_space.return_value = cs - base.steps = [('M', node)] - with self.assertRaisesRegex(ValueError, "Unsupported argument to init_params"): - base.set_hyperparameters(cs.sample_configuration(), init_params={'key': 'value'}) + base.steps = [("M", node)] + with self.assertRaisesRegex( + ValueError, "Unsupported argument to init_params" + ): + base.set_hyperparameters( + cs.sample_configuration(), init_params={"key": "value"} + ) # An invalid node name is passed - with self.assertRaisesRegex(ValueError, "The current node name specified via key"): - base.set_hyperparameters(cs.sample_configuration(), init_params={'N:key': 'value'}) + with self.assertRaisesRegex( + ValueError, "The current node name specified via key" + ): + base.set_hyperparameters( + cs.sample_configuration(), init_params={"N:key": "value"} + ) # The value was not properly set -- Here it happens because the # object is a magic mock, calling the method doesn't set a new parameter with self.assertRaisesRegex(ValueError, "Cannot properly set the pair"): - base.set_hyperparameters(cs.sample_configuration(), init_params={'M:key': 'value'}) + base.set_hyperparameters( + cs.sample_configuration(), init_params={"M:key": "value"} + ) def test_include_exclude_validation(self): - """ - Makes sure include and exclude arguments are validated and raises expected exception - on error + """Makes sure include and exclude arguments are validated and raises + expected exception on error """ base = BasePipelineMock() - dataset_properties = {'target_type': 'classification'} + dataset_properties = {"target_type": "classification"} base.dataset_properties = dataset_properties - base.steps = [('p0', - autosklearn.pipeline.components.feature_preprocessing - .FeaturePreprocessorChoice(dataset_properties)), - ('p1', - autosklearn.pipeline.components.feature_preprocessing - .FeaturePreprocessorChoice(dataset_properties)), - ('c', autosklearn.pipeline.components.classification - .ClassifierChoice(dataset_properties))] + base.steps = [ + ( + "p0", + feature_preprocessing.FeaturePreprocessorChoice(dataset_properties), + ), + ( + "p1", + feature_preprocessing.FeaturePreprocessorChoice(dataset_properties), + ), + ( + "c", + classification.ClassifierChoice(dataset_properties), + ), + ] def assert_value_error(include=None, exclude=None): base.include = include @@ -177,21 +198,21 @@ def assert_value_error(include=None, exclude=None): base._validate_include_exclude_params() # Same key in include and exclude argument - assert_value_error(include={'c': ['adaboost']}, exclude={'c': ['sgd']}) + assert_value_error(include={"c": ["adaboost"]}, exclude={"c": ["sgd"]}) # Invalid key in the exclude argument - assert_value_error(exclude={'p2': ['pca']}) + assert_value_error(exclude={"p2": ["pca"]}) # Invalid value type for the key in the include argument - assert_value_error(include={'c': ('adaboost', 'sgd')}, exclude=None) + assert_value_error(include={"c": ("adaboost", "sgd")}, exclude=None) # Empty list of the key in the include argument - assert_value_error(include={'c': []}) + 
assert_value_error(include={"c": []}) # Invalid component in the list value for the key in the include argument - assert_value_error(include={'c': ['pca']}) + assert_value_error(include={"c": ["pca"]}) # Case when all conditions passed for include and exclude - base.include = {'c': ['adaboost', 'sgd']} - base.exclude = {'p1': ['pca']} + base.include = {"c": ["adaboost", "sgd"]} + base.exclude = {"p1": ["pca"]} self.assertIsNone(base._validate_include_exclude_params()) diff --git a/test/test_pipeline/test_classification.py b/test/test_pipeline/test_classification.py index 49267b0fee..c197dd30fc 100644 --- a/test/test_pipeline/test_classification.py +++ b/test/test_pipeline/test_classification.py @@ -8,29 +8,37 @@ import unittest import unittest.mock -from joblib import Memory import numpy as np - -from sklearn.base import clone import sklearn.datasets import sklearn.decomposition -import sklearn.model_selection import sklearn.ensemble +import sklearn.model_selection import sklearn.svm -from sklearn.utils.validation import check_is_fitted - from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import CategoricalHyperparameter +from joblib import Memory +from sklearn.base import clone +from sklearn.utils.validation import check_is_fitted -from autosklearn.pipeline.classification import SimpleClassificationPipeline -from autosklearn.pipeline.components.base import \ - AutoSklearnClassificationAlgorithm, AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.components.base import AutoSklearnComponent, AutoSklearnChoice, _addons import autosklearn.pipeline.components.classification as classification_components import autosklearn.pipeline.components.feature_preprocessing as preprocessing_components +from autosklearn.pipeline.classification import SimpleClassificationPipeline +from autosklearn.pipeline.components.base import ( + AutoSklearnChoice, + AutoSklearnClassificationAlgorithm, + AutoSklearnComponent, + AutoSklearnPreprocessingAlgorithm, + _addons, +) +from autosklearn.pipeline.constants import ( + DENSE, + INPUT, + PREDICTIONS, + SIGNED_DATA, + SPARSE, + UNSIGNED_DATA, +) from autosklearn.pipeline.util import get_dataset -from autosklearn.pipeline.constants import \ - DENSE, SPARSE, UNSIGNED_DATA, PREDICTIONS, SIGNED_DATA, INPUT from test.test_pipeline.ignored_warnings import classifier_warnings, ignore_warnings @@ -38,16 +46,18 @@ class DummyClassifier(AutoSklearnClassificationAlgorithm): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'AB', - 'name': 'AdaBoost Classifier', - 'handles_regression': False, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': False, - 'is_deterministic': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (PREDICTIONS,)} + return { + "shortname": "AB", + "name": "AdaBoost Classifier", + "handles_regression": False, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": False, + "is_deterministic": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (PREDICTIONS,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): @@ -58,16 +68,18 @@ def get_hyperparameter_search_space(dataset_properties=None): class DummyPreprocessor(AutoSklearnPreprocessingAlgorithm): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'AB', - 'name': 'AdaBoost Classifier', - 'handles_regression': False, 
- 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': False, - 'is_deterministic': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (INPUT,)} + return { + "shortname": "AB", + "name": "AdaBoost Classifier", + "handles_regression": False, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": False, + "is_deterministic": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (INPUT,), + } @staticmethod def get_hyperparameter_search_space(dataset_properties=None): @@ -81,16 +93,18 @@ def __init__(*args, **kwargs): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'AB', - 'name': 'AdaBoost Classifier', - 'handles_regression': False, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'handles_multioutput': False, - 'is_deterministic': True, - 'input': (DENSE, SPARSE, UNSIGNED_DATA), - 'output': (INPUT,)} + return { + "shortname": "AB", + "name": "AdaBoost Classifier", + "handles_regression": False, + "handles_classification": True, + "handles_multiclass": True, + "handles_multilabel": True, + "handles_multioutput": False, + "is_deterministic": True, + "input": (DENSE, SPARSE, UNSIGNED_DATA), + "output": (INPUT,), + } def fit(self, X, y): raise ValueError("Make sure fit is called") @@ -116,21 +130,21 @@ def test_io_dict(self): if classifiers[c] == classification_components.ClassifierChoice: continue props = classifiers[c].get_properties() - self.assertIn('input', props) - self.assertIn('output', props) - inp = props['input'] - output = props['output'] + self.assertIn("input", props) + self.assertIn("output", props) + inp = props["input"] + output = props["output"] self.assertIsInstance(inp, tuple) self.assertIsInstance(output, tuple) for i in inp: self.assertIn(i, (SPARSE, DENSE, SIGNED_DATA, UNSIGNED_DATA)) self.assertEqual(output, (PREDICTIONS,)) - self.assertIn('handles_regression', props) - self.assertFalse(props['handles_regression']) - self.assertIn('handles_classification', props) - self.assertIn('handles_multiclass', props) - self.assertIn('handles_multilabel', props) + self.assertIn("handles_regression", props) + self.assertFalse(props["handles_regression"]) + self.assertIn("handles_classification", props) + self.assertIn("handles_multiclass", props) + self.assertIn("handles_multilabel", props) def test_find_classifiers(self): """Test that the classifier components can be found @@ -143,9 +157,11 @@ def test_find_classifiers(self): classifiers = classification_components._classifiers self.assertGreaterEqual(len(classifiers), 2) for key in classifiers: - if hasattr(classifiers[key], 'get_components'): + if hasattr(classifiers[key], "get_components"): continue - self.assertIn(AutoSklearnClassificationAlgorithm, classifiers[key].__bases__) + self.assertIn( + AutoSklearnClassificationAlgorithm, classifiers[key].__bases__ + ) def test_find_preprocessors(self): """Test that preproccesor components can be found @@ -156,20 +172,23 @@ def test_find_preprocessors(self): * The inherit from AutoSklearnPreprocessingAlgorithm """ preprocessors = preprocessing_components._preprocessors - self.assertGreaterEqual(len(preprocessors), 1) + self.assertGreaterEqual(len(preprocessors), 1) for key in preprocessors: - if hasattr(preprocessors[key], 'get_components'): + if hasattr(preprocessors[key], "get_components"): continue - self.assertIn(AutoSklearnPreprocessingAlgorithm, 
preprocessors[key].__bases__) + self.assertIn( + AutoSklearnPreprocessingAlgorithm, preprocessors[key].__bases__ + ) def test_default_configuration(self): """Test that seeded SimpleClassificaitonPipeline returns good results on iris Expects ------- - * The performance of configuration with fixed seed gets above 96% accuracy on iris + * The performance of configuration with fixed seed gets above 96% accuracy + on iris """ - X_train, Y_train, X_test, Y_test = get_dataset(dataset='iris') + X_train, Y_train, X_test, Y_test = get_dataset(dataset="iris") auto = SimpleClassificationPipeline(random_state=1) @@ -190,11 +209,12 @@ def test_default_configuration_multilabel(self): * The performance of a random configuratino gets above 96% on a multilabel version of iris """ - X_train, Y_train, X_test, Y_test = get_dataset(dataset='iris', make_multilabel=True) + X_train, Y_train, X_test, Y_test = get_dataset( + dataset="iris", make_multilabel=True + ) classifier = SimpleClassificationPipeline( - dataset_properties={'multilabel': True}, - random_state=0 + dataset_properties={"multilabel": True}, random_state=0 ) cs = classifier.get_hyperparameter_search_space() @@ -218,14 +238,14 @@ def test_default_configuration_iterative_fit(self): * Random forest pipeline can be fit iteratively * Test that its number of estimators is equal to the iteration count """ - X_train, Y_train, X_test, Y_test = get_dataset(dataset='iris') + X_train, Y_train, X_test, Y_test = get_dataset(dataset="iris") classifier = SimpleClassificationPipeline( include={ - 'classifier': ['random_forest'], - 'feature_preprocessor': ['no_preprocessing'] + "classifier": ["random_forest"], + "feature_preprocessor": ["no_preprocessing"], }, - random_state=0 + random_state=0, ) classifier.fit_transformer(X_train, Y_train) @@ -256,9 +276,7 @@ def test_multilabel(self): * All configurations should fit, predict and predict_proba successfully """ cache = Memory(location=tempfile.gettempdir()) - cached_func = cache.cache( - sklearn.datasets.make_multilabel_classification - ) + cached_func = cache.cache(sklearn.datasets.make_multilabel_classification) X, Y = cached_func( n_samples=150, n_features=20, @@ -269,14 +287,16 @@ def test_multilabel(self): sparse=False, return_indicator=True, return_distributions=False, - random_state=1 + random_state=1, ) data = { - 'X_train': X[:100, :], - 'Y_train': Y[:100, :], - 'X_test': X[101:, :], - 'Y_test': Y[101:, ] + "X_train": X[:100, :], + "Y_train": Y[:100, :], + "X_test": X[101:, :], + "Y_test": Y[ + 101:, + ], } pipeline = SimpleClassificationPipeline(dataset_properties={"multilabel": True}) @@ -301,12 +321,14 @@ def test_configurations_signed_data(self): ------- * All configurations should fit, predict and predict_proba successfully """ - dataset_properties = {'signed': True} + dataset_properties = {"signed": True} cls = SimpleClassificationPipeline(dataset_properties=dataset_properties) cs = cls.get_hyperparameter_search_space() - self._test_configurations(configurations_space=cs, dataset_properties=dataset_properties) + self._test_configurations( + configurations_space=cs, dataset_properties=dataset_properties + ) def test_configurations_sparse(self): """Tests a non-seeded random set of configurations with sparse data @@ -315,7 +337,7 @@ def test_configurations_sparse(self): ------- * All configurations should fit, predict and predict_proba successfully """ - pipeline = SimpleClassificationPipeline(dataset_properties={'sparse': True}) + pipeline = SimpleClassificationPipeline(dataset_properties={"sparse": 
True}) cs = pipeline.get_hyperparameter_search_space() self._test_configurations(configurations_space=cs, make_sparse=True) @@ -330,41 +352,89 @@ def test_configurations_categorical_data(self): * All configurations should fit, predict and predict_proba successfully """ pipeline = SimpleClassificationPipeline( - dataset_properties={'sparse': False}, + dataset_properties={"sparse": False}, include={ - 'feature_preprocessor': ['no_preprocessing'], - 'classifier': ['sgd', 'adaboost'] - } + "feature_preprocessor": ["no_preprocessing"], + "classifier": ["sgd", "adaboost"], + }, ) cs = pipeline.get_hyperparameter_search_space() categorical_columns = [ - True, True, True, False, False, True, True, True, False, True, True, True, True, - True, True, True, True, True, True, True, True, True, True, True, True, True, - True, True, True, True, True, True, False, False, False, True, True, True + True, + True, + True, + False, + False, + True, + True, + True, + False, + True, + True, + True, + True, + True, + True, + True, + True, + True, + True, + True, + True, + True, + True, + True, + True, + True, + True, + True, + True, + True, + True, + True, + False, + False, + False, + True, + True, + True, ] categorical = { - i: 'categorical' if is_categorical else 'numerical' + i: "categorical" if is_categorical else "numerical" for i, is_categorical in enumerate(categorical_columns) } here = os.path.dirname(__file__) - dataset_path = os.path.join(here, "components", "data_preprocessing", "dataset.pkl") + dataset_path = os.path.join( + here, "components", "data_preprocessing", "dataset.pkl" + ) X = np.loadtxt(dataset_path) y = X[:, -1].copy() X = X[:, :-1] - X_train, X_test, Y_train, Y_test = sklearn.model_selection.train_test_split(X, y) + X_train, X_test, Y_train, Y_test = sklearn.model_selection.train_test_split( + X, y + ) - data = {'X_train': X_train, 'Y_train': Y_train, 'X_test': X_test, 'Y_test': Y_test} + data = { + "X_train": X_train, + "Y_train": Y_train, + "X_test": X_test, + "Y_test": Y_test, + } - init_params = {'data_preprocessor:feat_type': categorical} + init_params = {"data_preprocessor:feat_type": categorical} - self._test_configurations(configurations_space=cs, dataset=data, init_params=init_params) + self._test_configurations( + configurations_space=cs, dataset=data, init_params=init_params + ) - @unittest.mock.patch('autosklearn.pipeline.components.data_preprocessing' - '.DataPreprocessorChoice.set_hyperparameters') + @unittest.mock.patch( + "autosklearn.pipeline.components.data_preprocessing" + ".DataPreprocessorChoice.set_hyperparameters" + ) def test_categorical_passed_to_one_hot_encoder(self, ohe_mock): """Test that the feat_types arg is passed to the OneHotEncoder @@ -379,36 +449,38 @@ def test_categorical_passed_to_one_hot_encoder(self, ohe_mock): # Mock the _check_init_params_honored as there is no object created, # _check_init_params_honored will fail as a datapreprocessor was never created - with unittest.mock.patch('autosklearn.pipeline.classification.SimpleClassificationPipeline' - '._check_init_params_honored'): + with unittest.mock.patch( + "autosklearn.pipeline.classification.SimpleClassificationPipeline" + "._check_init_params_honored" + ): # Check through construction - feat_types = {0: 'categorical', 1: 'numerical'} + feat_types = {0: "categorical", 1: "numerical"} cls = SimpleClassificationPipeline( - init_params={'data_preprocessor:feat_type': feat_types} + init_params={"data_preprocessor:feat_type": feat_types} ) - init_args = ohe_mock.call_args[1]['init_params'] - 
self.assertEqual(init_args, {'feat_type': feat_types}) + init_args = ohe_mock.call_args[1]["init_params"] + self.assertEqual(init_args, {"feat_type": feat_types}) # Check through `set_hyperparameters` - feat_types = {0: 'categorical', 1: 'categorical', 2: 'numerical'} + feat_types = {0: "categorical", 1: "categorical", 2: "numerical"} default = cls.get_hyperparameter_search_space().get_default_configuration() cls.set_hyperparameters( configuration=default, - init_params={'data_preprocessor:feat_type': feat_types}, + init_params={"data_preprocessor:feat_type": feat_types}, ) - init_args = ohe_mock.call_args[1]['init_params'] - self.assertEqual(init_args, {'feat_type': feat_types}) + init_args = ohe_mock.call_args[1]["init_params"] + self.assertEqual(init_args, {"feat_type": feat_types}) def _test_configurations( self, configurations_space: ConfigurationSpace, make_sparse: bool = False, - dataset: Union[str, Dict[str, Any]] = 'digits', + dataset: Union[str, Dict[str, Any]] = "digits", init_params: Dict[str, Any] = None, dataset_properties: Dict[str, Any] = None, n_samples: int = 10, @@ -448,53 +520,55 @@ def _test_configurations( config._populate_values() # Restrict configurations which could take too long on travis-ci - restrictions = {'classifier:passive_aggressive:n_iter': 5, - 'classifier:sgd:n_iter': 5, - 'classifier:adaboost:n_estimators': 50, - 'classifier:adaboost:max_depth': 1, - 'feature_preprocessor:kernel_pca:n_components': 10, - 'feature_preprocessor:kitchen_sinks:n_components': 50, - 'classifier:proj_logit:max_epochs': 1, - 'classifier:libsvm_svc:degree': 2, - 'regressor:libsvm_svr:degree': 2, - 'feature_preprocessor:truncatedSVD:target_dim': 10, - 'feature_preprocessor:polynomial:degree': 2, - 'classifier:lda:n_components': 10, - 'feature_preprocessor:nystroem_sampler:n_components': 50, - 'feature_preprocessor:feature_agglomeration:n_clusters': 2, - 'classifier:gradient_boosting:max_leaf_nodes': 64} - - config._values.update({ - param: value - for param, value in restrictions.items() - if param in config and config[param] is not None - }) + restrictions = { + "classifier:passive_aggressive:n_iter": 5, + "classifier:sgd:n_iter": 5, + "classifier:adaboost:n_estimators": 50, + "classifier:adaboost:max_depth": 1, + "feature_preprocessor:kernel_pca:n_components": 10, + "feature_preprocessor:kitchen_sinks:n_components": 50, + "classifier:proj_logit:max_epochs": 1, + "classifier:libsvm_svc:degree": 2, + "regressor:libsvm_svr:degree": 2, + "feature_preprocessor:truncatedSVD:target_dim": 10, + "feature_preprocessor:polynomial:degree": 2, + "classifier:lda:n_components": 10, + "feature_preprocessor:nystroem_sampler:n_components": 50, + "feature_preprocessor:feature_agglomeration:n_clusters": 2, + "classifier:gradient_boosting:max_leaf_nodes": 64, + } + + config._values.update( + { + param: value + for param, value in restrictions.items() + if param in config and config[param] is not None + } + ) if isinstance(dataset, str): X_train, Y_train, X_test, Y_test = get_dataset( - dataset=dataset, - make_sparse=make_sparse, - add_NaNs=True + dataset=dataset, make_sparse=make_sparse, add_NaNs=True ) else: - X_train = dataset['X_train'].copy() - Y_train = dataset['Y_train'].copy() - X_test = dataset['X_test'].copy() - dataset['Y_test'].copy() + X_train = dataset["X_train"].copy() + Y_train = dataset["Y_train"].copy() + X_test = dataset["X_test"].copy() + dataset["Y_test"].copy() init_params_ = copy.deepcopy(init_params) cls = SimpleClassificationPipeline( - dataset_properties=dataset_properties, 
- init_params=init_params_ + dataset_properties=dataset_properties, init_params=init_params_ ) cls.set_hyperparameters(config, init_params=init_params_) # First make sure that for this configuration, setting the parameters # does not mistakenly set the estimator as fitted for name, step in cls.named_steps.items(): - with self.assertRaisesRegex(sklearn.exceptions.NotFittedError, - "instance is not fitted yet"): + with self.assertRaisesRegex( + sklearn.exceptions.NotFittedError, "instance is not fitted yet" + ): check_is_fitted(step) try: @@ -526,15 +600,17 @@ def _test_configurations( continue elif "Numerical problems in QDA" in e.args[0]: continue - elif 'Bug in scikit-learn' in e.args[0]: + elif "Bug in scikit-learn" in e.args[0]: continue - elif 'The condensed distance matrix must contain only finite ' \ - 'values.' in e.args[0]: + elif ( + "The condensed distance matrix must contain only finite " + "values." in e.args[0] + ): continue - elif 'Internal work array size computation failed' in e.args[0]: + elif "Internal work array size computation failed" in e.args[0]: continue # Assumed to be caused by knn with preprocessor fast_ica with whiten - elif 'Input contains NaN, infinity or a value too large' in e.args[0]: + elif "Input contains NaN, infinity or a value too large" in e.args[0]: continue else: e.args += (f"config={config}",) @@ -581,14 +657,18 @@ def test_get_hyperparameter_search_space(self): cs = pipeline.get_hyperparameter_search_space() self.assertIsInstance(cs, ConfigurationSpace) - rescale_param = 'data_preprocessor:feature_type:numerical_transformer:rescaling:__choice__' + rescale_param = ( + "data_preprocessor:feature_type:numerical_transformer:rescaling:__choice__" + ) n_choices = len(cs.get_hyperparameter(rescale_param).choices) self.assertEqual(n_choices, 7) - n_classifiers = len(cs.get_hyperparameter('classifier:__choice__').choices) + n_classifiers = len(cs.get_hyperparameter("classifier:__choice__").choices) self.assertEqual(n_classifiers, 16) - n_preprocessors = len(cs.get_hyperparameter('feature_preprocessor:__choice__').choices) + n_preprocessors = len( + cs.get_hyperparameter("feature_preprocessor:__choice__").choices + ) self.assertEqual(n_preprocessors, 13) hyperparameters = cs.get_hyperparameters() @@ -610,69 +690,71 @@ def test_get_hyperparameter_search_space_include_exclude_models(self): Expects ------- - * Including a classifier choice has pipeline give back matching choice - * Excluding a classifier choice means it won't show up in the hyperparameter space + * Including a classifier has pipeline give back matching choice + * Excluding a classifier means it won't show up in the hyperparameter space * Including a feature preprocessor has pipeline give back matching choice - * Excluding a feature preprocessor means it won't show up in the hyperparameter space + * Excluding a feature preprocessor means it won't show up in the + hyperparameter space """ # include a classifier choice - pipeline = SimpleClassificationPipeline(include={'classifier': ['libsvm_svc']}) + pipeline = SimpleClassificationPipeline(include={"classifier": ["libsvm_svc"]}) cs = pipeline.get_hyperparameter_search_space() - expected = CategoricalHyperparameter('classifier:__choice__', ['libsvm_svc']) - returned = cs.get_hyperparameter('classifier:__choice__') + expected = CategoricalHyperparameter("classifier:__choice__", ["libsvm_svc"]) + returned = cs.get_hyperparameter("classifier:__choice__") self.assertEqual(returned, expected) # exclude a classifier choice - pipeline = 
SimpleClassificationPipeline(exclude={'classifier': ['libsvm_svc']}) + pipeline = SimpleClassificationPipeline(exclude={"classifier": ["libsvm_svc"]}) cs = pipeline.get_hyperparameter_search_space() - self.assertNotIn('libsvm_svc', str(cs)) + self.assertNotIn("libsvm_svc", str(cs)) # include a feature preprocessor pipeline = SimpleClassificationPipeline( - include={'feature_preprocessor': ['select_percentile_classification']} + include={"feature_preprocessor": ["select_percentile_classification"]} ) cs = pipeline.get_hyperparameter_search_space() - returned = cs.get_hyperparameter('feature_preprocessor:__choice__') + returned = cs.get_hyperparameter("feature_preprocessor:__choice__") expected = CategoricalHyperparameter( - 'feature_preprocessor:__choice__', - ['select_percentile_classification'] + "feature_preprocessor:__choice__", ["select_percentile_classification"] ) self.assertEqual(returned, expected) # exclude a feature preprocessor pipeline = SimpleClassificationPipeline( - exclude={'feature_preprocessor': ['select_percentile_classification']} + exclude={"feature_preprocessor": ["select_percentile_classification"]} ) cs = pipeline.get_hyperparameter_search_space() - self.assertNotIn('select_percentile_classification', str(cs)) + self.assertNotIn("select_percentile_classification", str(cs)) - def test_get_hyperparameter_search_space_preprocessor_contradicts_default_classifier(self): + def test_get_hyperparameter_search_space_preprocessor_contradicts_default( + self, + ): """Test that the default classifier gets updated based on the legal feature preprocessors that come before. Expects ------- - * With 'densifier' as only legal feature_preprocessor, 'qda' is default classifier - * With 'nystroem_sampler' as only legal feature_preprocessor, 'sgd' is default classifier + * With 'densifier' as only legal feature_preprocessor, 'qda' is default + * With 'nystroem_sampler' as only legal feature_preprocessor, 'sgd' is default """ pipeline = SimpleClassificationPipeline( - include={'feature_preprocessor': ['densifier']}, - dataset_properties={'sparse': True} + include={"feature_preprocessor": ["densifier"]}, + dataset_properties={"sparse": True}, ) cs = pipeline.get_hyperparameter_search_space() - default_choice = cs.get_hyperparameter('classifier:__choice__').default_value - self.assertEqual(default_choice, 'qda') + default_choice = cs.get_hyperparameter("classifier:__choice__").default_value + self.assertEqual(default_choice, "qda") pipeline = SimpleClassificationPipeline( - include={'feature_preprocessor': ['nystroem_sampler']} + include={"feature_preprocessor": ["nystroem_sampler"]} ) cs = pipeline.get_hyperparameter_search_space() - default_choice = cs.get_hyperparameter('classifier:__choice__').default_value - self.assertEqual(default_choice, 'sgd') + default_choice = cs.get_hyperparameter("classifier:__choice__").default_value + self.assertEqual(default_choice, "sgd") def test_get_hyperparameter_search_space_only_forbidden_combinations(self): """Test that invalid pipeline configurations raise errors @@ -686,43 +768,48 @@ def test_get_hyperparameter_search_space_only_forbidden_combinations(self): with self.assertRaisesRegex(AssertionError, "No valid pipeline found."): SimpleClassificationPipeline( include={ - 'classifier': ['multinomial_nb'], - 'feature_preprocessor': ['pca'] + "classifier": ["multinomial_nb"], + "feature_preprocessor": ["pca"], }, - dataset_properties={'sparse': True} + dataset_properties={"sparse": True}, ) - with self.assertRaisesRegex(ValueError, "Cannot find a 
legal default configuration."): + with self.assertRaisesRegex( + ValueError, "Cannot find a legal default configuration." + ): SimpleClassificationPipeline( include={ - 'classifier': ['liblinear_svc'], - 'feature_preprocessor': ['densifier'] + "classifier": ["liblinear_svc"], + "feature_preprocessor": ["densifier"], }, - dataset_properties={'sparse': True} + dataset_properties={"sparse": True}, ) @unittest.skip("Wait until ConfigSpace is fixed.") def test_get_hyperparameter_search_space_dataset_properties(self): cs_mc = SimpleClassificationPipeline.get_hyperparameter_search_space( - dataset_properties={'multiclass': True} + dataset_properties={"multiclass": True} ) - self.assertNotIn('bernoulli_nb', str(cs_mc)) + self.assertNotIn("bernoulli_nb", str(cs_mc)) cs_ml = SimpleClassificationPipeline.get_hyperparameter_search_space( - dataset_properties={'multilabel': True}) - self.assertNotIn('k_nearest_neighbors', str(cs_ml)) - self.assertNotIn('liblinear', str(cs_ml)) - self.assertNotIn('libsvm_svc', str(cs_ml)) - self.assertNotIn('sgd', str(cs_ml)) + dataset_properties={"multilabel": True} + ) + self.assertNotIn("k_nearest_neighbors", str(cs_ml)) + self.assertNotIn("liblinear", str(cs_ml)) + self.assertNotIn("libsvm_svc", str(cs_ml)) + self.assertNotIn("sgd", str(cs_ml)) cs_sp = SimpleClassificationPipeline.get_hyperparameter_search_space( - dataset_properties={'sparse': True}) - self.assertIn('extra_trees', str(cs_sp)) - self.assertIn('gradient_boosting', str(cs_sp)) - self.assertIn('random_forest', str(cs_sp)) + dataset_properties={"sparse": True} + ) + self.assertIn("extra_trees", str(cs_sp)) + self.assertIn("gradient_boosting", str(cs_sp)) + self.assertIn("random_forest", str(cs_sp)) cs_mc_ml = SimpleClassificationPipeline.get_hyperparameter_search_space( - dataset_properties={'multilabel': True, 'multiclass': True}) + dataset_properties={"multilabel": True, "multiclass": True} + ) self.assertEqual(cs_ml, cs_mc_ml) def test_predict_batched(self): @@ -733,12 +820,13 @@ def test_predict_batched(self): ------- * Should expect the output shape to match that of the digits dataset * Should expect a fixed call count each test run - * Should expect predict_proba with `batches` and predict_proba perform near identically + * Should expect predict_proba with `batches` and predict_proba + perform near identically """ - cls = SimpleClassificationPipeline(include={'classifier': ['sgd']}) + cls = SimpleClassificationPipeline(include={"classifier": ["sgd"]}) # Multiclass - X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits') + X_train, Y_train, X_test, Y_test = get_dataset(dataset="digits") with ignore_warnings(classifier_warnings): cls.fit(X_train, Y_train) @@ -764,15 +852,17 @@ def test_predict_batched_sparse(self): ------- * Should expect the output shape to match that of the digits dataset * Should expect a fixed call count each test run - * Should expect predict_proba with `batches` and predict_proba perform near identically + * Should expect predict_proba with `batches` and predict_proba + perform near identically """ cls = SimpleClassificationPipeline( - dataset_properties={'sparse': True}, - include={'classifier': ['sgd']} + dataset_properties={"sparse": True}, include={"classifier": ["sgd"]} ) # Multiclass - X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits', make_sparse=True) + X_train, Y_train, X_test, Y_test = get_dataset( + dataset="digits", make_sparse=True + ) with ignore_warnings(classifier_warnings): cls.fit(X_train, Y_train) @@ -797,11 +887,12 @@ def 
test_predict_proba_batched(self): ------- * Should expect the output shape to match that of the digits dataset * Should expect a fixed call count each test run - * Should expect predict_proba with `batches` and predict_proba perform near identically + * Should expect predict_proba with `batches` and predict_proba + perform near identically """ # Multiclass - cls = SimpleClassificationPipeline(include={'classifier': ['sgd']}) - X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits') + cls = SimpleClassificationPipeline(include={"classifier": ["sgd"]}) + X_train, Y_train, X_test, Y_test = get_dataset(dataset="digits") with ignore_warnings(classifier_warnings): cls.fit(X_train, Y_train) @@ -820,10 +911,11 @@ def test_predict_proba_batched(self): np.testing.assert_array_almost_equal(prediction_, prediction) # Multilabel - cls = SimpleClassificationPipeline(include={'classifier': ['lda']}) - X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits') - Y_train = np.array(list([(list([1 if i != y else 0 for i in range(10)])) - for y in Y_train])) + cls = SimpleClassificationPipeline(include={"classifier": ["lda"]}) + X_train, Y_train, X_test, Y_test = get_dataset(dataset="digits") + Y_train = np.array( + list([(list([1 if i != y else 0 for i in range(10)])) for y in Y_train]) + ) with ignore_warnings(classifier_warnings): cls.fit(X_train, Y_train) @@ -849,15 +941,18 @@ def test_predict_proba_batched_sparse(self): ------- * Should expect the output shape to match that of the digits dataset * Should expect a fixed call count each test run - * Should expect predict_proba with `batches` and predict_proba perform near identically + * Should expect predict_proba with `batches` and predict_proba + perform near identically """ cls = SimpleClassificationPipeline( - dataset_properties={'sparse': True, 'multiclass': True}, - include={'classifier': ['sgd']} + dataset_properties={"sparse": True, "multiclass": True}, + include={"classifier": ["sgd"]}, ) # Multiclass - X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits', make_sparse=True) + X_train, Y_train, X_test, Y_test = get_dataset( + dataset="digits", make_sparse=True + ) X_test_ = X_test.copy() with ignore_warnings(classifier_warnings): @@ -877,10 +972,12 @@ def test_predict_proba_batched_sparse(self): # Multilabel cls = SimpleClassificationPipeline( - dataset_properties={'sparse': True, 'multilabel': True}, - include={'classifier': ['lda']} + dataset_properties={"sparse": True, "multilabel": True}, + include={"classifier": ["lda"]}, + ) + X_train, Y_train, X_test, Y_test = get_dataset( + dataset="digits", make_sparse=True ) - X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits', make_sparse=True) X_test_ = X_test.copy() Y_train = np.array([[1 if i != y else 0 for i in range(10)] for y in Y_train]) @@ -909,7 +1006,7 @@ def test_pipeline_clonability(self): * The cloned object can be constructed from these params * The reconstructed clone and the original have the same param values """ - X_train, Y_train, X_test, Y_test = get_dataset(dataset='iris') + X_train, Y_train, X_test, Y_test = get_dataset(dataset="iris") auto = SimpleClassificationPipeline() @@ -952,18 +1049,24 @@ def test_add_classifier(self): * There should be 1 component after adding a classifier * The classifier should be in the search space of the Pipeline after being added """ - self.assertEqual(len(classification_components.additional_components.components), 0) - self.assertEqual(len(_addons['classification'].components), 0) + self.assertEqual( +
len(classification_components.additional_components.components), 0 + ) + self.assertEqual(len(_addons["classification"].components), 0) classification_components.add_classifier(DummyClassifier) - self.assertEqual(len(classification_components.additional_components.components), 1) - self.assertEqual(len(_addons['classification'].components), 1) + self.assertEqual( + len(classification_components.additional_components.components), 1 + ) + self.assertEqual(len(_addons["classification"].components), 1) cs = SimpleClassificationPipeline().get_hyperparameter_search_space() - self.assertIn('DummyClassifier', str(cs)) + self.assertIn("DummyClassifier", str(cs)) - del classification_components.additional_components.components['DummyClassifier'] + del classification_components.additional_components.components[ + "DummyClassifier" + ] def test_add_preprocessor(self): """Test that preprocessors can be added @@ -972,22 +1075,30 @@ def test_add_preprocessor(self): ------- * There should be 0 components initially * There should be 1 component after adding a preprocessor - * The preprocessor should be in the search space of the Pipeline after being added + * The preprocessor is in the search space of the Pipeline after being added """ - self.assertEqual(len(preprocessing_components.additional_components.components), 0) - self.assertEqual(len(_addons['feature_preprocessing'].components), 0) + self.assertEqual( + len(preprocessing_components.additional_components.components), 0 + ) + self.assertEqual(len(_addons["feature_preprocessing"].components), 0) + preprocessing_components.add_preprocessor(DummyPreprocessor) - self.assertEqual(len(preprocessing_components.additional_components.components), 1) - self.assertEqual(len(_addons['feature_preprocessing'].components), 1) + self.assertEqual( + len(preprocessing_components.additional_components.components), 1 + ) + self.assertEqual(len(_addons["feature_preprocessing"].components), 1) cs = SimpleClassificationPipeline().get_hyperparameter_search_space() - self.assertIn('DummyPreprocessor', str(cs)) + self.assertIn("DummyPreprocessor", str(cs)) - del preprocessing_components.additional_components.components['DummyPreprocessor'] + del preprocessing_components.additional_components.components[ + "DummyPreprocessor" + ] - def _test_set_hyperparameter_choice(self, expected_key, implementation, config_dict): + def _test_set_hyperparameter_choice( + self, expected_key, implementation, config_dict + ): """Given a configuration in config, this procedure makes sure that the given implementation, which should be a Choice component, honors the type of the object, and any hyperparameter associated to it @@ -1001,14 +1112,16 @@ def _test_set_hyperparameter_choice(self, expected_key, implementation, config_d # Are there further hyperparams?
# A choice component might have attribute requirements that we need to check - expected_sub_key = expected_key.replace(':__choice__', ':') + implementation_type + expected_sub_key = ( + expected_key.replace(":__choice__", ":") + implementation_type + ) expected_attributes = {} - if 'data_preprocessor:__choice__' in expected_key: + if "data_preprocessor:__choice__" in expected_key: # We have to check both the numerical and categorical to_check = { - 'numerical_transformer': implementation.choice.numer_ppl.named_steps, - 'categorical_transformer': implementation.choice.categ_ppl.named_steps, - 'text_transformer': implementation.choice.txt_ppl.named_steps, + "numerical_transformer": implementation.choice.numer_ppl.named_steps, + "categorical_transformer": implementation.choice.categ_ppl.named_steps, + "text_transformer": implementation.choice.txt_ppl.named_steps, } for data_type, pipeline in to_check.items(): @@ -1016,8 +1129,7 @@ def _test_set_hyperparameter_choice(self, expected_key, implementation, config_d # If it is a Choice, make sure it is the correct one! if isinstance(sub_step, AutoSklearnChoice): key = "data_preprocessor:feature_type:{}:{}:__choice__".format( - data_type, - sub_name + data_type, sub_name ) keys_checked.extend( self._test_set_hyperparameter_choice( @@ -1029,10 +1141,10 @@ def _test_set_hyperparameter_choice(self, expected_key, implementation, config_d keys_checked.extend( self._test_set_hyperparameter_component( "data_preprocessor:feature_type:{}:{}".format( - data_type, - sub_name + data_type, sub_name ), - sub_step, config_dict + sub_step, + config_dict, ) ) else: @@ -1041,7 +1153,7 @@ def _test_set_hyperparameter_choice(self, expected_key, implementation, config_d for key, value in config_dict.items(): if key != expected_key and expected_sub_key in key: - expected_attributes[key.split(':')[-1]] = value + expected_attributes[key.split(":")[-1]] = value keys_checked.append(key) if expected_attributes: @@ -1053,7 +1165,9 @@ def _test_set_hyperparameter_choice(self, expected_key, implementation, config_d return keys_checked - def _test_set_hyperparameter_component(self, expected_key, implementation, config_dict): + def _test_set_hyperparameter_component( + self, expected_key, implementation, config_dict + ): """ Given a configuration in config, this procedure makes sure that the given implementation, which should be a autosklearn component, honors @@ -1067,15 +1181,14 @@ def _test_set_hyperparameter_component(self, expected_key, implementation, confi for key, value in config_dict.items(): if expected_key in key: keys_checked.append(key) - key = key.replace(expected_key + ':', '') - if ':' in key: - raise ValueError("This utility should only be called with a " - "matching string that produces leaf configurations, " - "that is no further colons are expected, yet key={}" - "".format( - key - ) - ) + key = key.replace(expected_key + ":", "") + if ":" in key: + raise ValueError( + "This utility should only be called with a " + "matching string that produces leaf configurations, " + "that is no further colons are expected, yet key={}" + "".format(key) + ) expected_attributes[key] = value # self.assertDictContainsSubset(expected_attributes, attributes) # Cannot check the whole dictionary, just names, as some @@ -1097,12 +1210,17 @@ def test_set_hyperparameters_honors_configuration(self): """ random_state = 1 all_combinations = list(itertools.product([True, False], repeat=4)) - for sparse, multilabel, signed, multiclass, in all_combinations: + for ( + sparse, + multilabel, + 
signed, + multiclass, + ) in all_combinations: dataset_properties = { - 'sparse': sparse, - 'multilabel': multilabel, - 'multiclass': multiclass, - 'signed': signed, + "sparse": sparse, + "multilabel": multilabel, + "multiclass": multiclass, + "signed": signed, } cls = SimpleClassificationPipeline( random_state=random_state, @@ -1121,36 +1239,37 @@ def test_set_hyperparameters_honors_configuration(self): keys_checked = [] for name, step in cls.named_steps.items(): - if name == 'data_preprocessor': + if name == "data_preprocessor": keys_checked.extend( self._test_set_hyperparameter_choice( - 'data_preprocessor:__choice__', step, config_dict + "data_preprocessor:__choice__", step, config_dict ) ) self.assertEqual(step.random_state, random_state) - elif name == 'balancing': + elif name == "balancing": keys_checked.extend( self._test_set_hyperparameter_component( - 'balancing', - step, config_dict + "balancing", step, config_dict ) ) - elif name == 'feature_preprocessor': + elif name == "feature_preprocessor": keys_checked.extend( self._test_set_hyperparameter_choice( - 'feature_preprocessor:__choice__', step, config_dict + "feature_preprocessor:__choice__", step, config_dict ) ) self.assertEqual(step.random_state, random_state) - elif name == 'classifier': + elif name == "classifier": keys_checked.extend( self._test_set_hyperparameter_choice( - 'classifier:__choice__', step, config_dict + "classifier:__choice__", step, config_dict ) ) self.assertEqual(step.random_state, random_state) else: - raise ValueError("Found another type of step! Need to update this check") + raise ValueError( + "Found another type of step! Need to update this check" + ) # Make sure we checked the whole configuration self.assertSetEqual(set(config_dict.keys()), set(keys_checked)) @@ -1162,18 +1281,18 @@ def test_fit_instantiates_component(self): # We reduce the search space as forbidden clauses prevent to instantiate # the user defined preprocessor manually - cls = SimpleClassificationPipeline( - include={'classifier': ['random_forest']} - ) + cls = SimpleClassificationPipeline(include={"classifier": ["random_forest"]}) cs = cls.get_hyperparameter_search_space() - self.assertIn('CrashPreprocessor', str(cs)) + self.assertIn("CrashPreprocessor", str(cs)) config = cs.sample_configuration() try: - config['feature_preprocessor:__choice__'] = 'CrashPreprocessor' + config["feature_preprocessor:__choice__"] = "CrashPreprocessor" except Exception as e: # In case of failure clean up the components and print enough information # to clean up with check in the future - del preprocessing_components.additional_components.components['CrashPreprocessor'] + del preprocessing_components.additional_components.components[ + "CrashPreprocessor" + ] self.fail("cs={} config={} Exception={}".format(cs, config, e)) cls.set_hyperparameters(config) @@ -1182,7 +1301,9 @@ def test_fit_instantiates_component(self): with ignore_warnings(classifier_warnings): cls.fit( X=np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]), - y=np.array([1, 0, 1, 1]) + y=np.array([1, 0, 1, 1]), ) - del preprocessing_components.additional_components.components['CrashPreprocessor'] + del preprocessing_components.additional_components.components[ + "CrashPreprocessor" + ] diff --git a/test/test_pipeline/test_create_searchspace_util_classification.py b/test/test_pipeline/test_create_searchspace_util_classification.py index 7bf1450979..a830430097 100644 --- a/test/test_pipeline/test_create_searchspace_util_classification.py +++ 
b/test/test_pipeline/test_create_searchspace_util_classification.py @@ -1,20 +1,23 @@ +import unittest from collections import OrderedDict -import unittest import numpy - from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import CategoricalHyperparameter -from autosklearn.pipeline.components.classification.liblinear_svc import LibLinear_SVC +import autosklearn.pipeline.create_searchspace_util from autosklearn.pipeline.components.classification.lda import LDA - +from autosklearn.pipeline.components.classification.liblinear_svc import LibLinear_SVC +from autosklearn.pipeline.components.feature_preprocessing.no_preprocessing import ( + NoPreprocessing, +) from autosklearn.pipeline.components.feature_preprocessing.pca import PCA -from autosklearn.pipeline.components.feature_preprocessing.truncatedSVD import TruncatedSVD -from autosklearn.pipeline.components.feature_preprocessing.no_preprocessing import NoPreprocessing -from autosklearn.pipeline.components.feature_preprocessing.random_trees_embedding \ - import RandomTreesEmbedding -import autosklearn.pipeline.create_searchspace_util +from autosklearn.pipeline.components.feature_preprocessing.random_trees_embedding import ( # noqa: E501 + RandomTreesEmbedding, +) +from autosklearn.pipeline.components.feature_preprocessing.truncatedSVD import ( + TruncatedSVD, +) class TestCreateClassificationSearchspace(unittest.TestCase): @@ -23,9 +26,9 @@ class TestCreateClassificationSearchspace(unittest.TestCase): def test_get_match_array_sparse_and_dense(self): # preproc is empty preprocessors = OrderedDict() - preprocessors['pca'] = PCA + preprocessors["pca"] = PCA classifiers = OrderedDict() - classifiers['lda'] = LDA + classifiers["lda"] = LDA # Sparse + dense class Preprocessors(object): @@ -40,62 +43,69 @@ def get_available_components(self, *args, **kwargs): # Dense m = autosklearn.pipeline.create_searchspace_util.get_match_array( - pipeline=((0, PCA), (1, LDA)), dataset_properties={'sparse': True}) + pipeline=((0, PCA), (1, LDA)), dataset_properties={"sparse": True} + ) self.assertEqual(numpy.sum(m), 0) m = autosklearn.pipeline.create_searchspace_util.get_match_array( - pipeline=((0, PCA), (1, LDA)), dataset_properties={'sparse': False}) + pipeline=((0, PCA), (1, LDA)), dataset_properties={"sparse": False} + ) self.assertEqual(m, [[1]]) # Sparse - preprocessors['tSVD'] = TruncatedSVD + preprocessors["tSVD"] = TruncatedSVD m = autosklearn.pipeline.create_searchspace_util.get_match_array( - pipeline=((0, Preprocessors), (1, LDA)), - dataset_properties={'sparse': True}) + pipeline=((0, Preprocessors), (1, LDA)), dataset_properties={"sparse": True} + ) self.assertEqual(m[0], [0]) # pca self.assertEqual(m[1], [1]) # svd m = autosklearn.pipeline.create_searchspace_util.get_match_array( pipeline=((0, Preprocessors), (1, LDA)), - dataset_properties={'sparse': False}) + dataset_properties={"sparse": False}, + ) self.assertEqual(m[0], [1]) # pca self.assertEqual(m[1], [0]) # svd - preprocessors['none'] = NoPreprocessing + preprocessors["none"] = NoPreprocessing m = autosklearn.pipeline.create_searchspace_util.get_match_array( - pipeline=((0, Preprocessors), (1, LDA)), - dataset_properties={'sparse': True}) + pipeline=((0, Preprocessors), (1, LDA)), dataset_properties={"sparse": True} + ) self.assertEqual(m[0, :], [0]) # pca self.assertEqual(m[1, :], [1]) # tsvd self.assertEqual(m[2, :], [0]) # none m = autosklearn.pipeline.create_searchspace_util.get_match_array( pipeline=((0, Preprocessors), (1, LDA)), - 
dataset_properties={'sparse': False}) + dataset_properties={"sparse": False}, + ) self.assertEqual(m[0, :], [1]) # pca self.assertEqual(m[1, :], [0]) # tsvd self.assertEqual(m[2, :], [1]) # none - classifiers['libsvm'] = LibLinear_SVC + classifiers["libsvm"] = LibLinear_SVC m = autosklearn.pipeline.create_searchspace_util.get_match_array( pipeline=((0, Preprocessors), (1, Classifiers)), - dataset_properties={'sparse': False}) + dataset_properties={"sparse": False}, + ) self.assertListEqual(list(m[0, :]), [1, 1]) # pca self.assertListEqual(list(m[1, :]), [0, 0]) # tsvd self.assertListEqual(list(m[2, :]), [1, 1]) # none m = autosklearn.pipeline.create_searchspace_util.get_match_array( pipeline=((0, Preprocessors), (1, Classifiers)), - dataset_properties={'sparse': True}) + dataset_properties={"sparse": True}, + ) self.assertListEqual(list(m[0, :]), [0, 0]) # pca self.assertListEqual(list(m[1, :]), [1, 1]) # tsvd self.assertListEqual(list(m[2, :]), [0, 1]) # none # Do fancy 3d stuff - preprocessors['random_trees'] = RandomTreesEmbedding + preprocessors["random_trees"] = RandomTreesEmbedding m = autosklearn.pipeline.create_searchspace_util.get_match_array( pipeline=((0, Preprocessors), (1, Preprocessors), (2, Classifiers)), - dataset_properties={'sparse': False}) + dataset_properties={"sparse": False}, + ) # PCA followed by truncated SVD is forbidden self.assertEqual(list(m[0].flatten()), [1, 1, 0, 0, 1, 1, 0, 1]) # Truncated SVD is forbidden @@ -112,28 +122,38 @@ def test_get_match_array_signed_unsigned_and_binary(self): @unittest.skip("Not currently working.") def test_add_forbidden(self): m = numpy.ones([2, 3]) - preprocessors_list = ['pa', 'pb'] - classifier_list = ['ca', 'cb', 'cc'] + preprocessors_list = ["pa", "pb"] + classifier_list = ["ca", "cb", "cc"] cs = ConfigurationSpace() - preprocessor = CategoricalHyperparameter(name='feature_preprocessor', - choices=preprocessors_list) - classifier = CategoricalHyperparameter(name='classifier', - choices=classifier_list) + preprocessor = CategoricalHyperparameter( + name="feature_preprocessor", choices=preprocessors_list + ) + classifier = CategoricalHyperparameter( + name="classifier", choices=classifier_list + ) cs.add_hyperparameter(preprocessor) cs.add_hyperparameter(classifier) new_cs = autosklearn.pipeline.create_searchspace_util.add_forbidden( - conf_space=cs, node_0_list=preprocessors_list, - node_1_list=classifier_list, matches=m, - node_0_name='feature_preprocessor', node_1_name="classifier") + conf_space=cs, + node_0_list=preprocessors_list, + node_1_list=classifier_list, + matches=m, + node_0_name="feature_preprocessor", + node_1_name="classifier", + ) self.assertEqual(len(new_cs.forbidden_clauses), 0) self.assertIsInstance(new_cs, ConfigurationSpace) m[1, 1] = 0 new_cs = autosklearn.pipeline.create_searchspace_util.add_forbidden( - conf_space=cs, node_0_list=preprocessors_list, - node_1_list=classifier_list, matches=m, - node_0_name='feature_preprocessor', node_1_name="classifier") + conf_space=cs, + node_0_list=preprocessors_list, + node_1_list=classifier_list, + matches=m, + node_0_name="feature_preprocessor", + node_1_name="classifier", + ) self.assertEqual(len(new_cs.forbidden_clauses), 1) - self.assertEqual(new_cs.forbidden_clauses[0].components[0].value, 'cb') - self.assertEqual(new_cs.forbidden_clauses[0].components[1].value, 'pb') + self.assertEqual(new_cs.forbidden_clauses[0].components[0].value, "cb") + self.assertEqual(new_cs.forbidden_clauses[0].components[1].value, "pb") self.assertIsInstance(new_cs, 
ConfigurationSpace) diff --git a/test/test_pipeline/test_regression.py b/test/test_pipeline/test_regression.py index fccf59af67..501b73ec5d 100644 --- a/test/test_pipeline/test_regression.py +++ b/test/test_pipeline/test_regression.py @@ -5,28 +5,36 @@ import unittest import unittest.mock -from joblib import Memory import numpy as np import sklearn.datasets import sklearn.decomposition -from sklearn.base import clone import sklearn.ensemble import sklearn.svm -from sklearn.utils.validation import check_is_fitted - from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import CategoricalHyperparameter +from joblib import Memory +from sklearn.base import clone +from sklearn.utils.validation import check_is_fitted -from autosklearn.pipeline.regression import SimpleRegressionPipeline -from autosklearn.pipeline.components.base import \ - AutoSklearnPreprocessingAlgorithm, AutoSklearnRegressionAlgorithm -import autosklearn.pipeline.components.regression as regression_components -from autosklearn.pipeline.components.base import AutoSklearnComponent, AutoSklearnChoice import autosklearn.pipeline.components.feature_preprocessing as preprocessing_components +import autosklearn.pipeline.components.regression as regression_components +from autosklearn.pipeline.components.base import ( + AutoSklearnChoice, + AutoSklearnComponent, + AutoSklearnPreprocessingAlgorithm, + AutoSklearnRegressionAlgorithm, +) +from autosklearn.pipeline.constants import ( + DENSE, + PREDICTIONS, + SIGNED_DATA, + SPARSE, + UNSIGNED_DATA, +) +from autosklearn.pipeline.regression import SimpleRegressionPipeline from autosklearn.pipeline.util import get_dataset -from autosklearn.pipeline.constants import SPARSE, DENSE, SIGNED_DATA, UNSIGNED_DATA, PREDICTIONS -from test.test_pipeline.ignored_warnings import regressor_warnings, ignore_warnings +from test.test_pipeline.ignored_warnings import ignore_warnings, regressor_warnings class SimpleRegressionPipelineTest(unittest.TestCase): @@ -38,41 +46,43 @@ def test_io_dict(self): if regressors[r] == regression_components.RegressorChoice: continue props = regressors[r].get_properties() - self.assertIn('input', props) - self.assertIn('output', props) - inp = props['input'] - output = props['output'] + self.assertIn("input", props) + self.assertIn("output", props) + inp = props["input"] + output = props["output"] self.assertIsInstance(inp, tuple) self.assertIsInstance(output, tuple) for i in inp: self.assertIn(i, (SPARSE, DENSE, SIGNED_DATA, UNSIGNED_DATA)) self.assertEqual(output, (PREDICTIONS,)) - self.assertIn('handles_regression', props) - self.assertTrue(props['handles_regression']) - self.assertIn('handles_classification', props) - self.assertIn('handles_multiclass', props) - self.assertIn('handles_multilabel', props) - self.assertIn('handles_multioutput', props) - self.assertFalse(props['handles_classification']) - self.assertFalse(props['handles_multiclass']) - self.assertFalse(props['handles_multilabel']) + self.assertIn("handles_regression", props) + self.assertTrue(props["handles_regression"]) + self.assertIn("handles_classification", props) + self.assertIn("handles_multiclass", props) + self.assertIn("handles_multilabel", props) + self.assertIn("handles_multioutput", props) + self.assertFalse(props["handles_classification"]) + self.assertFalse(props["handles_multiclass"]) + self.assertFalse(props["handles_multilabel"]) def test_find_regressors(self): regressors = regression_components._regressors self.assertGreaterEqual(len(regressors), 1) 
for key in regressors: - if hasattr(regressors[key], 'get_components'): + if hasattr(regressors[key], "get_components"): continue self.assertIn(AutoSklearnRegressionAlgorithm, regressors[key].__bases__) def test_find_preprocessors(self): preprocessors = preprocessing_components._preprocessors - self.assertGreaterEqual(len(preprocessors), 1) + self.assertGreaterEqual(len(preprocessors), 1) for key in preprocessors: - if hasattr(preprocessors[key], 'get_components'): + if hasattr(preprocessors[key], "get_components"): continue - self.assertIn(AutoSklearnPreprocessingAlgorithm, preprocessors[key].__bases__) + self.assertIn( + AutoSklearnPreprocessingAlgorithm, preprocessors[key].__bases__ + ) def test_configurations(self): cs = SimpleRegressionPipeline().get_hyperparameter_search_space() @@ -80,27 +90,28 @@ def test_configurations(self): self._test_configurations(cs) def test_configurations_signed_data(self): - dataset_properties = {'signed': True} - cs = SimpleRegressionPipeline(dataset_properties=dataset_properties).\ - get_hyperparameter_search_space() + dataset_properties = {"signed": True} + cs = SimpleRegressionPipeline( + dataset_properties=dataset_properties + ).get_hyperparameter_search_space() - self._test_configurations(configurations_space=cs, - dataset_properties=dataset_properties) + self._test_configurations( + configurations_space=cs, dataset_properties=dataset_properties + ) def test_configurations_sparse(self): - dataset_properties = {'sparse': True} + dataset_properties = {"sparse": True} cs = SimpleRegressionPipeline( dataset_properties=dataset_properties ).get_hyperparameter_search_space() - self._test_configurations(cs, make_sparse=True, - dataset_properties=dataset_properties) + self._test_configurations( + cs, make_sparse=True, dataset_properties=dataset_properties + ) def test_multioutput(self): cache = Memory(location=tempfile.gettempdir()) - cached_func = cache.cache( - sklearn.datasets.make_regression - ) + cached_func = cache.cache(sklearn.datasets.make_regression) X, Y = cached_func( n_samples=250, n_features=20, @@ -112,24 +123,33 @@ def test_multioutput(self): noise=0.3, shuffle=True, coef=False, - random_state=1 + random_state=1, ) X_train = X[:200, :] Y_train = Y[:200, :] X_test = X[200:, :] Y_test = Y[200:, :] - data = {'X_train': X_train, 'Y_train': Y_train, - 'X_test': X_test, 'Y_test': Y_test} + data = { + "X_train": X_train, + "Y_train": Y_train, + "X_test": X_test, + "Y_test": Y_test, + } - dataset_properties = {'multioutput': True} + dataset_properties = {"multioutput": True} pipeline = SimpleRegressionPipeline(dataset_properties=dataset_properties) cs = pipeline.get_hyperparameter_search_space() self._test_configurations(cs, data=data, dataset_properties=dataset_properties) - def _test_configurations(self, configurations_space, make_sparse=False, - data=None, dataset_properties=None): + def _test_configurations( + self, + configurations_space, + make_sparse=False, + data=None, + dataset_properties=None, + ): # Use a limit of ~4GiB limit = 3072 * 1024 * 1024 resource.setrlimit(resource.RLIMIT_AS, (limit, limit)) @@ -141,42 +161,48 @@ def _test_configurations(self, configurations_space, make_sparse=False, config._populate_values() # Restrict configurations which could take too long on travis-ci - restrictions = {'regressor:adaboost:n_estimators': 50, - 'regressor:adaboost:max_depth': 1, - 'feature_preprocessor:kernel_pca:n_components': 10, - 'feature_preprocessor:kitchen_sinks:n_components': 50, - 'regressor:libsvm_svc:degree': 2, - 
'regressor:libsvm_svr:degree': 2, - 'regressor:libsvm_svr:C': 1., - 'feature_preprocessor:truncatedSVD:target_dim': 10, - 'feature_preprocessor:polynomial:degree': 2, - 'regressor:lda:n_components': 10} + restrictions = { + "regressor:adaboost:n_estimators": 50, + "regressor:adaboost:max_depth": 1, + "feature_preprocessor:kernel_pca:n_components": 10, + "feature_preprocessor:kitchen_sinks:n_components": 50, + "regressor:libsvm_svc:degree": 2, + "regressor:libsvm_svr:degree": 2, + "regressor:libsvm_svr:C": 1.0, + "feature_preprocessor:truncatedSVD:target_dim": 10, + "feature_preprocessor:polynomial:degree": 2, + "regressor:lda:n_components": 10, + } for restrict_parameter in restrictions: restrict_to = restrictions[restrict_parameter] - if restrict_parameter in config and config[restrict_parameter] is not None: + if ( + restrict_parameter in config + and config[restrict_parameter] is not None + ): config._values[restrict_parameter] = restrict_to if data is None: X_train, Y_train, X_test, Y_test = get_dataset( - dataset='boston', make_sparse=make_sparse, add_NaNs=True) + dataset="boston", make_sparse=make_sparse, add_NaNs=True + ) else: - X_train = data['X_train'].copy() - Y_train = data['Y_train'].copy() - X_test = data['X_test'].copy() - data['Y_test'].copy() + X_train = data["X_train"].copy() + Y_train = data["Y_train"].copy() + X_test = data["X_test"].copy() + data["Y_test"].copy() cls = SimpleRegressionPipeline( - random_state=1, - dataset_properties=dataset_properties + random_state=1, dataset_properties=dataset_properties ) cls.set_hyperparameters(config) # First make sure that for this configuration, setting the parameters # does not mistakenly set the estimator as fitted for name, step in cls.named_steps.items(): - with self.assertRaisesRegex(sklearn.exceptions.NotFittedError, - "instance is not fitted yet"): + with self.assertRaisesRegex( + sklearn.exceptions.NotFittedError, "instance is not fitted yet" + ): check_is_fitted(step) try: @@ -190,9 +216,9 @@ def _test_configurations(self, configurations_space, make_sparse=False, for name, step in cls.named_steps.items(): check_is_fitted(step) except sklearn.exceptions.NotFittedError: - self.fail("config={} raised NotFittedError unexpectedly!".format( - config - )) + self.fail( + "config={} raised NotFittedError unexpectedly!".format(config) + ) cls.predict(X_test) except MemoryError: @@ -200,8 +226,7 @@ def _test_configurations(self, configurations_space, make_sparse=False, except np.linalg.LinAlgError: continue except ValueError as e: - if "Floating-point under-/overflow occurred at epoch" in \ - e.args[0]: + if "Floating-point under-/overflow occurred at epoch" in e.args[0]: continue elif "removed all features" in e.args[0]: continue @@ -209,13 +234,17 @@ def _test_configurations(self, configurations_space, make_sparse=False, continue elif "Numerical problems in QDA" in e.args[0]: continue - elif 'Bug in scikit-learn' in e.args[0]: + elif "Bug in scikit-learn" in e.args[0]: continue - elif 'The condensed distance matrix must contain only finite ' \ - 'values.' in e.args[0]: + elif ( + "The condensed distance matrix must contain only finite " + "values." 
in e.args[0] + ): continue - elif "zero-size array to reduction operation maximum which has no " \ - "identity" in e.args[0]: + elif ( + "zero-size array to reduction operation maximum which has no " + "identity" in e.args[0] + ): continue else: e.args += (f"config={config}",) @@ -244,7 +273,10 @@ def _test_configurations(self, configurations_space, make_sparse=False, raise e except Exception as e: - if "Multiple input features cannot have the same target value" in e.args[0]: + if ( + "Multiple input features cannot have the same target value" + in e.args[0] + ): continue else: e.args += (f"config={config}",) @@ -252,7 +284,7 @@ def _test_configurations(self, configurations_space, make_sparse=False, def test_default_configuration(self): for i in range(2): - X_train, Y_train, X_test, Y_test = get_dataset(dataset='diabetes') + X_train, Y_train, X_test, Y_test = get_dataset(dataset="diabetes") auto = SimpleRegressionPipeline(random_state=1) auto = auto.fit(X_train, Y_train) predictions = auto.predict(copy.deepcopy(X_test)) @@ -266,16 +298,15 @@ def test_default_configuration_iterative_fit(self): regressor = SimpleRegressionPipeline( random_state=1, include={ - 'regressor': ['random_forest'], - 'feature_preprocessor': ['no_preprocessing'] - } + "regressor": ["random_forest"], + "feature_preprocessor": ["no_preprocessing"], + }, ) - X_train, Y_train, X_test, Y_test = get_dataset(dataset='boston') + X_train, Y_train, X_test, Y_test = get_dataset(dataset="boston") regressor.fit_transformer(X_train, Y_train) for i in range(1, 11): regressor.iterative_fit(X_train, Y_train) - self.assertEqual(regressor.steps[-1][-1].choice.estimator.n_estimators, - i) + self.assertEqual(regressor.steps[-1][-1].choice.estimator.n_estimators, i) def test_repr(self): representation = repr(SimpleRegressionPipeline()) @@ -293,56 +324,50 @@ def test_get_hyperparameter_search_space(self): self.assertEqual(len(forbiddens), 35) def test_get_hyperparameter_search_space_include_exclude_models(self): - regressor = SimpleRegressionPipeline( - include={'regressor': ['random_forest']} - ) + regressor = SimpleRegressionPipeline(include={"regressor": ["random_forest"]}) cs = regressor.get_hyperparameter_search_space() self.assertEqual( - cs.get_hyperparameter('regressor:__choice__'), - CategoricalHyperparameter('regressor:__choice__', ['random_forest']), + cs.get_hyperparameter("regressor:__choice__"), + CategoricalHyperparameter("regressor:__choice__", ["random_forest"]), ) # TODO add this test when more than one regressor is present - regressor = SimpleRegressionPipeline( - exclude={'regressor': ['random_forest']} - ) + regressor = SimpleRegressionPipeline(exclude={"regressor": ["random_forest"]}) cs = regressor.get_hyperparameter_search_space() - self.assertNotIn('random_forest', str(cs)) + self.assertNotIn("random_forest", str(cs)) - regressor = SimpleRegressionPipeline( - include={'feature_preprocessor': ['pca']} - ) + regressor = SimpleRegressionPipeline(include={"feature_preprocessor": ["pca"]}) cs = regressor.get_hyperparameter_search_space() - self.assertEqual(cs.get_hyperparameter( - 'feature_preprocessor:__choice__'), - CategoricalHyperparameter('feature_preprocessor:__choice__', ['pca'])) + self.assertEqual( + cs.get_hyperparameter("feature_preprocessor:__choice__"), + CategoricalHyperparameter("feature_preprocessor:__choice__", ["pca"]), + ) regressor = SimpleRegressionPipeline( - exclude={'feature_preprocessor': ['no_preprocessing']} + exclude={"feature_preprocessor": ["no_preprocessing"]} ) cs = 
regressor.get_hyperparameter_search_space() - self.assertNotIn('no_preprocessing', str(cs)) + self.assertNotIn("no_preprocessing", str(cs)) - def test_get_hyperparameter_search_space_preprocessor_contradicts_default_classifier( - self + def test_get_hyperparameter_search_space_preprocessor_contradicts_default( + self, ): regressor = SimpleRegressionPipeline( - include={'feature_preprocessor': ['densifier']}, - dataset_properties={'sparse': True} + include={"feature_preprocessor": ["densifier"]}, + dataset_properties={"sparse": True}, ) cs = regressor.get_hyperparameter_search_space() self.assertEqual( - cs.get_hyperparameter('regressor:__choice__').default_value, - 'gradient_boosting' + cs.get_hyperparameter("regressor:__choice__").default_value, + "gradient_boosting", ) regressor = SimpleRegressionPipeline( - include={'feature_preprocessor': ['nystroem_sampler']} + include={"feature_preprocessor": ["nystroem_sampler"]} ) cs = regressor.get_hyperparameter_search_space() self.assertEqual( - cs.get_hyperparameter('regressor:__choice__').default_value, - 'sgd' + cs.get_hyperparameter("regressor:__choice__").default_value, "sgd" ) def test_get_hyperparameter_search_space_only_forbidden_combinations(self): @@ -351,9 +376,9 @@ def test_get_hyperparameter_search_space_only_forbidden_combinations(self): "Cannot find a legal default configuration.", SimpleRegressionPipeline, include={ - 'regressor': ['random_forest'], - 'feature_preprocessor': ['kitchen_sinks'] - } + "regressor": ["random_forest"], + "feature_preprocessor": ["kitchen_sinks"], + }, ) # It must also be caught that no classifiers which can handle sparse @@ -363,14 +388,16 @@ def test_get_hyperparameter_search_space_only_forbidden_combinations(self): "Cannot find a legal default configuration", SimpleRegressionPipeline, include={ - 'regressor': ['extra_trees'], - 'feature_preprocessor': ['densifier'] + "regressor": ["extra_trees"], + "feature_preprocessor": ["densifier"], }, - dataset_properties={'sparse': True} + dataset_properties={"sparse": True}, ) - @unittest.skip("test_get_hyperparameter_search_space_dataset_properties" + - " Not yet Implemented") + @unittest.skip( + "test_get_hyperparameter_search_space_dataset_properties" + + " Not yet Implemented" + ) def test_get_hyperparameter_search_space_dataset_properties(self): # TODO: We do not have any dataset properties for regression, so this # test is somewhat stupid @@ -403,16 +430,14 @@ def test_get_hyperparameter_search_space_dataset_properties(self): """ def test_predict_batched(self): - include = {'regressor': ['decision_tree']} + include = {"regressor": ["decision_tree"]} cs = SimpleRegressionPipeline(include=include).get_hyperparameter_search_space() default = cs.get_default_configuration() regressor = SimpleRegressionPipeline( - config=default, - random_state=1, - include=include + config=default, random_state=1, include=include ) - X_train, Y_train, X_test, Y_test = get_dataset(dataset='boston') + X_train, Y_train, X_test, Y_test = get_dataset(dataset="boston") regressor.fit(X_train, Y_train) X_test_ = X_test.copy() prediction_ = regressor.predict(X_test_) @@ -424,12 +449,11 @@ def test_predict_batched(self): np.testing.assert_array_almost_equal(prediction_, prediction) def test_predict_batched_sparse(self): - dataset_properties = {'sparse': True} - include = {'regressor': ['decision_tree']} + dataset_properties = {"sparse": True} + include = {"regressor": ["decision_tree"]} cs = SimpleRegressionPipeline( - dataset_properties=dataset_properties, - include=include +
dataset_properties=dataset_properties, include=include ).get_hyperparameter_search_space() default = cs.get_default_configuration() @@ -437,11 +461,12 @@ def test_predict_batched_sparse(self): config=default, random_state=1, dataset_properties=dataset_properties, - include=include + include=include, ) - X_train, Y_train, X_test, Y_test = get_dataset(dataset='boston', - make_sparse=True) + X_train, Y_train, X_test, Y_test = get_dataset( + dataset="boston", make_sparse=True + ) regressor.fit(X_train, Y_train) X_test_ = X_test.copy() prediction_ = regressor.predict(X_test_) @@ -465,7 +490,7 @@ def test_validate_input_Y(self): raise NotImplementedError() def test_pipeline_clonability(self): - X_train, Y_train, X_test, Y_test = get_dataset(dataset='boston') + X_train, Y_train, X_test, Y_test = get_dataset(dataset="boston") auto = SimpleRegressionPipeline(random_state=1) auto = auto.fit(X_train, Y_train) auto_clone = clone(auto) @@ -494,7 +519,9 @@ def test_set_params(self): def test_get_params(self): pass - def _test_set_hyperparameter_choice(self, expected_key, implementation, config_dict): + def _test_set_hyperparameter_choice( + self, expected_key, implementation, config_dict + ): """ Given a configuration in config, this procedure makes sure that the given implementation, which should be a Choice component, honors @@ -507,14 +534,16 @@ def _test_set_hyperparameter_choice(self, expected_key, implementation, config_d # Are there further hyperparams? # A choice component might have attribute requirements that we need to check - expected_sub_key = expected_key.replace(':__choice__', ':') + implementation_type + expected_sub_key = ( + expected_key.replace(":__choice__", ":") + implementation_type + ) expected_attributes = {} - if 'data_preprocessor:__choice__' in expected_key: + if "data_preprocessor:__choice__" in expected_key: # We have to check both the numerical and categorical to_check = { - 'numerical_transformer': implementation.choice.numer_ppl.named_steps, - 'categorical_transformer': implementation.choice.categ_ppl.named_steps, - 'text_transformer': implementation.choice.txt_ppl.named_steps, + "numerical_transformer": implementation.choice.numer_ppl.named_steps, + "categorical_transformer": implementation.choice.categ_ppl.named_steps, + "text_transformer": implementation.choice.txt_ppl.named_steps, } for data_type, pipeline in to_check.items(): @@ -522,8 +551,7 @@ def _test_set_hyperparameter_choice(self, expected_key, implementation, config_d # If it is a Choice, make sure it is the correct one! 
if isinstance(sub_step, AutoSklearnChoice): key = "data_preprocessor:feature_type:{}:{}:__choice__".format( - data_type, - sub_name + data_type, sub_name ) keys_checked.extend( self._test_set_hyperparameter_choice( @@ -535,10 +563,10 @@ def _test_set_hyperparameter_choice(self, expected_key, implementation, config_d keys_checked.extend( self._test_set_hyperparameter_component( "data_preprocessor:feature_type:{}:{}".format( - data_type, - sub_name + data_type, sub_name ), - sub_step, config_dict + sub_step, + config_dict, ) ) else: @@ -547,7 +575,7 @@ def _test_set_hyperparameter_choice(self, expected_key, implementation, config_d else: for key, value in config_dict.items(): if key != expected_key and expected_sub_key in key: - expected_attributes[key.split(':')[-1]] = value + expected_attributes[key.split(":")[-1]] = value keys_checked.append(key) if expected_attributes: attributes = vars(implementation.choice) @@ -557,7 +585,9 @@ def _test_set_hyperparameter_choice(self, expected_key, implementation, config_d self.assertIn(expected_attribute, attributes.keys()) return keys_checked - def _test_set_hyperparameter_component(self, expected_key, implementation, config_dict): + def _test_set_hyperparameter_component( + self, expected_key, implementation, config_dict + ): """ Given a configuration in config, this procedure makes sure that the given implementation, which should be a autosklearn component, honors @@ -569,15 +599,14 @@ def _test_set_hyperparameter_component(self, expected_key, implementation, confi for key, value in config_dict.items(): if expected_key in key: keys_checked.append(key) - key = key.replace(expected_key + ':', '') - if ':' in key: - raise ValueError("This utility should only be called with a " - "matching string that produces leaf configurations, " - "that is no further colons are expected, yet key={}" - "".format( - key - ) - ) + key = key.replace(expected_key + ":", "") + if ":" in key: + raise ValueError( + "This utility should only be called with a " + "matching string that produces leaf configurations, " + "that is no further colons are expected, yet key={}" + "".format(key) + ) expected_attributes[key] = value # Cannot check the whole dictionary, just names, as some # classes map the text hyperparameter directly to a function! 
@@ -598,12 +627,17 @@ def test_set_hyperparameters_honors_configuration(self): """ all_combinations = list(itertools.product([True, False], repeat=4)) - for sparse, multilabel, signed, multiclass, in all_combinations: + for ( + sparse, + multilabel, + signed, + multiclass, + ) in all_combinations: dataset_properties = { - 'sparse': sparse, - 'multilabel': multilabel, - 'multiclass': multiclass, - 'signed': signed, + "sparse": sparse, + "multilabel": multilabel, + "multiclass": multiclass, + "signed": signed, } random_state = 1 auto = SimpleRegressionPipeline( @@ -623,31 +657,32 @@ def test_set_hyperparameters_honors_configuration(self): keys_checked = [] for name, step in auto.named_steps.items(): - if name == 'data_preprocessor': + if name == "data_preprocessor": keys_checked.extend( self._test_set_hyperparameter_choice( - 'data_preprocessor:__choice__', step, config_dict + "data_preprocessor:__choice__", step, config_dict ) ) self.assertEqual(step.random_state, random_state) - elif name == 'feature_preprocessor': + elif name == "feature_preprocessor": keys_checked.extend( self._test_set_hyperparameter_choice( - 'feature_preprocessor:__choice__', step, config_dict + "feature_preprocessor:__choice__", step, config_dict ) ) self.assertEqual(step.random_state, random_state) - elif name == 'regressor': + elif name == "regressor": keys_checked.extend( self._test_set_hyperparameter_choice( - 'regressor:__choice__', step, config_dict + "regressor:__choice__", step, config_dict ) ) self.assertEqual(step.random_state, random_state) else: - raise ValueError("Found another type of step! Need to update this check" - " {}. ".format(name) - ) + raise ValueError( + "Found another type of step! Need to update this check" + " {}. ".format(name) + ) # Make sure we checked the whole configuration self.assertSetEqual(set(config_dict.keys()), set(keys_checked)) diff --git a/test/test_scripts/test_metadata_generation.py b/test/test_scripts/test_metadata_generation.py index 6cc4fad38d..6c6ba70ef5 100644 --- a/test/test_scripts/test_metadata_generation.py +++ b/test/test_scripts/test_metadata_generation.py @@ -13,27 +13,29 @@ class TestMetadataGeneration(unittest.TestCase): - def setUp(self): - self.working_directory = '/tmp/autosklearn-unittest-tmp-dir-%s-%d-%d' % ( - socket.gethostname(), os.getpid(), random.randint(0, 1000000)) + self.working_directory = "/tmp/autosklearn-unittest-tmp-dir-%s-%d-%d" % ( + socket.gethostname(), + os.getpid(), + random.randint(0, 1000000), + ) def print_files(self): - print('Existing files:') + print("Existing files:") for dirpath, dirnames, filenames in os.walk(self.working_directory): print(dirpath, dirnames, filenames) def test_metadata_generation(self): regression_task_id = 360029 - regression_dataset_name = 'SWD'.lower() + regression_dataset_name = "SWD".lower() classification_task_id = 245 - classification_dataset_name = 'breast-w'.lower() + classification_dataset_name = "breast-w".lower() current_directory = __file__ - scripts_directory = os.path.abspath(os.path.join(current_directory, - '..', '..', '..', - 'scripts')) + scripts_directory = os.path.abspath( + os.path.join(current_directory, "..", "..", "..", "scripts") + ) # 1. create working directory try: @@ -44,214 +46,293 @@ def test_metadata_generation(self): # 2. should be done by the person running the unit tests! # 3. 
create configuration commands - script_filename = os.path.join(scripts_directory, '01_create_commands.py') - cmd = 'python3 %s --working-directory %s --test' % (script_filename, self.working_directory) - rval = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + script_filename = os.path.join(scripts_directory, "01_create_commands.py") + cmd = "python3 %s --working-directory %s --test" % ( + script_filename, + self.working_directory, + ) + rval = subprocess.run( + cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) self.assertEqual(rval.returncode, 0, msg=str(rval)) # 4. run one of the commands to get some data - commands_output_file = os.path.join(self.working_directory, 'metadata_commands.txt') + commands_output_file = os.path.join( + self.working_directory, "metadata_commands.txt" + ) self.assertTrue(os.path.exists(commands_output_file)) with open(commands_output_file) as fh: - cmds = fh.read().split('\n') - # 6 regression, 7 classification (roc_auc + task 258 is illegal), 1 empty line - self.assertEqual(len(cmds), 18, msg='\n'.join(cmds)) + cmds = fh.read().split("\n") + # 6 regression, 7 classification (roc_auc + task 258 is illegal), + # 1 empty line + self.assertEqual(len(cmds), 18, msg="\n".join(cmds)) for task_id, dataset_name, task_type, metric in ( ( classification_task_id, classification_dataset_name, - 'classification', - 'balanced_accuracy', + "classification", + "balanced_accuracy", ), - (regression_task_id, regression_dataset_name, 'regression', 'r2') + (regression_task_id, regression_dataset_name, "regression", "r2"), ): cmd = None with open(commands_output_file) as fh: while True: cmd = fh.readline() - if 'task-id %d' % task_id in cmd and metric in cmd: + if "task-id %d" % task_id in cmd and metric in cmd: break if cmd is None: - self.fail('Did not find a command for task_id %s and metric %s in %s' - % (task_id, metric, cmds)) + self.fail( + "Did not find a command for task_id %s and metric %s in %s" + % (task_id, metric, cmds) + ) - self.assertIn('time-limit 86400', cmd) - self.assertIn('per-run-time-limit 1800', cmd) - cmd = cmd.replace('time-limit 86400', 'time-limit 60').replace( - 'per-run-time-limit 1800', 'per-run-time-limit 5') + self.assertIn("time-limit 86400", cmd) + self.assertIn("per-run-time-limit 1800", cmd) + cmd = cmd.replace("time-limit 86400", "time-limit 60").replace( + "per-run-time-limit 1800", "per-run-time-limit 5" + ) # This tells the script to use the same memory limit for testing as # for training. In production, it would use twice as much! 
- cmd = cmd.replace('-s 1', '-s 1 --unittest') - print('COMMAND: %s' % cmd) - rval = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - print('STDOUT: %s' % repr(rval.stdout), flush=True) - print('STDERR: %s' % repr(rval.stderr), flush=True) + cmd = cmd.replace("-s 1", "-s 1 --unittest") + print("COMMAND: %s" % cmd) + rval = subprocess.run( + cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) + print("STDOUT: %s" % repr(rval.stdout), flush=True) + print("STDERR: %s" % repr(rval.stderr), flush=True) self.print_files() - expected_output_directory = os.path.join(self.working_directory, - 'configuration', - task_type, - str(task_id), metric, - 'auto-sklearn-output') - self.assertTrue(os.path.exists(expected_output_directory), - msg=expected_output_directory) - smac_log = os.path.join(expected_output_directory, 'AutoML(1):%s.log' % dataset_name) + expected_output_directory = os.path.join( + self.working_directory, + "configuration", + task_type, + str(task_id), + metric, + "auto-sklearn-output", + ) + self.assertTrue( + os.path.exists(expected_output_directory), msg=expected_output_directory + ) + smac_log = os.path.join( + expected_output_directory, "AutoML(1):%s.log" % dataset_name + ) with open(smac_log) as fh: smac_output = fh.read() - self.assertEqual(rval.returncode, 0, msg=str(rval) + '\n' + smac_output) - expected_validation_output = os.path.join(expected_output_directory, '..', - 'validation_trajectory_1.json') + self.assertEqual(rval.returncode, 0, msg=str(rval) + "\n" + smac_output) + expected_validation_output = os.path.join( + expected_output_directory, "..", "validation_trajectory_1.json" + ) self.assertTrue(os.path.exists(expected_validation_output)) - trajectory = os.path.join(expected_output_directory, - 'smac3-output', 'run_1', 'trajectory.json') + trajectory = os.path.join( + expected_output_directory, "smac3-output", "run_1", "trajectory.json" + ) with open(expected_validation_output) as fh_validation: with open(trajectory) as fh_trajectory: traj = json.load(fh_trajectory) valid_traj = json.load(fh_validation) - print('Validation trajectory:') + print("Validation trajectory:") print(valid_traj) self.assertGreater(len(traj), 2, msg=str(valid_traj)) self.assertEqual(len(traj), len(valid_traj), msg=str(valid_traj)) for entry in valid_traj: - if task_type == 'classification': + if task_type == "classification": for metric in CLASSIFICATION_METRICS: # This is a multilabel metric - if metric in ('precision_samples', 'recall_samples', 'f1_samples'): + if metric in ( + "precision_samples", + "recall_samples", + "f1_samples", + ): continue self.assertIn(metric, entry[-1]) self.assertIsInstance(entry[-1][metric], float) - self.assertTrue(np.isfinite(entry[-1][metric]), - (metric, str(entry[-1][metric]))) + self.assertTrue( + np.isfinite(entry[-1][metric]), + (metric, str(entry[-1][metric])), + ) else: for metric in REGRESSION_METRICS: self.assertIn(metric, entry[-1]) self.assertIsInstance(entry[-1][metric], float) - self.assertTrue(np.isfinite(entry[-1][metric]), - (metric, str(entry[-1][metric]))) + self.assertTrue( + np.isfinite(entry[-1][metric]), + (metric, str(entry[-1][metric])), + ) # 5. 
Get the test performance of these configurations - script_filename = os.path.join(scripts_directory, '02_retrieve_metadata.py') - cmd = 'python3 %s --working-directory %s ' % (script_filename, self.working_directory) - print('COMMAND: %s' % cmd) - rval = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - print('STDOUT: %s' % repr(rval.stdout), flush=True) - print('STDERR: %s' % repr(rval.stderr), flush=True) + script_filename = os.path.join(scripts_directory, "02_retrieve_metadata.py") + cmd = "python3 %s --working-directory %s " % ( + script_filename, + self.working_directory, + ) + print("COMMAND: %s" % cmd) + rval = subprocess.run( + cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) + print("STDOUT: %s" % repr(rval.stdout), flush=True) + print("STDERR: %s" % repr(rval.stderr), flush=True) self.assertEqual(rval.returncode, 0, msg=str(rval)) - for file in ['algorithm_runs.arff', 'configurations.csv', 'description.results.txt']: - for metric in ['accuracy', 'balanced_accuracy', 'log_loss', 'roc_auc']: + for file in [ + "algorithm_runs.arff", + "configurations.csv", + "description.results.txt", + ]: + for metric in ["accuracy", "balanced_accuracy", "log_loss", "roc_auc"]: path = os.path.join( self.working_directory, - 'configuration_results', - '%s_binary.classification_dense' % metric, + "configuration_results", + "%s_binary.classification_dense" % metric, file, ) self.assertTrue(os.path.exists(path), msg=path) - for file in ['algorithm_runs.arff', 'configurations.csv', 'description.results.txt']: - for metric in ['r2', 'mean_squared_error']: + for file in [ + "algorithm_runs.arff", + "configurations.csv", + "description.results.txt", + ]: + for metric in ["r2", "mean_squared_error"]: path = os.path.join( self.working_directory, - 'configuration_results', - '%s_regression_dense' % metric, + "configuration_results", + "%s_regression_dense" % metric, file, ) self.assertTrue(os.path.exists(path), msg=path) # 6. 
Calculate metafeatures - script_filename = os.path.join(scripts_directory, '03_calculate_metafeatures.py') - cmd = ( - 'python3 %s --working-directory %s --test-mode ' - % (script_filename, self.working_directory) + script_filename = os.path.join( + scripts_directory, "03_calculate_metafeatures.py" + ) + cmd = "python3 %s --working-directory %s --test-mode " % ( + script_filename, + self.working_directory, + ) + rval = subprocess.run( + cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE ) - rval = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) self.assertEqual(rval.returncode, 0, msg=str(rval)) - for task_type in ('classification', 'regression'): - for file in ['calculation_times.csv', 'description.features.txt', - 'feature_costs.arff', 'feature_runstatus.arff', - 'feature_values.arff']: + for task_type in ("classification", "regression"): + for file in [ + "calculation_times.csv", + "description.features.txt", + "feature_costs.arff", + "feature_runstatus.arff", + "feature_values.arff", + ]: self.assertTrue( - os.path.exists(os.path.join( - self.working_directory, - 'metafeatures', - task_type, - file) + os.path.exists( + os.path.join( + self.working_directory, "metafeatures", task_type, file + ) ) ) with open( os.path.join( - self.working_directory, 'metafeatures', 'regression', 'feature_values.arff' + self.working_directory, + "metafeatures", + "regression", + "feature_values.arff", ) ) as fh: - metafeatures_arff = fh.read().split('\n') + metafeatures_arff = fh.read().split("\n") contains_regression_id = False for line in metafeatures_arff: - if line.startswith('fri_c4_500_25,'): + if line.startswith("fri_c4_500_25,"): contains_regression_id = True self.assertTrue(contains_regression_id, msg=metafeatures_arff) with open( - os.path.join( - self.working_directory, 'metafeatures', 'classification', 'feature_values.arff' - ) + os.path.join( + self.working_directory, + "metafeatures", + "classification", + "feature_values.arff", + ) ) as fh: - metafeatures_arff = fh.read().split('\n') + metafeatures_arff = fh.read().split("\n") contains_classification_id = False for line in metafeatures_arff: - if line.startswith('anneal,'): + if line.startswith("anneal,"): contains_classification_id = True self.assertTrue(contains_classification_id, msg=metafeatures_arff) # 7. 
Create aslib files - script_filename = os.path.join(scripts_directory, '04_create_aslib_files.py') - cmd = 'python3 %s --working-directory %s ' % ( - script_filename, self.working_directory) - rval = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + script_filename = os.path.join(scripts_directory, "04_create_aslib_files.py") + cmd = "python3 %s --working-directory %s " % ( + script_filename, + self.working_directory, + ) + rval = subprocess.run( + cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) self.assertEqual(rval.returncode, 0, msg=str(rval)) for metric_, combination in ( - (metric, '%s_binary.classification_dense' % metric), - (metric, '%s_regression_dense' % metric), + (metric, "%s_binary.classification_dense" % metric), + (metric, "%s_regression_dense" % metric), ): if task_type not in combination: continue - for file in ['algorithm_runs.arff', 'configurations.csv', - 'description.txt', 'feature_costs.arff', - 'feature_runstatus.arff', 'feature_values.arff', - 'readme.txt']: + for file in [ + "algorithm_runs.arff", + "configurations.csv", + "description.txt", + "feature_costs.arff", + "feature_runstatus.arff", + "feature_values.arff", + "readme.txt", + ]: expected_path = os.path.join( - self.working_directory, 'metadata', combination, file, + self.working_directory, + "metadata", + combination, + file, ) self.assertTrue(os.path.exists(expected_path), msg=expected_path) - with open(os.path.join(self.working_directory, - 'metadata', - combination, - 'algorithm_runs.arff')) as fh: + with open( + os.path.join( + self.working_directory, + "metadata", + combination, + "algorithm_runs.arff", + ) + ) as fh: algorithm_runs = arff.load(fh) - self.assertEqual(algorithm_runs['attributes'], - [('instance_id', 'STRING'), - ('repetition', 'NUMERIC'), - ('algorithm', 'STRING'), - (metric_, 'NUMERIC'), - ('runstatus', - ['ok', 'timeout', 'memout', 'not_applicable', - 'crash', 'other'])]) - self.assertEqual(len(algorithm_runs['data']), 1) - self.assertEqual(len(algorithm_runs['data'][0]), 5) - self.assertLess(algorithm_runs['data'][0][3], 0.9) - self.assertEqual(algorithm_runs['data'][0][4], 'ok') + self.assertEqual( + algorithm_runs["attributes"], + [ + ("instance_id", "STRING"), + ("repetition", "NUMERIC"), + ("algorithm", "STRING"), + (metric_, "NUMERIC"), + ( + "runstatus", + [ + "ok", + "timeout", + "memout", + "not_applicable", + "crash", + "other", + ], + ), + ], + ) + self.assertEqual(len(algorithm_runs["data"]), 1) + self.assertEqual(len(algorithm_runs["data"][0]), 5) + self.assertLess(algorithm_runs["data"][0][3], 0.9) + self.assertEqual(algorithm_runs["data"][0][4], "ok") def tearDown(self): for i in range(5): diff --git a/test/test_util/__init__.py b/test/test_util/__init__.py index cc3cd7becd..e298f0f075 100644 --- a/test/test_util/__init__.py +++ b/test/test_util/__init__.py @@ -1,2 +1,2 @@ # -*- encoding: utf-8 -*- -__author__ = 'feurerm' +__author__ = "feurerm" diff --git a/test/test_util/test_StopWatch.py b/test/test_util/test_StopWatch.py index 14038c6820..d45ecbf55d 100644 --- a/test/test_util/test_StopWatch.py +++ b/test/test_util/test_StopWatch.py @@ -22,8 +22,8 @@ def test_stopwatch_overhead(self): cpu_start = time.process_time() watch = StopWatch() for i in range(1, 1000): - watch.start_task('task_%d' % i) - watch.stop_task('task_%d' % i) + watch.start_task("task_%d" % i) + watch.stop_task("task_%d" % i) cpu_stop = time.process_time() stop = time.time() dur = stop - start @@ -36,6 +36,6 @@ def test_stopwatch_overhead(self): 
self.assertLess(watch.cpu_sum(), 2 * watch.wall_sum()) -if __name__ == '__main__': +if __name__ == "__main__": # import sys;sys.argv = ['', 'Test.testName'] unittest.main() diff --git a/test/test_util/test_backend.py b/test/test_util/test_backend.py index a029aef4bb..0673370b97 100644 --- a/test/test_util/test_backend.py +++ b/test/test_util/test_backend.py @@ -7,48 +7,48 @@ class BackendModelsTest(unittest.TestCase): - class BackendStub(Backend): - def __init__(self): self.__class__ = Backend def setUp(self): self.backend = self.BackendStub() - self.backend.internals_directory = '/' + self.backend.internals_directory = "/" - @unittest.mock.patch('pickle.load') - @unittest.mock.patch('os.path.exists') + @unittest.mock.patch("pickle.load") + @unittest.mock.patch("os.path.exists") def test_load_model_by_seed_and_id(self, exists_mock, pickleLoadMock): exists_mock.return_value = False - open_mock = unittest.mock.mock_open(read_data='Data') + open_mock = unittest.mock.mock_open(read_data="Data") with unittest.mock.patch( - 'autosklearn.automl_common.common.utils.backend.open', + "autosklearn.automl_common.common.utils.backend.open", open_mock, create=True, ): seed = 13 idx = 17 budget = 50.0 - expected_model = self._setup_load_model_mocks(open_mock, - pickleLoadMock, - seed, idx, budget) + expected_model = self._setup_load_model_mocks( + open_mock, pickleLoadMock, seed, idx, budget + ) actual_model = self.backend.load_model_by_seed_and_id_and_budget( - seed, idx, budget) + seed, idx, budget + ) self.assertEqual(expected_model, actual_model) - @unittest.mock.patch('pickle.load') - @unittest.mock.patch.object(builtins, 'open') - @unittest.mock.patch('os.path.exists') + @unittest.mock.patch("pickle.load") + @unittest.mock.patch.object(builtins, "open") + @unittest.mock.patch("os.path.exists") def test_loads_models_by_identifiers(self, exists_mock, openMock, pickleLoadMock): exists_mock.return_value = True seed = 13 idx = 17 budget = 50.0 expected_model = self._setup_load_model_mocks( - openMock, pickleLoadMock, seed, idx, budget) + openMock, pickleLoadMock, seed, idx, budget + ) expected_dict = {(seed, idx, budget): expected_model} actual_dict = self.backend.load_models_by_identifiers([(seed, idx, budget)]) @@ -57,15 +57,25 @@ def test_loads_models_by_identifiers(self, exists_mock, openMock, pickleLoadMock self.assertDictEqual(expected_dict, actual_dict) def _setup_load_model_mocks(self, openMock, pickleLoadMock, seed, idx, budget): - model_path = '/runs/%s_%s_%s/%s.%s.%s.model' % (seed, idx, budget, seed, idx, budget) - file_handler = 'file_handler' - expected_model = 'model' + model_path = "/runs/%s_%s_%s/%s.%s.%s.model" % ( + seed, + idx, + budget, + seed, + idx, + budget, + ) + file_handler = "file_handler" + expected_model = "model" fileMock = unittest.mock.MagicMock() fileMock.__enter__.return_value = file_handler - openMock.side_effect = \ - lambda path, flag: fileMock if path == model_path and flag == 'rb' else None - pickleLoadMock.side_effect = lambda fh: expected_model if fh == file_handler else None + openMock.side_effect = ( + lambda path, flag: fileMock if path == model_path and flag == "rb" else None + ) + pickleLoadMock.side_effect = ( + lambda fh: expected_model if fh == file_handler else None + ) return expected_model diff --git a/test/test_util/test_common.py b/test/test_util/test_common.py index 740608969d..33fa4cee31 100644 --- a/test/test_util/test_common.py +++ b/test/test_util/test_common.py @@ -18,5 +18,5 @@ def test_check_pid(self): self.assertFalse(exists) -if __name__ 
== '__main__': +if __name__ == "__main__": unittest.main() diff --git a/test/test_util/test_data.py b/test/test_util/test_data.py index 87548b413f..2bceac804a 100644 --- a/test/test_util/test_data.py +++ b/test/test_util/test_data.py @@ -1,29 +1,33 @@ -from typing import Any, List, Dict, Union -from itertools import chain -import warnings +from typing import Any, Dict, List, Union -import pytest +import warnings +from itertools import chain import numpy as np import pandas as pd +import pytest import sklearn.datasets from scipy.sparse import csr_matrix, spmatrix from autosklearn.constants import ( - BINARY_CLASSIFICATION, MULTICLASS_CLASSIFICATION, MULTILABEL_CLASSIFICATION, - REGRESSION, MULTIOUTPUT_REGRESSION, CLASSIFICATION_TASKS, REGRESSION_TASKS + BINARY_CLASSIFICATION, + CLASSIFICATION_TASKS, + MULTICLASS_CLASSIFICATION, + MULTILABEL_CLASSIFICATION, + MULTIOUTPUT_REGRESSION, + REGRESSION, + REGRESSION_TASKS, ) from autosklearn.util.data import ( - subsample, + default_dataset_compression_arg, reduce_dataset_size_if_too_large, reduce_precision, reduction_mapping, + subsample, supported_precision_reductions, validate_dataset_compression_arg, - default_dataset_compression_arg ) - parametrize = pytest.mark.parametrize @@ -68,11 +72,14 @@ def test_validate_dataset_compression_arg_returns_with_memory_allocation( assert validate_arg["methods"] == expected_methods -@parametrize("methods", [ - ["precision"], - ["precision", "subsample"], - ["precision", "precision", "subsample"] -]) +@parametrize( + "methods", + [ + ["precision"], + ["precision", "subsample"], + ["precision", "precision", "subsample"], + ], +) def test_validate_dataset_compression_arg_returns_with_same_methods( methods: List[str], ): @@ -125,17 +132,14 @@ def test_validate_dataset_compression_arg_raises_error_with_bad_key(key: str): ------- * Should raise a ValueError """ - bad_arg = { - **default_dataset_compression_arg, - key: 1337 - } + bad_arg = {**default_dataset_compression_arg, key: 1337} with pytest.raises(ValueError, match=r"Unknown key"): validate_dataset_compression_arg(bad_arg, memory_limit=10) @parametrize("memory_allocation", ["hello", {}, [1, 2, 3]]) def test_validate_dataset_compression_arg_raises_error_with_bad_memory_allocation_type( - memory_allocation: Any + memory_allocation: Any, ): """ Parameters @@ -148,13 +152,15 @@ def test_validate_dataset_compression_arg_raises_error_with_bad_memory_allocatio * Should raise a ValueError """ bad_arg = {"memory_allocation": memory_allocation} - with pytest.raises(ValueError, match=r"key 'memory_allocation' must be an `int` or `float`"): + with pytest.raises( + ValueError, match=r"key 'memory_allocation' must be an `int` or `float`" + ): validate_dataset_compression_arg(bad_arg, memory_limit=10) @parametrize("memory_allocation", [-0.5, 0.0, 1.0, 1.5]) def test_validate_dataset_compression_arg_raises_error_with_bad_memory_allocation_float( - memory_allocation: float + memory_allocation: float, ): """ Parameters @@ -168,16 +174,17 @@ def test_validate_dataset_compression_arg_raises_error_with_bad_memory_allocatio """ bad_arg = {"memory_allocation": memory_allocation} - with pytest.raises(ValueError, match=r"key 'memory_allocation' if float must be in \(0, 1\)"): + with pytest.raises( + ValueError, match=r"key 'memory_allocation' if float must be in \(0, 1\)" + ): validate_dataset_compression_arg(bad_arg, memory_limit=10) -@parametrize("memory_allocation, memory_limit", [ - (0, 10), (10, 10), (-20, 10), (20, 10) -]) +@parametrize( + "memory_allocation, memory_limit", 
[(0, 10), (10, 10), (-20, 10), (20, 10)] +) def test_validate_dataset_compression_arg_raises_error_with_bad_memory_allocation_int( - memory_allocation: int, - memory_limit: int + memory_allocation: int, memory_limit: int ): """ Parameters @@ -193,12 +200,16 @@ def test_validate_dataset_compression_arg_raises_error_with_bad_memory_allocatio * Should raise a ValueError """ bad_arg = {"memory_allocation": memory_allocation} - with pytest.raises(ValueError, match=r"key 'memory_allocation' if int must be in \(0,"): + with pytest.raises( + ValueError, match=r"key 'memory_allocation' if int must be in \(0," + ): validate_dataset_compression_arg(bad_arg, memory_limit=memory_limit) @parametrize("methods", [10, {"hello", "world"}, []]) -def test_validate_dataset_compression_arg_raises_error_with_bad_methods_type(methods: Any): +def test_validate_dataset_compression_arg_raises_error_with_bad_methods_type( + methods: Any, +): """ Parameters ---------- @@ -214,12 +225,17 @@ def test_validate_dataset_compression_arg_raises_error_with_bad_methods_type(met validate_dataset_compression_arg(bad_arg, memory_limit=10) -@parametrize("methods", [ - ["bad", "worse"], - ["precision", "kind_of_bad"], - ["still_bad", "precision", "subsample"] -]) -def test_validate_dataset_compression_arg_raises_error_with_bad_methods_entries(methods: Any): +@parametrize( + "methods", + [ + ["bad", "worse"], + ["precision", "kind_of_bad"], + ["still_bad", "precision", "subsample"], + ], +) +def test_validate_dataset_compression_arg_raises_error_with_bad_methods_entries( + methods: Any, +): """ Parameters ---------- @@ -235,11 +251,16 @@ def test_validate_dataset_compression_arg_raises_error_with_bad_methods_entries( validate_dataset_compression_arg(bad_arg, memory_limit=10) -@parametrize("y", [ - np.asarray(9999 * [0] + 1 * [1]), - np.asarray(4999 * [1] + 4999 * [2] + 1 * [3] + 1 * [4]), - np.asarray(4999 * [[0, 1, 1]] + 4999 * [[1, 1, 0]] + 1 * [[1, 0, 1]] + 1 * [[0, 0, 0]]) -]) +@parametrize( + "y", + [ + np.asarray(9999 * [0] + 1 * [1]), + np.asarray(4999 * [1] + 4999 * [2] + 1 * [3] + 1 * [4]), + np.asarray( + 4999 * [[0, 1, 1]] + 4999 * [[1, 1, 0]] + 1 * [[1, 0, 1]] + 1 * [[0, 0, 0]] + ), + ], +) @parametrize("random_state", list(range(5))) def test_subsample_classification_unique_labels_stay_in_training_set(y, random_state): n_samples = len(y) @@ -253,32 +274,37 @@ def test_subsample_classification_unique_labels_stay_in_training_set(y, random_s with warnings.catch_warnings(): warnings.simplefilter("ignore") X_sampled, y_sampled = subsample( - X, y, + X, + y, random_state=random_state, sample_size=sample_size, - is_classification=True + is_classification=True, ) assert X_sampled.dtype == X.dtype and y_sampled.dtype == y.dtype assert len(y_sampled) == sample_size - assert all(label in y_sampled for label in unique_labels), \ - f"sampled unique = {np.unique(y_sampled)}, original unique = {unique_labels}" + assert all( + label in y_sampled for label in unique_labels + ), f"sampled unique = {np.unique(y_sampled)}, original unique = {unique_labels}" @parametrize("X", [np.asarray([[1, 1, 1]] * 30)]) @parametrize("x_type", [list, np.ndarray, csr_matrix, pd.DataFrame]) -@parametrize("y, task", [ - (np.asarray([0] * 15 + [1] * 15), BINARY_CLASSIFICATION), - (np.asarray([0] * 10 + [1] * 10 + [2] * 10), MULTICLASS_CLASSIFICATION), - (np.asarray([[1, 0, 1]] * 30), MULTILABEL_CLASSIFICATION), - (np.asarray([1.0] * 30), REGRESSION), - (np.asarray([[1.0, 1.0, 1.0]] * 30), MULTIOUTPUT_REGRESSION), -]) +@parametrize( + "y, task", + [ + 
(np.asarray([0] * 15 + [1] * 15), BINARY_CLASSIFICATION), + (np.asarray([0] * 10 + [1] * 10 + [2] * 10), MULTICLASS_CLASSIFICATION), + (np.asarray([[1, 0, 1]] * 30), MULTILABEL_CLASSIFICATION), + (np.asarray([1.0] * 30), REGRESSION), + (np.asarray([[1.0, 1.0, 1.0]] * 30), MULTIOUTPUT_REGRESSION), + ], +) @parametrize("y_type", [list, np.ndarray, pd.DataFrame, pd.Series]) @parametrize("random_state", [0]) @parametrize("sample_size", [0.25, 0.5, 5, 10]) def test_subsample_validity(X, x_type, y, y_type, random_state, sample_size, task): - """ Asserts the validity of the function with all valid types + """Asserts the validity of the function with all valid types We want to make sure that `subsample` works correctly with all the types listed as x_type and y_type. @@ -289,10 +315,10 @@ def test_subsample_validity(X, x_type, y, y_type, random_state, sample_size, tas """ assert len(X) == len(y) # Make sure our test data is correct - if ( - y_type == pd.Series - and task in [MULTILABEL_CLASSIFICATION, MULTIOUTPUT_REGRESSION] - ): + if y_type == pd.Series and task in [ + MULTILABEL_CLASSIFICATION, + MULTIOUTPUT_REGRESSION, + ]: # We can't have a pd.Series with multiple values as it's 1 dimensional pytest.skip("Can't have pd.Series as y when task is n-dimensional") @@ -312,10 +338,11 @@ def convert(arr, objtype): with warnings.catch_warnings(): warnings.simplefilter("ignore") X_sampled, y_sampled = subsample( - X, y, + X, + y, random_state=random_state, sample_size=sample_size, - is_classification=task in CLASSIFICATION_TASKS + is_classification=task in CLASSIFICATION_TASKS, ) # Function to get the type of an obj @@ -359,9 +386,11 @@ def size(obj): assert size(X_sampled) == sample_size -@parametrize('X', [np.asarray([[0, 0, 1]] * 10)]) -@parametrize('dtype', supported_precision_reductions + [np.dtype('float32'), np.dtype('float64')]) -@parametrize('x_type', [np.ndarray, csr_matrix]) +@parametrize("X", [np.asarray([[0, 0, 1]] * 10)]) +@parametrize( + "dtype", supported_precision_reductions + [np.dtype("float32"), np.dtype("float64")] +) +@parametrize("x_type", [np.ndarray, csr_matrix]) def test_reduce_precision_correctly_reduces_precision(X, dtype, x_type): X = X.astype(dtype) if x_type == csr_matrix: @@ -376,13 +405,13 @@ def test_reduce_precision_correctly_reduces_precision(X, dtype, x_type): expected: Dict[type, type] = { np.float32: np.float32, np.float64: np.float32, - np.dtype('float32'): np.float32, - np.dtype('float64'): np.float32 + np.dtype("float32"): np.float32, + np.dtype("float64"): np.float32, } - if hasattr(np, 'float96'): + if hasattr(np, "float96"): expected[np.float96] = np.float64 - if hasattr(np, 'float128'): + if hasattr(np, "float128"): expected[np.float128] = np.float64 assert precision == expected[dtype] @@ -394,28 +423,40 @@ def test_reduce_precision_correctly_reduces_precision(X, dtype, x_type): assert type(X) == type(X_reduced) -@parametrize('X', [np.asarray([0, 0, 1]) * 10]) -@parametrize('dtype', [np.int32, np.int64, np.complex128]) +@parametrize("X", [np.asarray([0, 0, 1]) * 10]) +@parametrize("dtype", [np.int32, np.int64, np.complex128]) def test_reduce_precision_with_unsupported_dtypes(X, dtype): X = X.astype(dtype) with pytest.raises(ValueError) as err: reduce_precision(X) - expected = f"X.dtype = {X.dtype} not equal to any supported {supported_precision_reductions}" + expected = ( + f"X.dtype = {X.dtype} not equal to any supported " + f"{supported_precision_reductions}" + ) + assert err.value.args[0] == expected -@parametrize("X", [ - np.ones((100000, 10), 
dtype=np.float64) # Make it big for reductions to take place -]) +@parametrize( + "X", + [ + np.ones( + (100000, 10), dtype=np.float64 + ) # Make it big for reductions to take place + ], +) @parametrize("x_type", [csr_matrix, np.ndarray]) @parametrize("dtype", supported_precision_reductions) -@parametrize('y, is_classification', [ - (np.ones((100000,)), True), - (np.ones((100000,)), False), -]) -@parametrize('memory_allocation', [0.1, 1/5.2, 1/8, 1]) -@parametrize('operations', [['precision'], ['subsample'], ['precision', 'subsample']]) +@parametrize( + "y, is_classification", + [ + (np.ones((100000,)), True), + (np.ones((100000,)), False), + ], +) +@parametrize("memory_allocation", [0.1, 1 / 5.2, 1 / 8, 1]) +@parametrize("operations", [["precision"], ["subsample"], ["precision", "subsample"]]) def test_reduce_dataset_reduces_size_and_precision( X, x_type, dtype, y, is_classification, memory_allocation, operations ): @@ -444,13 +485,13 @@ def bytes(arr): return arr.nbytes if isinstance(arr, np.ndarray) else arr.data.nbytes # If we expect some precision reduction unless at float32 already - if 'precision' in operations and dtype != np.float32: + if "precision" in operations and dtype != np.float32: expected = reduction_mapping[X.dtype] assert X_out.dtype == expected assert bytes(X_out) < bytes(X) # If we expect some subsampling - if 'subsample' in operations: + if "subsample" in operations: assert X_out.shape[0] < X.shape[0] assert y_out.shape[0] < y.shape[0] assert bytes(X_out) < bytes(X) @@ -464,10 +505,10 @@ def test_reduce_dataset_invalid_dtype_for_precision_reduction(): reduce_dataset_size_if_too_large( X=X, y=X, - operations=['precision'], + operations=["precision"], memory_limit=1, memory_allocation=0.1, - is_classification=False + is_classification=False, ) expected_err = f"Unsupported type `{X.dtype}` for precision reduction" @@ -485,7 +526,7 @@ def test_reduce_dataset_invalid_operations(): operations=[invalid_op], memory_limit=1, memory_allocation=0.1, - is_classification=False + is_classification=False, ) expected_err = f"Unknown operation `{invalid_op}`" @@ -504,13 +545,15 @@ def test_reduce_dataset_invalid_memory_allocation_float(memory_allocation: float ------- * Should raise a ValueError """ - with pytest.raises(ValueError, match=r"memory_allocation if float must be in \(0, 1\)"): + with pytest.raises( + ValueError, match=r"memory_allocation if float must be in \(0, 1\)" + ): reduce_dataset_size_if_too_large( X=np.empty(1), y=np.empty(1), memory_limit=100, is_classification=True, - memory_allocation=memory_allocation + memory_allocation=memory_allocation, ) @@ -526,17 +569,19 @@ def test_reduce_dataset_invalid_memory_allocation_int(memory_allocation: int): ------- * Should raise a ValueError """ - with pytest.raises(ValueError, match=r"memory_allocation if int must be in \(0, memory_limit"): + with pytest.raises( + ValueError, match=r"memory_allocation if int must be in \(0, memory_limit" + ): reduce_dataset_size_if_too_large( X=np.empty(1), y=np.empty(1), is_classification=True, memory_limit=100, - memory_allocation=memory_allocation + memory_allocation=memory_allocation, ) -@parametrize("memory_allocation", ["100", {'a': 1}, [100]]) +@parametrize("memory_allocation", ["100", {"a": 1}, [100]]) def test_reduce_dataset_invalid_memory_allocation_type(memory_allocation: Any): """ Parameters @@ -554,25 +599,30 @@ def test_reduce_dataset_invalid_memory_allocation_type(memory_allocation: Any): y=np.empty(1), memory_limit=100, is_classification=True, - 
memory_allocation=memory_allocation + memory_allocation=memory_allocation, ) @pytest.mark.parametrize( - 'memory_limit,precision,task', + "memory_limit,precision,task", [ (memory_limit, precision, task) for task in chain(CLASSIFICATION_TASKS, REGRESSION_TASKS) for precision in (float, np.float32, np.float64, np.float128) for memory_limit in (1, 100) - ] + ], ) def test_reduce_dataset_subsampling_explicit_values(memory_limit, precision, task): random_state = 0 fixture = { BINARY_CLASSIFICATION: { 1: {float: 2621, np.float32: 2621, np.float64: 2621, np.float128: 1310}, - 100: {float: 12000, np.float32: 12000, np.float64: 12000, np.float128: 12000}, + 100: { + float: 12000, + np.float32: 12000, + np.float64: 12000, + np.float128: 12000, + }, }, MULTICLASS_CLASSIFICATION: { 1: {float: 409, np.float32: 409, np.float64: 409, np.float128: 204}, @@ -589,7 +639,7 @@ def test_reduce_dataset_subsampling_explicit_values(memory_limit, precision, tas MULTIOUTPUT_REGRESSION: { 1: {float: 1310, np.float32: 1310, np.float64: 1310, np.float128: 655}, 100: {float: 5000, np.float32: 5000, np.float64: 5000, np.float128: 5000}, - } + }, } # Create the task and data @@ -620,12 +670,13 @@ def test_reduce_dataset_subsampling_explicit_values(memory_limit, precision, tas with warnings.catch_warnings(): warnings.simplefilter("ignore") X_new, y_new = reduce_dataset_size_if_too_large( - X=X, y=y, + X=X, + y=y, random_state=random_state, memory_limit=memory_limit, is_classification=task in CLASSIFICATION_TASKS, - operations=['precision', 'subsample'], - memory_allocation=0.1 + operations=["precision", "subsample"], + memory_allocation=0.1, ) # Assert the new number of samples diff --git a/test/test_util/test_dependencies.py b/test/test_util/test_dependencies.py index 53b2285750..1c59dad51b 100644 --- a/test/test_util/test_dependencies.py +++ b/test/test_util/test_dependencies.py @@ -1,30 +1,31 @@ -import unittest -import pkg_resources import re - -from unittest.mock import patch, Mock +import unittest +from unittest.mock import Mock, patch import numpy as np +import pkg_resources -from autosklearn.util.dependencies import verify_packages, MissingPackageError, \ - IncorrectPackageVersionError +from autosklearn.util.dependencies import ( + IncorrectPackageVersionError, + MissingPackageError, + verify_packages, +) -@patch('pkg_resources.get_distribution') +@patch("pkg_resources.get_distribution") class VerifyPackagesTests(unittest.TestCase): - def test_existing_package(self, getDistributionMock): - requirement = 'package' + requirement = "package" distribution_mock = unittest.mock.Mock() getDistributionMock.return_value = distribution_mock - distribution_mock.version = '1.0.0' + distribution_mock.version = "1.0.0" verify_packages(requirement) - getDistributionMock.assert_called_once_with('package') + getDistributionMock.assert_called_once_with("package") def test_missing_package(self, getDistributionMock): - requirement = 'package' + requirement = "package" getDistributionMock.side_effect = pkg_resources.DistributionNotFound() @@ -35,7 +36,7 @@ def test_missing_package(self, getDistributionMock): requirement, ) - @patch('importlib.import_module') + @patch("importlib.import_module") def test_package_can_only_be_imported(self, import_mock, getDistributionMock): getDistributionMock.side_effect = pkg_resources.DistributionNotFound() @@ -43,60 +44,64 @@ def test_package_can_only_be_imported(self, import_mock, getDistributionMock): package.__version__ = np.__version__ import_mock.return_value = package - verify_packages('numpy') 
+ verify_packages("numpy") def test_correct_package_versions(self, getDistributionMock): - requirement = 'package==0.1.2\n' \ - 'package>0.1\n' \ - 'package>=0.1' + requirement = "package==0.1.2\n" "package>0.1\n" "package>=0.1" moduleMock = Mock() - moduleMock.version = '0.1.2' + moduleMock.version = "0.1.2" getDistributionMock.return_value = moduleMock verify_packages(requirement) - getDistributionMock.assert_called_with('package') + getDistributionMock.assert_called_with("package") self.assertEqual(3, len(getDistributionMock.call_args_list)) def test_wrong_package_version(self, getDistributionMock): - requirement = 'package>0.1.2' + requirement = "package>0.1.2" moduleMock = Mock() - moduleMock.version = '0.1.2' + moduleMock.version = "0.1.2" getDistributionMock.return_value = moduleMock self.assertRaisesRegex( IncorrectPackageVersionError, - re.escape("found 'package' version 0.1.2 but requires package version >0.1.2"), + re.escape( + "found 'package' version 0.1.2 but requires package version >0.1.2" + ), verify_packages, requirement, - ) + ) def test_outdated_requirement(self, getDistributionMock): - requirement = 'package>=0.1' + requirement = "package>=0.1" moduleMock = Mock() - moduleMock.version = '0.0.9' + moduleMock.version = "0.0.9" getDistributionMock.return_value = moduleMock self.assertRaisesRegex( IncorrectPackageVersionError, - re.escape("found 'package' version 0.0.9 but requires package version >=0.1"), + re.escape( + "found 'package' version 0.0.9 but requires package version >=0.1" + ), verify_packages, requirement, - ) + ) def test_too_fresh_requirement(self, getDistributionMock): - requirement = 'package==0.1.2' + requirement = "package==0.1.2" moduleMock = Mock() - moduleMock.version = '0.1.3' + moduleMock.version = "0.1.3" getDistributionMock.return_value = moduleMock self.assertRaisesRegex( IncorrectPackageVersionError, - re.escape("found 'package' version 0.1.3 but requires package version ==0.1.2"), + re.escape( + "found 'package' version 0.1.3 but requires package version ==0.1.2" + ), verify_packages, requirement, - ) + ) diff --git a/test/test_util/test_logging.py b/test/test_util/test_logging.py index 568593c7c8..d824aecc02 100644 --- a/test/test_util/test_logging.py +++ b/test/test_util/test_logging.py @@ -1,47 +1,46 @@ -import os -import unittest import logging import logging.config +import os import tempfile -import yaml +import unittest +import yaml from autosklearn.util import logging_ class LoggingTest(unittest.TestCase): - def test_setup_logger(self): # Test that setup_logger function correctly configures the logger # according to the given dictionary, and uses the default # logging.yaml file if logging_config is not specified. - with open(os.path.join(os.path.dirname(__file__), 'example_config.yaml'), 'r') as fh: + with open( + os.path.join(os.path.dirname(__file__), "example_config.yaml"), "r" + ) as fh: example_config = yaml.safe_load(fh) # Configure logger with example_config.yaml. - logging_.setup_logger(logging_config=example_config, - output_dir=tempfile.gettempdir()) + logging_.setup_logger( + logging_config=example_config, output_dir=tempfile.gettempdir() + ) # example_config sets the root logger's level to CRITICAL, # which corresponds to 50. self.assertEqual(logging.getLogger().getEffectiveLevel(), 50) # This time use the default configuration. 
- logging_.setup_logger(logging_config=None, - output_dir=tempfile.gettempdir()) + logging_.setup_logger(logging_config=None, output_dir=tempfile.gettempdir()) # default config sets the root logger's level to DEBUG, # which corresponds to 10. self.assertEqual(logging.getLogger().getEffectiveLevel(), 10) # Make sure we log to the desired directory - logging_.setup_logger(output_dir=os.path.dirname(__file__), - filename='test.log' - ) + logging_.setup_logger(output_dir=os.path.dirname(__file__), filename="test.log") logger = logging.getLogger() - logger.info('test_setup_logger') + logger.info("test_setup_logger") - with open(os.path.join(os.path.dirname(__file__), 'test.log')) as fh: - self.assertIn('test_setup_logger', ''.join(fh.readlines())) - os.remove(os.path.join(os.path.dirname(__file__), 'test.log')) + with open(os.path.join(os.path.dirname(__file__), "test.log")) as fh: + self.assertIn("test_setup_logger", "".join(fh.readlines())) + os.remove(os.path.join(os.path.dirname(__file__), "test.log")) diff --git a/test/test_util/test_single_thread_client.py b/test/test_util/test_single_thread_client.py index 34fe7736fe..770ff9f04a 100644 --- a/test/test_util/test_single_thread_client.py +++ b/test/test_util/test_single_thread_client.py @@ -1,8 +1,6 @@ import dask.distributed - -from distributed.utils_test import inc - import pytest +from distributed.utils_test import inc from autosklearn.util.single_thread_client import SingleThreadedClient diff --git a/test/test_util/test_trials_callback.py b/test/test_util/test_trials_callback.py index 3cda8ea204..d1bfe6b748 100644 --- a/test/test_util/test_trials_callback.py +++ b/test/test_util/test_trials_callback.py @@ -13,56 +13,62 @@ class AutoMLTrialsCallBack(IncorporateRunResultCallback): - def __init__(self, fname): self.trials_num = 1 self.fname = fname with open(fname, "w") as fp: - fp.write("TrialNo, " - "StartTime, " - "EndTime, " - "Status, " - "TrainLoss, " - "ValidLoss, " - "TestLoss, " - "Classifier") + fp.write( + "TrialNo, " + "StartTime, " + "EndTime, " + "Status, " + "TrainLoss, " + "ValidLoss, " + "TestLoss, " + "Classifier" + ) def __call__( - self, smbo: 'SMBO', - run_info: RunInfo, - result: RunValue, - time_left: float, + self, + smbo: "SMBO", + run_info: RunInfo, + result: RunValue, + time_left: float, ) -> None: train_loss, valid_loss, test_loss = None, None, None trial_start_time = result.starttime trial_end_time = result.endtime trial_status = result.status.name if trial_status == StatusType.SUCCESS.name: - train_loss = result.additional_info.get('train_loss') + train_loss = result.additional_info.get("train_loss") valid_loss = result.cost - test_loss = result.additional_info.get('test_loss') - trial_classifier = run_info.config.get_dictionary()['classifier:__choice__'] + test_loss = result.additional_info.get("test_loss") + trial_classifier = run_info.config.get_dictionary()["classifier:__choice__"] with open(self.fname, "a+") as fp: - fp.write(f"\n {self.trials_num}, {trial_start_time}, {trial_end_time}, {trial_status}, " - f"{train_loss}, {valid_loss}, {test_loss}, {trial_classifier}") + fp.write( + f"\n {self.trials_num}, {trial_start_time}, {trial_end_time}," + f" {trial_status}, {train_loss}, {valid_loss}, {test_loss}," + f" {trial_classifier}" + ) self.trials_num += 1 class VerifyTrialsCallBack(unittest.TestCase): - def test_trials_callback_execution(self): trials_summary_fname = os.path.join(tempfile.gettempdir(), "trials.csv") - X_train, Y_train, X_test, Y_test = putil.get_dataset('breast_cancer') - cls = 
AutoSklearnClassifier(time_left_for_this_task=30, - initial_configurations_via_metalearning=0, - per_run_time_limit=10, - memory_limit=1024, - delete_tmp_folder_after_terminate=False, - n_jobs=1, - include={'feature_preprocessor': ['pca'], - 'classifier': ['sgd']}, - get_trials_callback=AutoMLTrialsCallBack(trials_summary_fname) - ) + X_train, Y_train, X_test, Y_test = putil.get_dataset("breast_cancer") + cls = AutoSklearnClassifier( + time_left_for_this_task=30, + initial_configurations_via_metalearning=0, + per_run_time_limit=10, + memory_limit=1024, + delete_tmp_folder_after_terminate=False, + n_jobs=1, + include={"feature_preprocessor": ["pca"], "classifier": ["sgd"]}, + get_trials_callback=AutoMLTrialsCallBack(trials_summary_fname), + ) cls.fit(X_train, Y_train, X_test, Y_test) trials = pd.read_csv(trials_summary_fname) - assert trials.shape[0] > 0, f"Auto-Sklearn explored {trials.shape[0] - 1} trials" + assert ( + trials.shape[0] > 0 + ), f"Auto-Sklearn explored {trials.shape[0] - 1} trials" diff --git a/testcommand.sh b/testcommand.sh deleted file mode 100644 index 00c8fe8321..0000000000 --- a/testcommand.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/usr/bin/env bash -pytest -n 3 --durations=20 --timeout=300 --dist load --timeout-method=thread --fulltrace -v $1