Skip to content

Commit 55d26b1

Browse files
authored
fix(Multiprocessing): Fully disable multiprocessing when not used (#281)
This PR fully disables multiprocessing when `max_processes = 1`. This speeds up execution on Windows-based systems, as the overhead of spawning processes using the `spawn` context is large. For this reason, this PR also disables the multiprocessing checkpointing tests on all systems not running Linux, since Linux by default uses `fork`, which comes with less overhead. Finally, a better error message is given when a user runs an unguarded benchmark (main script is not wrapped in `if __name__ == '__main__'`) while using multiprocessing (`max_processes > 1`). closes #279
1 parent 45f3100 commit 55d26b1

6 files changed

Lines changed: 102 additions & 60 deletions

File tree

decent_bench/benchmark/_benchmark.py

Lines changed: 39 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -135,8 +135,7 @@ def resume_benchmark( # noqa: PLR0912
135135
raise ValueError(f"Invalid checkpoint directory: metadata is not valid JSON - {e}") from e
136136

137137
if create_backup:
138-
backup_path = checkpoint_manager.create_backup()
139-
LOGGER.info(f"Created backup of checkpoint directory at '{backup_path}'")
138+
checkpoint_manager.create_backup()
140139

141140
LOGGER.info(
142141
f"Resuming benchmark from checkpoint '{checkpoint_manager.checkpoint_dir}' with {metadata['n_trials']} trials "
@@ -189,7 +188,8 @@ def resume_benchmark( # noqa: PLR0912
189188
checkpoint_manager=checkpoint_manager,
190189
runtime_metrics=runtime_metrics,
191190
)
192-
log_listener.stop()
191+
if log_listener is not None:
192+
log_listener.stop()
193193
return results
194194

195195

@@ -282,15 +282,16 @@ def benchmark(
282282
checkpoint_manager=checkpoint_manager,
283283
runtime_metrics=runtime_metrics,
284284
)
285-
log_listener.stop()
285+
if log_listener is not None:
286+
log_listener.stop()
286287
return results
287288

288289

289290
def _benchmark(
290291
algorithms: list[Algorithm[Network]],
291292
benchmark_problem: BenchmarkProblem,
292-
log_listener: QueueListener,
293-
manager: "SyncManager",
293+
log_listener: QueueListener | None,
294+
manager: "SyncManager | None",
294295
*,
295296
mp_context: "SpawnContext | None" = None,
296297
n_trials: int = 30,
@@ -374,16 +375,26 @@ def _init_logging_and_multiprocessing(
374375
log_level: int,
375376
max_processes: int | None,
376377
benchmark_problem: BenchmarkProblem,
377-
) -> tuple[QueueListener, "SyncManager", "SpawnContext | None"]:
378+
) -> tuple[QueueListener | None, "SyncManager | None", "SpawnContext | None"]:
378379
# Detect if PyTorch costs are being used to determine multiprocessing context
379-
if max_processes != 1:
380-
use_spawn = _should_use_spawn_context(benchmark_problem)
381-
mp_context = get_context("spawn") if use_spawn else None
382-
else:
383-
use_spawn = False
384-
mp_context = None
385-
386-
manager = Manager() if not use_spawn else get_context("spawn").Manager()
380+
if max_processes == 1:
381+
logger.start_logger(log_level)
382+
return None, None, None
383+
384+
use_spawn = _should_use_spawn_context(benchmark_problem)
385+
mp_context = get_context("spawn") if use_spawn else None
386+
try:
387+
manager = Manager() if mp_context is None else mp_context.Manager()
388+
except RuntimeError as e:
389+
if _is_multiprocessing_main_guard_error(e):
390+
raise RuntimeError(
391+
"Failed to start multiprocessing workers. Benchmark execution "
392+
"must be launched inside a guarded main entrypoint. Wrap your benchmark call in:\n\n"
393+
"if __name__ == '__main__':\n"
394+
" ... call decent_bench.benchmark(...)\n\n"
395+
"This prevents child processes from re-running top-level script code during import."
396+
) from e
397+
raise
387398
log_listener = logger.start_log_listener(manager, log_level)
388399

389400
if use_spawn:
@@ -392,12 +403,18 @@ def _init_logging_and_multiprocessing(
392403
return log_listener, manager, mp_context
393404

394405

406+
def _is_multiprocessing_main_guard_error(exc: RuntimeError) -> bool:
407+
"""Return True for the common spawn bootstrap error caused by missing main guard."""
408+
msg = str(exc)
409+
return "start a new process before the" in msg and "bootstrapping phase" in msg
410+
411+
395412
def _run_trials( # noqa: PLR0917
396413
algorithms: list[Algorithm[Network]],
397414
n_trials: int,
398415
problem: BenchmarkProblem,
399416
progress_bar_ctrl: ProgressBarController,
400-
log_listener: QueueListener,
417+
log_listener: QueueListener | None,
401418
max_processes: int | None,
402419
mp_context: "SpawnContext | None" = None,
403420
checkpoint_manager: "CheckpointManager | None" = None,
@@ -467,6 +484,12 @@ def _run_trials( # noqa: PLR0917
467484
if max_processes == 1:
468485
partial_result = {alg: [_run_trial(*args) for args in trial_args[alg]] for alg in trial_args}
469486
else:
487+
if log_listener is None:
488+
# This shouldn't happen: internal invariant violation
489+
raise RuntimeError(
490+
"Log listener must be initialized for multiprocessing to handle logs from worker processes"
491+
)
492+
470493
with ProcessPoolExecutor(
471494
initializer=logger.start_queue_logger,
472495
initargs=(log_listener.queue,),

decent_bench/utils/progress_bar.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ class ProgressBarController:
167167
Args:
168168
manager: A multiprocessing :class:`~multiprocessing.managers.SyncManager` instance used to create a shared queue
169169
for coordinating progress updates across multiple processes. This enables thread-safe communication between
170-
worker processes and the progress bar listener thread.
170+
worker processes and the progress bar listener thread. If ``None``, a local in-process queue is used.
171171
algorithms: algorithms that will be run, each gets its own bar
172172
n_trials: number of trials the algorithms will run
173173
progress_step: if provided, the progress bar will step every `progress_step`.
@@ -182,14 +182,17 @@ class ProgressBarController:
182182

183183
def __init__( # noqa: PLR0917
184184
self,
185-
manager: SyncManager,
185+
manager: SyncManager | None,
186186
algorithms: Sequence[Algorithm[Any]],
187187
n_trials: int,
188188
progress_step: int | None,
189189
show_speed: bool = False,
190190
show_trial: bool = False,
191191
):
192-
self._progress_increment_queue: Queue[_ProgressRecord | None] = manager.Queue()
192+
# Use a local queue for single-process runs to avoid multiprocessing manager overhead.
193+
self._progress_increment_queue: Queue[_ProgressRecord | None] = (
194+
manager.Queue() if manager is not None else Queue()
195+
)
193196
self.progress_step = progress_step
194197
p_cols = [
195198
(TextColumn("{task.description}"), Text("Algorithm", style="bold")),

docs/source/api/decent_bench.rst

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ decent\_bench
1717
decent_bench.costs
1818
decent_bench.datasets
1919
decent_bench.distributed_algorithms
20-
decent_bench.utils.network_utils
2120
decent_bench.networks
2221
decent_bench.schemes
2322

docs/source/user.rst

Lines changed: 32 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,12 @@ Generally benchmark execution involves three steps:
2222
2. Compute metrics from the benchmark results, which returns a :class:`~decent_bench.benchmark.MetricResult` object.
2323
3. Display the computed metrics in tables and plots.
2424

25+
Note:
26+
When running benchmarks, be sure to guard the execution code with ``if __name__ == "__main__":`` to avoid issues with multiprocessing on some platforms (e.g., Windows).
27+
This is a common Python practice to ensure that the benchmark code only runs when the script is executed directly, and not when it is imported as a module or when worker
28+
processes are spawned for multiprocessing. If you forget to include this guard and you are using multiprocessing, i.e. with ``max_processes > 1`` in :func:`~decent_bench.benchmark.benchmark`,
29+
you may encounter errors or unexpected behavior due to the way multiprocessing works on different platforms.
30+
2531
**The following is a working example. The remainder of the user guide will be updated soon.**
2632

2733
.. code-block:: python
@@ -36,39 +42,40 @@ Generally benchmark execution involves three steps:
3642
3743
import networkx as nx
3844
39-
## problem definition
40-
n_agents = 10
45+
if __name__ == "__main__":
46+
## problem definition
47+
n_agents = 10
4148
42-
costs, x_optimal = create_quadratic_problem(10, n_agents)
49+
costs, x_optimal = create_quadratic_problem(10, n_agents)
4350
44-
agents = [Agent(i, cost) for i, cost in enumerate(costs)]
45-
graph = nx.complete_graph(n_agents)
46-
47-
net = P2PNetwork(
48-
graph=graph,
49-
agents=agents,
50-
)
51+
agents = [Agent(i, cost) for i, cost in enumerate(costs)]
52+
graph = nx.complete_graph(n_agents)
53+
54+
net = P2PNetwork(
55+
graph=graph,
56+
agents=agents,
57+
)
5158
52-
bp = benchmark.BenchmarkProblem(net, x_optimal)
59+
bp = benchmark.BenchmarkProblem(net, x_optimal)
5360
54-
## benchmarking
55-
cm = CheckpointManager(checkpoint_dir="results/benchmark_1", checkpoint_step=100, keep_n_checkpoints=2)
61+
## benchmarking
62+
cm = CheckpointManager(checkpoint_dir="results/benchmark_1", checkpoint_step=100, keep_n_checkpoints=2)
5663
57-
num_iter = 1000
58-
step = 0.001
64+
num_iter = 1000
65+
step = 0.001
5966
60-
res = benchmark.benchmark(algorithms=[
61-
DGD(iterations=num_iter, step_size=step),
62-
ATC(iterations=num_iter, step_size=step),
63-
],
64-
benchmark_problem=bp,
65-
checkpoint_manager=cm,
66-
n_trials=1,
67-
)
67+
res = benchmark.benchmark(algorithms=[
68+
DGD(iterations=num_iter, step_size=step),
69+
ATC(iterations=num_iter, step_size=step),
70+
],
71+
benchmark_problem=bp,
72+
checkpoint_manager=cm,
73+
n_trials=1,
74+
)
6875
69-
metr = benchmark.compute_metrics(res, checkpoint_manager=cm)
76+
metr = benchmark.compute_metrics(res, checkpoint_manager=cm)
7077
71-
benchmark.display_metrics(metr, checkpoint_manager=cm)
78+
benchmark.display_metrics(metr, checkpoint_manager=cm)
7279
7380
7481
Benchmark executions will have outputs like these:

readthedocs.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@ build:
99
os: ubuntu-24.04
1010
tools:
1111
python: "3.13"
12+
jobs:
13+
build:
14+
html:
15+
- mkdir -p $READTHEDOCS_OUTPUT/html/
16+
- python -m sphinx -T -W --keep-going -j 1 -b html -d _build/doctrees -D language=en docs/source $READTHEDOCS_OUTPUT/html
1217

1318
# Build documentation in the "docs/" directory with Sphinx
1419
sphinx:

test/utils/test_checkpoints.py

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import logging
33
import os
44
import random
5+
import sys
56
from copy import deepcopy
67
from dataclasses import dataclass
78
from pathlib import Path
@@ -38,6 +39,15 @@
3839
# Suppress JAX debug logs that cause issues during cleanup
3940
logging.getLogger("jax").setLevel(logging.WARNING)
4041

42+
IS_LINUX = sys.platform.startswith("linux")
43+
LINUX_ONLY_MP_GT1 = pytest.mark.skipif(not IS_LINUX, reason="max_processes > 1 is Linux-only")
44+
45+
46+
def _skip_if_max_processes_exceeds_cpu_count(max_processes: int) -> None:
47+
cpu_count = os.cpu_count()
48+
if cpu_count is not None and max_processes > cpu_count:
49+
pytest.skip(f"max_processes={max_processes} exceeds available CPU cores")
50+
4151

4252
@dataclass(eq=False)
4353
class DummyAlg(DGD):
@@ -298,7 +308,7 @@ def test_create_backup_and_clear(tmp_path: Path) -> None: # noqa: D103
298308
("cost_cls", "max_processes"),
299309
[
300310
(LogisticRegressionCost, 1),
301-
(LogisticRegressionCost, 2),
311+
pytest.param(LogisticRegressionCost, 2, marks=LINUX_ONLY_MP_GT1),
302312
pytest.param(
303313
PyTorchCost,
304314
1,
@@ -315,8 +325,7 @@ def test_resume_from_checkpoint_with_additional_trials(
315325
max_processes: int,
316326
seed: int | None,
317327
) -> None:
318-
if os.cpu_count() is not None and max_processes > os.cpu_count():
319-
pytest.skip(f"max_processes={max_processes} exceeds available CPU cores")
328+
_skip_if_max_processes_exceeds_cpu_count(max_processes)
320329

321330
if seed is not None:
322331
iop.set_seed(seed)
@@ -405,7 +414,7 @@ def test_resume_from_checkpoint_with_additional_trials(
405414
("cost_cls", "max_processes"),
406415
[
407416
(LogisticRegressionCost, 1),
408-
(LogisticRegressionCost, 2),
417+
pytest.param(LogisticRegressionCost, 2, marks=LINUX_ONLY_MP_GT1),
409418
pytest.param(
410419
PyTorchCost,
411420
1,
@@ -422,8 +431,7 @@ def test_resume_from_checkpoint_with_additional_iterations(
422431
max_processes: int,
423432
seed: int | None,
424433
) -> None:
425-
if os.cpu_count() is not None and max_processes > os.cpu_count():
426-
pytest.skip(f"max_processes={max_processes} exceeds available CPU cores")
434+
_skip_if_max_processes_exceeds_cpu_count(max_processes)
427435

428436
if seed is not None:
429437
iop.set_seed(seed)
@@ -514,7 +522,7 @@ def test_resume_from_checkpoint_with_additional_iterations(
514522
("cost_cls", "max_processes"),
515523
[
516524
(LogisticRegressionCost, 1),
517-
(LogisticRegressionCost, 2),
525+
pytest.param(LogisticRegressionCost, 2, marks=LINUX_ONLY_MP_GT1),
518526
pytest.param(
519527
PyTorchCost,
520528
1,
@@ -531,8 +539,7 @@ def test_resume_from_checkpoint_with_additional_iterations_and_trials(
531539
max_processes: int,
532540
seed: int | None,
533541
) -> None:
534-
if os.cpu_count() is not None and max_processes > os.cpu_count():
535-
pytest.skip(f"max_processes={max_processes} exceeds available CPU cores")
542+
_skip_if_max_processes_exceeds_cpu_count(max_processes)
536543

537544
if seed is not None:
538545
iop.set_seed(seed)
@@ -624,7 +631,7 @@ def test_resume_from_checkpoint_with_additional_iterations_and_trials(
624631
("cost_cls", "max_processes"),
625632
[
626633
(LogisticRegressionCost, 1),
627-
(LogisticRegressionCost, 2),
634+
pytest.param(LogisticRegressionCost, 2, marks=LINUX_ONLY_MP_GT1),
628635
pytest.param(
629636
PyTorchCost,
630637
1,
@@ -641,8 +648,7 @@ def test_resume_from_non_completed_checkpoint(
641648
max_processes: int,
642649
seed: int | None,
643650
) -> None:
644-
if os.cpu_count() is not None and max_processes > os.cpu_count():
645-
pytest.skip(f"max_processes={max_processes} exceeds available CPU cores")
651+
_skip_if_max_processes_exceeds_cpu_count(max_processes)
646652

647653
if seed is not None:
648654
iop.set_seed(seed)
@@ -774,7 +780,7 @@ def test_resume_from_non_completed_checkpoint(
774780
("cost_cls", "max_processes"),
775781
[
776782
(LogisticRegressionCost, 1),
777-
(LogisticRegressionCost, 2),
783+
pytest.param(LogisticRegressionCost, 2, marks=LINUX_ONLY_MP_GT1),
778784
pytest.param(
779785
PyTorchCost,
780786
1,
@@ -789,8 +795,7 @@ def test_back_to_back_benchmarks(
789795
cost_cls: type[LogisticRegressionCost | PyTorchCost],
790796
max_processes: int,
791797
) -> None:
792-
if os.cpu_count() is not None and max_processes > os.cpu_count():
793-
pytest.skip(f"max_processes={max_processes} exceeds available CPU cores")
798+
_skip_if_max_processes_exceeds_cpu_count(max_processes)
794799

795800
iop.set_seed(123)
796801
problem_5, algorithms_5 = _build_problem_and_algorithms(5, cost_cls=cost_cls)

0 commit comments

Comments
 (0)