Skip to content

Commit c9d7cd1

Browse files
[Core]Add Diffusion executor (#865)
Signed-off-by: wzliu <wzliu@connect.hku.hk>
1 parent 4869311 commit c9d7cd1

5 files changed

Lines changed: 305 additions & 196 deletions

File tree

vllm_omni/diffusion/diffusion_engine.py

Lines changed: 19 additions & 185 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,23 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33

4-
import multiprocessing as mp
54
import os
65
import time
7-
import weakref
8-
from collections.abc import Callable, Iterable
9-
from dataclasses import dataclass
6+
from collections.abc import Iterable
107
from typing import Any
118

129
import PIL.Image
1310
from vllm.logger import init_logger
1411

15-
from vllm_omni.diffusion.data import SHUTDOWN_MESSAGE, OmniDiffusionConfig
12+
from vllm_omni.diffusion.data import OmniDiffusionConfig
13+
from vllm_omni.diffusion.executor.abstract import DiffusionExecutor
1614
from vllm_omni.diffusion.registry import (
1715
DiffusionModelRegistry,
1816
get_diffusion_post_process_func,
1917
get_diffusion_pre_process_func,
2018
)
2119
from vllm_omni.diffusion.request import OmniDiffusionRequest
22-
from vllm_omni.diffusion.scheduler import Scheduler, scheduler
2320
from vllm_omni.outputs import OmniRequestOutput
24-
from vllm_omni.utils.platform_utils import get_diffusion_worker_class
2521

2622
logger = init_logger(__name__)
2723

@@ -33,39 +29,6 @@ def supports_image_input(model_class_name: str) -> bool:
3329
return bool(getattr(model_cls, "support_image_input", False))
3430

3531

36-
@dataclass
37-
class BackgroundResources:
38-
"""
39-
Used as a finalizer for clean shutdown.
40-
Create a BackgroundResources instance to encapsulate all background resources
41-
(e.g., the scheduler and worker processes) that need explicit cleanup.
42-
This object holds references to external system resources that are not managed
43-
by Python's garbage collector (like OS processes, message queues, etc.),
44-
so they must be cleaned up manually to avoid resource leaks or zombie processes.
45-
"""
46-
47-
scheduler: Scheduler | None = None
48-
processes: list[mp.Process] | None = None
49-
50-
def __call__(self):
51-
"""Clean up background resources."""
52-
if scheduler is not None:
53-
try:
54-
for _ in range(scheduler.num_workers):
55-
scheduler.mq.enqueue(SHUTDOWN_MESSAGE)
56-
scheduler.close()
57-
except Exception as exc:
58-
logger.warning("Failed to send shutdown signal: %s", exc)
59-
for proc in self.processes:
60-
if not proc.is_alive():
61-
continue
62-
proc.join(30)
63-
if proc.is_alive():
64-
logger.warning("Terminating diffusion worker %s after timeout", proc.name)
65-
proc.terminate()
66-
proc.join(30)
67-
68-
6932
class DiffusionEngine:
7033
"""The diffusion engine for vLLM-Omni diffusion models."""
7134

@@ -80,9 +43,9 @@ def __init__(self, od_config: OmniDiffusionConfig):
8043
self.post_process_func = get_diffusion_post_process_func(od_config)
8144
self.pre_process_func = get_diffusion_pre_process_func(od_config)
8245

83-
self._processes: list[mp.Process] = []
84-
self._closed = False
85-
self._make_client()
46+
executor_class = DiffusionExecutor.get_class(od_config)
47+
self.executor = executor_class(od_config)
48+
8649
try:
8750
self._dummy_run()
8851
except Exception as e:
@@ -200,96 +163,8 @@ def make_engine(config: OmniDiffusionConfig) -> "DiffusionEngine":
200163
"""
201164
return DiffusionEngine(config)
202165

203-
def _make_client(self):
204-
# TODO rename it
205-
scheduler.initialize(self.od_config)
206-
207-
# Get the broadcast handle from the initialized scheduler
208-
broadcast_handle = scheduler.get_broadcast_handle()
209-
210-
processes, result_handle = self._launch_workers(
211-
broadcast_handle=broadcast_handle,
212-
)
213-
214-
if result_handle is not None:
215-
scheduler.initialize_result_queue(result_handle)
216-
else:
217-
logger.error("Failed to get result queue handle from workers")
218-
219-
self._processes = processes
220-
221-
self.resources = BackgroundResources(scheduler=scheduler, processes=self._processes)
222-
# Use weakref.finalize instead of __del__ or relying on self.close() at shutdown.
223-
# During interpreter shutdown, global state (e.g., modules, built-ins) may already
224-
# be cleared (set to None), so calling normal cleanup methods can fail with
225-
# AttributeError: 'NoneType' object has no attribute '...'.
226-
# weakref.finalize schedules cleanup *before* such destruction begins,
227-
# ensuring resources are released while the runtime environment is still intact.
228-
self._finalizer = weakref.finalize(self, self.resources)
229-
230-
def _launch_workers(self, broadcast_handle):
231-
od_config = self.od_config
232-
logger.info("Starting server...")
233-
234-
num_gpus = od_config.num_gpus
235-
mp.set_start_method("spawn", force=True)
236-
processes = []
237-
238-
# Get the appropriate worker class for current device
239-
worker_proc = get_diffusion_worker_class()
240-
241-
# Launch all worker processes
242-
scheduler_pipe_readers = []
243-
scheduler_pipe_writers = []
244-
245-
for i in range(num_gpus):
246-
reader, writer = mp.Pipe(duplex=False)
247-
scheduler_pipe_writers.append(writer)
248-
process = mp.Process(
249-
target=worker_proc.worker_main,
250-
args=(
251-
i, # rank
252-
od_config,
253-
writer,
254-
broadcast_handle,
255-
),
256-
name=f"DiffusionWorker-{i}",
257-
daemon=True,
258-
)
259-
scheduler_pipe_readers.append(reader)
260-
process.start()
261-
processes.append(process)
262-
263-
# Wait for all workers to be ready
264-
scheduler_infos = []
265-
result_handle = None
266-
for writer in scheduler_pipe_writers:
267-
writer.close()
268-
269-
for i, reader in enumerate(scheduler_pipe_readers):
270-
try:
271-
data = reader.recv()
272-
except EOFError:
273-
logger.error(f"Rank {i} scheduler is dead. Please check if there are relevant logs.")
274-
processes[i].join()
275-
logger.error(f"Exit code: {processes[i].exitcode}")
276-
raise
277-
278-
if data["status"] != "ready":
279-
raise RuntimeError("Initialization failed. Please see the error messages above.")
280-
281-
if i == 0:
282-
result_handle = data.get("result_handle")
283-
284-
scheduler_infos.append(data)
285-
reader.close()
286-
287-
logger.debug("All workers are ready")
288-
289-
return processes, result_handle
290-
291166
def add_req_and_wait_for_response(self, requests: list[OmniDiffusionRequest]):
292-
return scheduler.add_req(requests)
167+
return self.executor.add_req(requests)
293168

294169
def start_profile(self, trace_filename: str | None = None) -> None:
295170
"""
@@ -437,7 +312,7 @@ def _dummy_run(self):
437312

438313
def collective_rpc(
439314
self,
440-
method: str | Callable,
315+
method: str,
441316
timeout: float | None = None,
442317
args: tuple = (),
443318
kwargs: dict | None = None,
@@ -446,7 +321,7 @@ def collective_rpc(
446321
"""Call a method on worker processes and get results immediately.
447322
448323
Args:
449-
method: The method name (str) or callable to execute on workers
324+
method: The method name (str) to execute on workers
450325
timeout: Optional timeout in seconds
451326
args: Positional arguments for the method
452327
kwargs: Keyword arguments for the method
@@ -455,59 +330,18 @@ def collective_rpc(
455330
Returns:
456331
Single result if unique_reply_rank is provided, otherwise list of results
457332
"""
458-
if self._closed:
459-
raise RuntimeError("DiffusionEngine is closed.")
460-
461-
deadline = None if timeout is None else time.monotonic() + timeout
462-
kwargs = kwargs or {}
463-
464-
assert isinstance(method, str)
465-
send_method = method
466-
467-
# Prepare RPC request message
468-
rpc_request = {
469-
"type": "rpc",
470-
"method": send_method,
471-
"args": args,
472-
"kwargs": kwargs,
473-
"output_rank": unique_reply_rank,
474-
}
475-
476-
try:
477-
# Broadcast RPC request to all workers via unified message queue
478-
scheduler.mq.enqueue(rpc_request)
479-
480-
# Determine which workers we expect responses from
481-
num_responses = 1 if unique_reply_rank is not None else self.od_config.num_gpus
482-
483-
responses = []
484-
for _ in range(num_responses):
485-
dequeue_timeout = None if deadline is None else (deadline - time.monotonic())
486-
try:
487-
if scheduler.result_mq is None:
488-
raise RuntimeError("Result queue not initialized")
489-
490-
response = scheduler.result_mq.dequeue(timeout=dequeue_timeout)
491-
492-
# Check if response indicates an error
493-
if isinstance(response, dict) and response.get("status") == "error":
494-
raise RuntimeError(
495-
f"Worker failed with error '{response.get('error')}', "
496-
"please check the stack trace above for the root cause"
497-
)
498-
499-
responses.append(response)
500-
except TimeoutError as e:
501-
raise TimeoutError(f"RPC call to {method} timed out.") from e
502-
503-
return responses[0] if unique_reply_rank is not None else responses
504-
505-
except Exception as e:
506-
logger.error(f"RPC call failed: {e}")
507-
raise
333+
assert isinstance(method, str), "Only string method names are supported for now"
334+
return self.executor.collective_rpc(
335+
method=method,
336+
timeout=timeout,
337+
args=args,
338+
kwargs=kwargs,
339+
unique_reply_rank=unique_reply_rank,
340+
)
508341

509342
def close(self) -> None:
510-
self._finalizer()
343+
if hasattr(self, "executor"):
344+
self.executor.shutdown()
511345

512346
def abort(self, request_id: str | Iterable[str]) -> None:
513347
# TODO implement it

vllm_omni/diffusion/executor/__init__.py

Whitespace-only changes.
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
from abc import ABC, abstractmethod
2+
from typing import Any
3+
4+
from vllm.utils.import_utils import resolve_obj_by_qualname
5+
6+
from vllm_omni.diffusion.data import OmniDiffusionConfig
7+
from vllm_omni.diffusion.request import OmniDiffusionRequest
8+
9+
10+
class DiffusionExecutor(ABC):
    """Base interface that every Diffusion executor implementation must satisfy.

    Concrete subclasses own the worker lifecycle: they launch workers in
    ``_init_executor``, forward work via ``add_req`` / ``collective_rpc``,
    and tear everything down in ``shutdown``.
    """

    # Subclasses that run workers in separate processes set this to True.
    uses_multiproc: bool = False

    @staticmethod
    def get_class(od_config: OmniDiffusionConfig) -> type["DiffusionExecutor"]:
        """Resolve the executor class selected by ``od_config.distributed_executor_backend``.

        The backend may be a ``DiffusionExecutor`` subclass, one of the known
        keywords ("mp", "ray", "external_launcher"), or a dotted python path
        to a subclass.
        """
        backend = od_config.distributed_executor_backend

        # Case 1: the caller handed us a class object directly.
        if isinstance(backend, type):
            if not issubclass(backend, DiffusionExecutor):
                raise TypeError(
                    "distributed_executor_backend must be a subclass of "
                    f"DiffusionExecutor. Got {backend}."
                )
            return backend

        # Case 2: well-known backend keywords.
        if backend == "ray":
            raise NotImplementedError("ray backend is not yet supported.")
        if backend == "mp":
            # Imported lazily to avoid a circular import with the executor module.
            from vllm_omni.diffusion.executor.multiproc_executor import MultiprocDiffusionExecutor

            return MultiprocDiffusionExecutor
        if backend == "external_launcher":
            raise NotImplementedError("external_launcher backend is not yet supported.")

        # Case 3: an arbitrary dotted path such as "pkg.mod.MyExecutor".
        if isinstance(backend, str):
            try:
                resolved = resolve_obj_by_qualname(backend)
            except (ImportError, ValueError) as e:
                raise ValueError(
                    f"Failed to load executor backend '{backend}'. "
                    f"Ensure it is a valid python path. Error: {e}"
                ) from e
            if not issubclass(resolved, DiffusionExecutor):
                raise TypeError(
                    f"distributed_executor_backend must be a subclass of DiffusionExecutor. Got {resolved}."
                )
            return resolved

        raise ValueError(f"Unknown distributed executor backend: {backend}")

    def __init__(self, od_config: OmniDiffusionConfig):
        # Keep the config around for subclasses, then let the concrete
        # implementation bring up its workers.
        self.od_config = od_config
        self._init_executor()

    @abstractmethod
    def _init_executor(self) -> None:
        """Initialize the executor (e.g., launch workers, setup IPC)."""
        ...

    @abstractmethod
    def add_req(self, requests: list[OmniDiffusionRequest]):
        """Add requests to the execution queue."""
        ...

    @abstractmethod
    def collective_rpc(
        self,
        method: str,
        timeout: float | None = None,
        args: tuple = (),
        kwargs: dict | None = None,
        unique_reply_rank: int | None = None,
    ) -> Any:
        """Execute a method on workers."""
        ...

    @abstractmethod
    def check_health(self) -> None:
        """Check if the executor and workers are healthy."""
        ...

    @abstractmethod
    def shutdown(self) -> None:
        """Shutdown the executor and release resources."""
        ...

0 commit comments

Comments (0)