[V1] Chore: cruft removal (#11724)

robertgshaw2-redhat · web-flow · commit ad0d567e1cdc · 2025-01-03T23:25:02.000Z
diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py
@@ -225,8 +225,6 @@ def __init__(
         # Logic to switch between engines is done at runtime instead of import
         # to avoid import order issues
         self.engine_class = self.get_engine_class()
-
-        # TODO(rob): enable mp by default (issue with fork vs spawn)
         self.llm_engine = self.engine_class.from_engine_args(
             engine_args, usage_context=UsageContext.LLM_CLASS)
 
diff --git a/vllm/v1/engine/core_client.py b/vllm/v1/engine/core_client.py
@@ -94,8 +94,6 @@ class InprocClient(EngineCoreClient):
 
         * pushes EngineCoreRequest directly into the EngineCore
         * pulls EngineCoreOutputs by stepping the EngineCore
-
-        TODO: support asyncio-mode for debugging.
     """
 
     def __init__(self, *args, **kwargs):
diff --git a/vllm/v1/engine/llm_engine.py b/vllm/v1/engine/llm_engine.py
@@ -42,8 +42,6 @@ def __init__(
         use_cached_outputs: bool = False,
         multiprocess_mode: bool = False,
     ) -> None:
-
-        # TODO: Can we avoid this?
         self.model_config = vllm_config.model_config
 
         # Tokenizer (+ ensure liveness if running in another process).
@@ -179,8 +177,6 @@ def step(self) -> List[RequestOutput]:
 
         return request_outputs
 
-    # TODO(rob): Can we get rid of these?
-
     def get_model_config(self):
         return self.model_config
 
diff --git a/vllm/v1/engine/processor.py b/vllm/v1/engine/processor.py
@@ -49,9 +49,6 @@ def __init__(
             cache_config.enable_prefix_caching
         self.mm_hasher = MMHasher()
 
-    # TODO: run in an ThreadpoolExecutor or BackgroundProcess.
-    # This ideally should releases the GIL, so we should not block the
-    # asyncio loop while this is running.
     def process_inputs(
         self,
         request_id: str,