Skip to content

Commit b42e2d2

Browse files
committed
[WIP][Core] Enable async scheduling by default
Signed-off-by: Nick Hill <[email protected]>
1 parent 3eb0c26 commit b42e2d2

File tree

2 files changed

+6 -6 lines changed

vllm/config/scheduler.py

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -135,11 +135,12 @@ class SchedulerConfig:
135 135
like full attention and sliding window attention.
136 136
"""
137 137

138-
async_scheduling: bool = False
139-
"""If set to True, perform async scheduling. This helps to avoid gaps in
140-
GPU utilization, leading to better latency and throughput.
141-
Async scheduling is currently not supported with some features such as
142-
speculative decoding and pipeline parallelism.
138+
async_scheduling: bool = Field(default=None)
139+
"""If set to False, disable async scheduling. Async scheduling helps to
140+
avoid gaps in GPU utilization, leading to better latency and throughput.
141+
It is currently not supported with some features such as
142+
speculative decoding and pipeline parallelism, and will be automatically
143+
disabled in those cases.
143 144
"""
144 145

145 146
def get_scheduler_cls(self) -> type["SchedulerInterface"]:

vllm/config/vllm.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -386,7 +386,6 @@ def __post_init__(self):
386 386
)
387 387
elif self.scheduler_config.async_scheduling is None:
388 388
# Enable async scheduling unless there is an incompatible option.
389-
# NOTE: we won't reach here until async scheduling is enabled by default.
390 389
if (
391 390
self.parallel_config.pipeline_parallel_size > 1
392 391
or self.speculative_config is not None

0 commit comments

Comments
 (0)