Skip to content

Commit a007dde

Browse files
committed
[WIP][Core] Enable async scheduling by default
Signed-off-by: Nick Hill <[email protected]>
1 parent e502098 commit a007dde

File tree

2 files changed

30 additions and 13 deletions (+30 / -13 lines changed)

2 files changed

30 additions and 13 deletions (+30 / -13 lines changed)

vllm/config/scheduler.py

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -128,11 +128,12 @@ class SchedulerConfig:
128128
like full attention and sliding window attention.
129129
"""
130130

131-
async_scheduling: bool = False
132-
"""If set to True, perform async scheduling. This helps to avoid gaps in
133-
GPU utilization, leading to better latency and throughput.
134-
Async scheduling is currently not supported with some features such as
135-
speculative decoding and pipeline parallelism.
131+
async_scheduling: bool = Field(default=None)
132+
"""If set to False, disable async scheduling. Async scheduling helps to
133+
avoid gaps in GPU utilization, leading to better latency and throughput.
134+
It is currently not supported with some features such as
135+
speculative decoding and pipeline parallelism, and will be automatically
136+
disabled in those cases.
136137
"""
137138

138139
stream_interval: int = Field(default=1, ge=1)

vllm/config/vllm.py

Lines changed: 24 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -381,7 +381,7 @@ def __post_init__(self):
381381
if self.speculative_config.method not in get_args(EagleModelTypes):
382382
raise ValueError(
383383
"Currently, async scheduling is only supported "
384-
"with EAGLE/MTP kind of speculative decoding"
384+
"with EAGLE/MTP kind of speculative decoding."
385385
)
386386
if self.speculative_config.disable_padded_drafter_batch:
387387
raise ValueError(
@@ -399,16 +399,27 @@ def __post_init__(self):
399399
)
400400
elif self.scheduler_config.async_scheduling is None:
401401
# Enable async scheduling unless there is an incompatible option.
402-
# NOTE: we won't reach here until async scheduling is enabled by default.
403-
if (
404-
self.parallel_config.pipeline_parallel_size > 1
405-
or self.speculative_config is not None
406-
):
402+
if self.parallel_config.pipeline_parallel_size > 1:
407403
logger.warning(
408-
"Async scheduling is not yet supported with speculative decoding "
409-
" or pipeline_parallel_size > 1 and will be disabled."
404+
"Async scheduling is not yet supported with "
405+
"pipeline_parallel_size > 1 and will be disabled."
410406
)
411407
self.scheduler_config.async_scheduling = False
408+
elif self.speculative_config is not None:
409+
if self.speculative_config.method not in get_args(EagleModelTypes):
410+
logger.warning(
411+
"Async scheduling not supported with %s-based "
412+
"speculative decoding and will be disabled.",
413+
self.speculative_config.method,
414+
)
415+
else:
416+
logger.warning(
417+
"Async scheduling will be disabled because some features do "
418+
"not currently work in conjunction with speculative decoding. "
419+
"To use async scheduling with spec decoding anyway, "
420+
"enable it explicitly via async_scheduling=True."
421+
)
422+
self.scheduler_config.async_scheduling = False
412423
elif not executor_supports_async_sched:
413424
logger.warning(
414425
"Async scheduling will be disabled because it is not supported "
@@ -420,6 +431,11 @@ def __post_init__(self):
420431
else:
421432
self.scheduler_config.async_scheduling = True
422433

434+
logger.info_once(
435+
"Asynchronous scheduling is %s.",
436+
"enabled" if self.scheduler_config.async_scheduling else "disabled",
437+
)
438+
423439
from vllm.platforms import current_platform
424440

425441
if (

0 commit comments

Comments
 (0)