@@ -549,8 +549,11 @@ class EngineArgs:
549549 )
550550 """Custom logitproc types"""
551551
552+ # DEPRECATED
552553 async_scheduling : bool = SchedulerConfig .async_scheduling
553554
555+ disable_async_scheduling : bool = not SchedulerConfig .async_scheduling
556+
554557 kv_sharing_fast_prefill : bool = CacheConfig .kv_sharing_fast_prefill
555558
556559 def __post_init__ (self ):
@@ -1041,6 +1044,11 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
10411044 scheduler_group .add_argument (
10421045 "--async-scheduling" , ** scheduler_kwargs ["async_scheduling" ]
10431046 )
# Opt-out flag for async scheduling. Without an explicit default,
# BooleanOptionalAction leaves the parsed value as None when the flag is
# omitted; take the default from the EngineArgs field so the CLI and the
# dataclass agree and the value is always a bool.
scheduler_group.add_argument(
    "--disable-async-scheduling",
    action=argparse.BooleanOptionalAction,
    default=EngineArgs.disable_async_scheduling,
    help="If True, disable the use of asynchronous scheduling.",
)
10441052
10451053 # Compilation arguments
10461054 compilation_kwargs = get_kwargs (CompilationConfig )
@@ -1491,9 +1499,22 @@ def create_engine_config(
14911499 )
14921500
14931501 if self .async_scheduling :
1502+ # Async scheduling was explicitly enabled (deprecated)
1503+ if self .disable_async_scheduling :
1504+ raise ValueError (
1505+ "Cannot set both async_scheduling and disable_async_scheduling"
1506+ )
1507+
1508+ logger .warning (
1509+ "The async_scheduling arg is deprecated now that it is enabled "
1510+ "by default. Use disable_async_scheduling to disable it."
1511+ )
1512+
1513+ # Hard-fail compatibility checks if async scheduling
1514+ # was enabled *explicitly*.
14941515 if self .pipeline_parallel_size > 1 :
14951516 raise ValueError (
1496- "Async scheduling is not supported with pipeline-parallel-size > 1."
1517+ "Async scheduling is not supported with pipeline_parallel_size > 1."
14971518 )
14981519
14991520 # Currently, async scheduling does not support speculative decoding.
@@ -1504,6 +1525,16 @@ def create_engine_config(
15041525 "async scheduling."
15051526 )
15061527
1528+ if not self .disable_async_scheduling and (
1529+ self .pipeline_parallel_size > 1 or self .speculative_config is not None
1530+ ):
1531+ logger .warning (
1532+ "Async scheduling is not yet supported with "
1533+ "speculative decoding or pipeline_parallel_size > 1 "
1534+ "and will be disabled."
1535+ )
1536+ self .disable_async_scheduling = True
1537+
15071538 # Forward the deprecated CLI args to the EPLB config.
15081539 if self .num_redundant_experts is not None :
15091540 self .eplb_config .num_redundant_experts = self .num_redundant_experts
@@ -1547,14 +1578,21 @@ def create_engine_config(
15471578 _api_process_rank = self ._api_process_rank ,
15481579 )
15491580
1550- if self .async_scheduling and (
1551- parallel_config .distributed_executor_backend not in ("mp" , "uni" )
1552- ):
1581+ executor_supports_async_sched = (
1582+ parallel_config .distributed_executor_backend in ("mp" , "uni" )
1583+ )
1584+ if self .async_scheduling and not executor_supports_async_sched :
15531585 raise ValueError (
15541586 "Currently, async scheduling only supports `mp` or `uni` "
1555- "distributed executor backend, but you choose "
1587+ "distributed executor backend, but you chose "
15561588 f"`{ parallel_config .distributed_executor_backend } `."
15571589 )
# Async scheduling stays on unless it was disabled; when the executor
# backend cannot support it, warn and turn it off rather than failing.
if not self.disable_async_scheduling and not executor_supports_async_sched:
    logger.warning(
        "Currently, async scheduling only supports `mp` or `uni` "
        "distributed executor backend, not `%s`, and so will be disabled.",
        # Supplies the value for the `%s` placeholder above (lazy logging
        # args); without it the message prints a literal "%s".
        parallel_config.distributed_executor_backend,
    )
    self.disable_async_scheduling = True
15581596
15591597 speculative_config = self .create_speculative_config (
15601598 target_model_config = model_config ,
@@ -1585,7 +1623,7 @@ def create_engine_config(
15851623 max_long_partial_prefills = self .max_long_partial_prefills ,
15861624 long_prefill_token_threshold = self .long_prefill_token_threshold ,
15871625 disable_hybrid_kv_cache_manager = self .disable_hybrid_kv_cache_manager ,
1588- async_scheduling = self .async_scheduling ,
1626+ async_scheduling = not self .disable_async_scheduling ,
15891627 )
15901628
15911629 if not model_config .is_multimodal_model and self .default_mm_loras :
0 commit comments