File tree (Expand file tree / Collapse file tree): 2 files changed, +8 -5 lines changed
Original file line number Diff line number Diff line change
@@ -970,7 +970,7 @@ def __init__(self,
970 970 max_num_batched_tokens: Optional[int],
971 971 max_num_seqs: int,
972 972 max_model_len: int,
973 - use_v2_block_manager: bool = False,
973 + use_v2_block_manager: bool = True,
974 974 num_lookahead_slots: int = 0,
975 975 delay_factor: float = 0.0,
976 976 enable_chunked_prefill: bool = False,
Original file line number Diff line number Diff line change @@ -107,7 +107,7 @@ class EngineArgs:
107 107 block_size: int = 16
108 108 enable_prefix_caching: bool = False
109 109 disable_sliding_window: bool = False
110 - use_v2_block_manager: bool = False
110 + use_v2_block_manager: bool = True
111 111 swap_space: float = 4  # GiB
112 112 cpu_offload_gb: float = 0  # GiB
113 113 gpu_memory_utilization: float = 0.90
@@ -369,9 +369,12 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
369 369 action='store_true',
370 370 help='Disables sliding window, '
371 371 'capping to sliding window size')
372 - parser.add_argument('--use-v2-block-manager',
373 - action='store_true',
374 - help='Use BlockSpaceMangerV2.')
372 + parser.add_argument(
373 + '--use-v2-block-manager',
374 + default=EngineArgs.use_v2_block_manager,
375 + action='store_true',
376 + help='Use BlockSpaceMangerV2. By default this is set to True. '
377 + 'Set to False to use BlockSpaceManagerV1')
375 378 parser.add_argument(
376 379 '--num-lookahead-slots',
377 380 type=int,
You can’t perform that action at this time.
0 commit comments