1 file changed: +24 −0
@@ -4,6 +4,7 @@
 
 from tests.e2e.conftest import VllmRunner
 from tests.e2e.model_utils import check_outputs_equal
+from vllm import SamplingParams
 
 MODEL = "Qwen/Qwen3-0.6B"
 
@@ -86,3 +87,26 @@ def test_chunked_prefill_with_ascend_scheduler(
         name_0="vllm_output",
         name_1="chunked_prefill_output",
     )
+
+
+def test_async_scheduling() -> None:
+    prompts = [
+        "Hello, my name is",
+        "The president of the United States is",
+        "The capital of France is",
+        "The future of AI is",
+    ] * 10
+    sampling_params = SamplingParams(temperature=0.2,
+                                     max_tokens=10,
+                                     stop_token_ids=None)
+
+    with VllmRunner(
+            "Qwen/Qwen2.5-0.5B-Instruct",
+            max_model_len=4096,
+            max_num_seqs=50,
+            dtype="bfloat16",
+            gpu_memory_utilization=0.9,
+            async_scheduling=True,
+    ) as vllm_model:
+        vllm_model.generate(prompts, sampling_params=sampling_params)
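
For reference, here is a minimal standalone sketch of what the new test exercises, run through vLLM's offline LLM entrypoint rather than the VllmRunner test fixture. It assumes that async_scheduling is accepted as an engine argument and forwarded by LLM (the diff only shows it being passed through VllmRunner), so treat it as an illustration, not part of this change:

    # Hypothetical repro script; assumes vllm.LLM forwards the
    # `async_scheduling` engine arg (not verified against this PR).
    from vllm import LLM, SamplingParams

    prompts = [
        "Hello, my name is",
        "The president of the United States is",
        "The capital of France is",
        "The future of AI is",
    ] * 10
    sampling_params = SamplingParams(temperature=0.2, max_tokens=10)

    llm = LLM(
        model="Qwen/Qwen2.5-0.5B-Instruct",
        max_model_len=4096,
        max_num_seqs=50,
        dtype="bfloat16",
        gpu_memory_utilization=0.9,
        async_scheduling=True,  # overlap scheduling with model execution
    )
    for out in llm.generate(prompts, sampling_params):
        print(out.outputs[0].text)

As in the test, the run only needs to complete without error; async scheduling overlaps scheduler work with model execution, so the check is that generation still produces output end to end.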