Skip to content

Commit 703b715

Browse files
jiangpeng36 and Ronald1995 committed
add e2e testcase for async_scheduling
Signed-off-by: jiangpeng36 <[email protected]> Signed-off-by: Ronald1995 <[email protected]> Co-authored-by: Ronald1995 <[email protected]>
1 parent 1130c63 commit 703b715

File tree

1 file changed

+24
-0
lines changed

1 file changed

+24
-0
lines changed

tests/e2e/singlecard/test_ascend_scheduler.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from tests.e2e.conftest import VllmRunner
66
from tests.e2e.model_utils import check_outputs_equal
7+
from vllm import SamplingParams
78

89
MODEL = "Qwen/Qwen3-0.6B"
910

@@ -86,3 +87,26 @@ def test_chunked_prefill_with_ascend_scheduler(
8687
name_0="vllm_output",
8788
name_1="chunked_prefill_output",
8889
)
90+
91+
92+
def test_async_scheduling() -> None:
    """E2E smoke test: generation runs to completion with async_scheduling enabled.

    Spins up a small instruct model inside ``VllmRunner`` with
    ``async_scheduling=True`` and generates short completions for a batch of
    repeated prompts. The test passes if ``generate`` returns without error;
    output quality is not asserted.
    """
    # 40 prompts total (4 distinct x 10) to exercise batched scheduling.
    prompts = [
        "Hello, my name is",
        "The president of the United States is",
        "The capital of France is",
        "The future of AI is",
    ] * 10
    # Short, low-temperature generations keep the e2e run fast.
    sampling_params = SamplingParams(temperature=0.2,
                                     max_tokens=10,
                                     stop_token_ids=None)

    with VllmRunner(
            # Use the hub model id, not a developer-local checkout path,
            # so the test is runnable in CI and on other machines.
            "Qwen/Qwen2.5-0.5B-Instruct",
            max_model_len=4096,
            max_num_seqs=50,
            dtype="bfloat16",
            gpu_memory_utilization=0.9,
            async_scheduling=True,
    ) as vllm_model:
        vllm_model.generate(prompts, sampling_params=sampling_params)

0 commit comments

Comments
 (0)