1 file changed: +24 −0
@@ -4,6 +4,7 @@
 
 from tests.e2e.conftest import VllmRunner
 from tests.e2e.model_utils import check_outputs_equal
+from vllm import SamplingParams
 
 MODEL = "Qwen/Qwen3-0.6B"
 
@@ -86,3 +87,26 @@ def test_chunked_prefill_with_ascend_scheduler(
         name_0="vllm_output",
         name_1="chunked_prefill_output",
     )
+
+
+def test_async_scheduling() -> None:
+    prompts = [
+        "Hello, my name is",
+        "The president of the United States is",
+        "The capital of France is",
+        "The future of AI is",
+    ] * 10
+    sampling_params = SamplingParams(temperature=0.2,
+                                     max_tokens=10,
+                                     stop_token_ids=None)
+
+    with VllmRunner(
+            "Qwen/Qwen2.5-0.5B-Instruct",
+            max_model_len=4096,
+            max_num_seqs=50,
+            dtype="bfloat16",
+            gpu_memory_utilization=0.9,
+            async_scheduling=True,
+    ) as vllm_model:
+        vllm_model.generate(prompts, sampling_params=sampling_params)
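
For reference, here is a minimal standalone sketch of what the new test exercises, run through vLLM's offline LLM entrypoint rather than the VllmRunner test fixture. It assumes that async_scheduling is accepted as an engine argument and forwarded by LLM (the diff only shows it being passed through VllmRunner), so treat it as an illustration, not part of this change:

    # Hypothetical repro script; assumes vllm.LLM forwards the
    # `async_scheduling` engine arg (not verified against this PR).
    from vllm import LLM, SamplingParams

    prompts = [
        "Hello, my name is",
        "The president of the United States is",
        "The capital of France is",
        "The future of AI is",
    ] * 10
    sampling_params = SamplingParams(temperature=0.2, max_tokens=10)

    llm = LLM(
        model="Qwen/Qwen2.5-0.5B-Instruct",
        max_model_len=4096,
        max_num_seqs=50,
        dtype="bfloat16",
        gpu_memory_utilization=0.9,
        async_scheduling=True,  # overlap scheduling with model execution
    )
    for out in llm.generate(prompts, sampling_params):
        print(out.outputs[0].text)

As in the test, the run only needs to complete without error; async scheduling overlaps scheduler work with model execution, so the check is that generation still produces output end to end.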