sgl-project · zhyncs · Jun 9, 2025 · Jun 9, 2025
@@ -223,7 +223,7 @@ jobs:
       fail-fast: false
       matrix:
         runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
-        part: [0, 1]
+        part: [0, 1, 2, 3, 4, 5]
     runs-on: ${{matrix.runner}}
     steps:
       - name: Checkout code
@@ -240,7 +240,7 @@ jobs:
       - name: Run test
         timeout-minutes: 40
         run: |
-          bash scripts/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 2
+          bash scripts/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 6
 
   unit-test-backend-2-gpu-amd:
     if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
@@ -266,6 +266,30 @@ jobs:
         run: |
           bash scripts/amd_ci_exec.sh python3 run_suite.py --suite per-commit-2-gpu-amd
 
+  unit-test-backend-4-gpu-amd:  # AMD CI job that runs the per-commit-4-gpu-amd suite
+    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
+      github.event.pull_request.draft == false  # never run for draft pull requests
+    strategy:
+      matrix:
+        runner: [linux-mi300-gpu-4]  # single-entry matrix: one runner label
+    runs-on: ${{matrix.runner}}  # resolves to the matrix runner label
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Start CI container
+        run: bash scripts/amd_ci_start_container.sh
+        env:
+          GITHUB_WORKSPACE: ${{ github.workspace }}  # exported to the start script's environment
+
+      - name: Install dependencies
+        run: bash scripts/amd_ci_install_dependency.sh
+
+      - name: Run test
+        timeout-minutes: 40  # caps this step at 40 minutes
+        run: |
+          bash scripts/amd_ci_exec.sh python3 run_suite.py --suite per-commit-4-gpu-amd
+
   unit-test-backend-8-gpu-amd:
     if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
       github.event.pull_request.draft == false

diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py
@@ -104,6 +104,29 @@ class TestFile:
         TestFile("test_block_int8.py", 22),
         TestFile("test_create_kvindices.py", 2),
         TestFile("test_chunked_prefill.py", 313),
+        TestFile("test_embedding_openai_server.py", 141),
+        TestFile("test_eval_fp8_accuracy.py", 303),
+        TestFile("test_function_call_parser.py", 10),
+        TestFile("test_input_embeddings.py", 38),
+        TestFile("test_large_max_new_tokens.py", 41),
+        TestFile("test_metrics.py", 32),
+        TestFile("test_no_chunked_prefill.py", 108),
+        TestFile("test_no_overlap_scheduler.py", 234),
+        TestFile("test_penalty.py", 41),
+        TestFile("test_page_size.py", 60),
+        TestFile("test_pytorch_sampling_backend.py", 66),
+        TestFile("test_radix_attention.py", 105),
+        TestFile("test_reasoning_content.py", 89),
+        TestFile("test_enable_thinking.py", 70),
+        TestFile("test_request_length_validation.py", 31),
+        TestFile("test_retract_decode.py", 54),
+        TestFile("test_server_args.py", 1),
+        TestFile("test_skip_tokenizer_init.py", 117),
+        TestFile("test_torch_native_attention_backend.py", 123),
+        TestFile("test_triton_attention_backend.py", 150),
+        TestFile("test_update_weights_from_disk.py", 114),
+        TestFile("test_vertex_endpoint.py", 31),
+        TestFile("test_vision_chunked_prefill.py", 175),
     ],
     "per-commit-2-gpu": [
         TestFile("models/lora/test_lora_tp.py", 116),
@@ -116,13 +139,20 @@ class TestFile:
         TestFile("test_verl_engine_2_gpu.py", 64),
     ],
     "per-commit-2-gpu-amd": [
+        TestFile("models/lora/test_lora_tp.py", 116),
+        TestFile("test_data_parallelism.py", 73),
         TestFile("test_mla_tp.py", 170),
+        TestFile("test_patch_torch.py", 19),
+        TestFile("test_update_weights_from_distributed.py", 103),
     ],
     "per-commit-4-gpu": [
         TestFile("test_local_attn.py", 250),
         TestFile("test_pp_single_node.py", 150),
         TestFile("test_verl_engine_4_gpu.py", 64),
     ],
+    "per-commit-4-gpu-amd": [
+        TestFile("test_pp_single_node.py", 150),
+    ],
     "per-commit-8-gpu": [
         # Disabled deepep tests temporarily because it takes too much time.
         # TODO: re-enable them after reducing the test time with compilation cache and smaller models.