Support of FP8 chunk prefill #126
Workflow file for this run
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  # Workflow header: display name, triggers, and run de-duplication.
name: PR Test (XPU)

# Triggers: pull requests whose base branch is main, plus manual dispatch.
on:
  pull_request:
    branches:
      - main
  workflow_dispatch:

# One active run per ref: starting a new run for the same ref cancels the
# run that is still in progress.
concurrency:
  group: pr-test-xpu-${{ github.ref }}
  cancel-in-progress: true
jobs:
  # Builds the xpu_sglang:pvc image from Dockerfile.xpu_kernel, starts a
  # detached container from it, installs the Python test dependencies inside
  # that container, then runs the kernel tests and the flash-attention
  # benchmark. The container is always removed at the end.
  build-and-test:
    # Pull requests run only when they carry the 'run-ci' label; any other
    # trigger (workflow_dispatch) always runs.
    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
    runs-on: sglang-pvc
    # Least privilege: the job only needs to read the repository for checkout.
    permissions:
      contents: read
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Full history rather than a shallow clone.
          fetch-depth: 0

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build Docker image
        # The branch name and clone URL come from the pull request and are
        # attacker-controlled. They are handed to the shell through
        # environment variables (never expanded by ${{ }} inside the script)
        # so that a crafted branch name cannot inject shell commands.
        # NOTE(review): github.head_ref and the pull_request payload are only
        # populated for pull_request events, so on workflow_dispatch both
        # build args are empty strings - confirm Dockerfile.xpu_kernel copes.
        env:
          KERNEL_BRANCH: ${{ github.head_ref }}
          KERNEL_REPO: ${{ github.event.pull_request.head.repo.clone_url }}
        run: |
          docker build \
            --build-arg SG_LANG_KERNEL_BRANCH="${KERNEL_BRANCH}" \
            --build-arg SG_LANG_KERNEL_REPO="${KERNEL_REPO}" \
            --no-cache --progress=plain -f Dockerfile.xpu_kernel -t xpu_sglang:pvc .

      - name: Run container
        # The Hugging Face token is read from a file in the runner user's
        # home directory and exported into the container as HF_TOKEN. The
        # command substitution is quoted so the value is passed as one word.
        run: |
          docker run -dt \
            --device /dev/dri/ \
            --name ci_sglang_xpu \
            -e HF_TOKEN="$(cat ~/huggingface_token.txt)" \
            xpu_sglang:pvc

      - name: Install Dependency
        timeout-minutes: 20
        # The login command is single-quoted on the runner so that HF_TOKEN
        # is expanded by bash inside the container, not by the runner shell.
        # The last command makes /usr/bin/python3 point at the py3.10 env
        # interpreter, which the later steps invoke as plain `python3`.
        run: |
          docker exec ci_sglang_xpu /miniforge3/envs/py3.10/bin/python3 -m pip install --upgrade pip
          docker exec ci_sglang_xpu /miniforge3/envs/py3.10/bin/python3 -m pip install pytest expecttest ray huggingface_hub
          docker exec ci_sglang_xpu /bin/bash -c '/miniforge3/envs/py3.10/bin/huggingface-cli login --token "${HF_TOKEN}"'
          docker exec ci_sglang_xpu /bin/bash -c "ln -sf /miniforge3/envs/py3.10/bin/python3 /usr/bin/python3"

      - name: Run Sglang Kernel Cases
        timeout-minutes: 20
        run: |
          docker exec -w /root/sglang ci_sglang_xpu \
            /bin/bash -c "cd /root/sglang/sgl-kernel-xpu/tests && python3 -m pytest -v -s test_awq_dequant.py test_topk_softmax.py test_flash_attention.py"

      - name: Run Sglang Kernel Benchmarks
        timeout-minutes: 20
        run: |
          docker exec -w /root/sglang ci_sglang_xpu \
            /bin/bash -c "cd /root/sglang/sgl-kernel-xpu/benchmark && python3 bench_flash_attn.py"

      # Placeholder: no end-to-end test is executed yet.
      - name: Run E2E Bfloat16 tests
        timeout-minutes: 20
        run: |
          echo "[PlaceHolder for E2E Test...]"

      # Placeholder: no end-to-end test is executed yet.
      - name: Run E2E Quantization tests
        timeout-minutes: 20
        run: |
          echo "[PlaceHolder for E2E Test...]"

      - name: Cleanup container
        # Runs even when an earlier step failed or the run was cancelled.
        # `|| true` keeps the step green when the container does not exist.
        if: always()
        run: |
          docker rm -f ci_sglang_xpu || true