Skip to content

Support of FP8 chunk prefill #127

Support of FP8 chunk prefill

Support of FP8 chunk prefill #127

Workflow file for this run

# Pull-request CI for the XPU kernels: builds the kernel image, then runs the
# kernel tests and the flash-attention benchmark inside a container.
name: PR Test (XPU)
on:
  pull_request:
    branches: [main]
    # The job below only runs for PRs carrying the `run-ci` label, so the
    # `labeled` activity must be listed: the default types (opened,
    # synchronize, reopened) do not fire when a label is added, and a manual
    # re-run reuses the original event payload (without the new label).
    # Trade-off: adding any label to an already-labelled PR restarts the run.
    types: [opened, synchronize, reopened, labeled]
  workflow_dispatch:
# One active run per ref; a newer run cancels the one still in progress.
concurrency:
  group: pr-test-xpu-${{ github.ref }}
  cancel-in-progress: true
jobs:
  build-and-test:
    runs-on: sglang-pvc
    # Non-PR events (manual dispatch) always run; pull requests run only when
    # they carry the `run-ci` label.
    if: >-
      github.event_name != 'pull_request' ||
      contains(github.event.pull_request.labels.*.name, 'run-ci')
    steps:
# Check out the repository at the ref that triggered the run.
- name: Checkout code
  uses: actions/checkout@v4
  with:
    # 0 requests the full history instead of a shallow clone.
    fetch-depth: 0
# Install/configure Buildx before the image build.
# v3 replaces v2, which targets a Node runtime GitHub has deprecated; the
# runner already satisfies the newer runtime because checkout@v4 needs it too.
- name: Set up Docker Buildx
  uses: docker/setup-buildx-action@v3
# Build the XPU kernel image from Dockerfile.xpu_kernel, pointing it at the
# branch and repository under test.
- name: Build Docker image
  # Event data goes through `env` instead of inline expression expansion in
  # the script: a PR branch name is attacker-controlled, and inline expansion
  # would splice it into the shell command unescaped.
  env:
    # On workflow_dispatch the pull_request fields are empty, so fall back to
    # the dispatched ref and this repository's clone URL.
    KERNEL_BRANCH: ${{ github.head_ref || github.ref_name }}
    KERNEL_REPO: >-
      ${{ github.event.pull_request.head.repo.clone_url
      || format('{0}/{1}.git', github.server_url, github.repository) }}
  run: |
    docker build \
      --build-arg "SG_LANG_KERNEL_BRANCH=${KERNEL_BRANCH}" \
      --build-arg "SG_LANG_KERNEL_REPO=${KERNEL_REPO}" \
      --no-cache --progress=plain -f Dockerfile.xpu_kernel -t xpu_sglang:pvc .
# Start the long-lived test container that later steps `docker exec` into.
- name: Run container
  run: |
    # Remove any container left behind by a run that never reached cleanup;
    # otherwise `docker run --name` fails on the name conflict.
    docker rm -f ci_sglang_xpu >/dev/null 2>&1 || true
    # Read the token into the environment and forward it by name (`-e VAR`
    # with no value copies it from the caller's environment), so the secret
    # is neither word-split nor placed on the docker command line.
    HF_TOKEN="$(cat ~/huggingface_token.txt)"
    export HF_TOKEN
    docker run -dt \
      --device /dev/dri/ \
      --name ci_sglang_xpu \
      -e HF_TOKEN \
      xpu_sglang:pvc
# Install the Python test dependencies inside the container, log in to the
# Hugging Face hub, and expose the env's interpreter as /usr/bin/python3.
- name: Install Dependency
  timeout-minutes: 20
  run: |
    docker exec ci_sglang_xpu /miniforge3/envs/py3.10/bin/python3 -m pip install --upgrade pip
    docker exec ci_sglang_xpu /miniforge3/envs/py3.10/bin/python3 -m pip install pytest expecttest ray huggingface_hub
    # Outer single quotes defer ${HF_TOKEN} to the shell inside the container;
    # the inner double quotes keep the token a single argument.
    docker exec ci_sglang_xpu /bin/bash -c '/miniforge3/envs/py3.10/bin/huggingface-cli login --token "${HF_TOKEN}"'
    # Later steps call plain `python3`, so link it to the conda env interpreter.
    docker exec ci_sglang_xpu /bin/bash -c "ln -sf /miniforge3/envs/py3.10/bin/python3 /usr/bin/python3"
# Run the selected kernel unit tests with pytest inside the container
# (verbose, output capture disabled).
- name: Run Sglang Kernel Cases
  timeout-minutes: 20
  run: |
    docker exec -w /root/sglang ci_sglang_xpu \
      /bin/bash -c "cd /root/sglang/sgl-kernel-xpu/tests && python3 -m pytest -v -s test_awq_dequant.py test_topk_softmax.py test_flash_attention.py"
# Run the flash-attention benchmark script from the benchmark directory
# inside the container.
- name: Run Sglang Kernel Benchmarks
  timeout-minutes: 20
  run: |
    docker exec -w /root/sglang ci_sglang_xpu \
      /bin/bash -c "cd /root/sglang/sgl-kernel-xpu/benchmark && python3 bench_flash_attn.py "
# Placeholder step: only prints a marker until real E2E tests are added.
- name: Run E2E Bfloat16 tests
  timeout-minutes: 20
  run: echo "[PlaceHolder for E2E Test...]"
# Placeholder step: only prints a marker until real E2E tests are added.
# (Step name corrected from the misspelled "Qunatization".)
- name: Run E2E Quantization tests
  timeout-minutes: 20
  run: |
    echo "[PlaceHolder for E2E Test...]"
# Force-remove the test container whether earlier steps passed, failed, or
# were cancelled; `|| true` keeps a missing container from failing the step.
- name: Cleanup container
  if: always()
  run: docker rm -f ci_sglang_xpu || true