# Workflow: Build SageAttention Wheel (Python 3.14)
# NOTE(review): the original paste carried GitHub web-UI chrome ("Skip to
# content", duplicated run titles) above the YAML document; it has been
# converted to this comment header so the file is valid YAML.
---
name: Build SageAttention Wheel (Python 3.14)

on:
  # Manual trigger with overridable versions. On `schedule` runs the `inputs`
  # context is empty, so every consumer below must (and does) use the
  # `inputs.x || 'default'` fallback pattern.
  workflow_dispatch:
    inputs:
      sageattention_version:
        description: 'SageAttention version to build'
        required: true
        default: '2.2.0'
      pytorch_version:
        description: 'PyTorch version'
        required: true
        default: '2.10.0'
  schedule:
    # Run weekly on Monday at 05:00 UTC
    - cron: '0 5 * * 1'

env:
  # Quoted so YAML does not read them as floats (3.14 / 13.0).
  PYTHON_VERSION: '3.14'
  CUDA_VERSION: '13.0'
  TORCH_CUDA_ARCH_LIST: '8.9'  # RTX 4090 (SM 8.9)

# `contents: write` is required to create releases with the GITHUB_TOKEN.
permissions:
  contents: write
jobs:
  build:
    # Pinned to 22.04: the CUDA apt keyring installed below is the ubuntu2204
    # one, so `ubuntu-latest` (currently 24.04) would mix repo generations.
    runs-on: ubuntu-22.04

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # CUDA toolkit + PyTorch are large; reclaim space before installing.
      - name: Free disk space
        run: |
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /opt/ghc
          sudo rm -rf /usr/local/share/boost
          df -h

      - name: Set up Python ${{ env.PYTHON_VERSION }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install CUDA 13.0 Toolkit
        run: |
          # Add NVIDIA CUDA repository (Ubuntu 22.04)
          wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
          sudo dpkg -i cuda-keyring_1.1-1_all.deb
          sudo apt-get update
          # Install CUDA 13.0 packages (both dev and runtime)
          sudo apt-get install -y \
            cuda-toolkit-13-0 \
            cuda-runtime-13-0 \
            cuda-libraries-13-0 \
            cuda-libraries-dev-13-0
          # Export toolchain location for all subsequent steps.
          echo "CUDA_HOME=/usr/local/cuda-13.0" >> $GITHUB_ENV
          echo "PATH=/usr/local/cuda-13.0/bin:$PATH" >> $GITHUB_ENV
          echo "LD_LIBRARY_PATH=/usr/local/cuda-13.0/lib64:$LD_LIBRARY_PATH" >> $GITHUB_ENV

      - name: Verify CUDA installation
        run: |
          /usr/local/cuda-13.0/bin/nvcc --version
          echo "CUDA_HOME=$CUDA_HOME"
          echo ""
          echo "Compiler versions:"
          gcc --version | head -1
          g++ --version | head -1

      - name: Install build dependencies
        run: |
          python -m pip install --upgrade pip wheel setuptools ninja
          pip install torch==${{ inputs.pytorch_version || '2.10.0' }} --index-url https://download.pytorch.org/whl/cu130

      - name: Clone SageAttention repository
        run: |
          git clone --recursive https://github.com/thu-ml/SageAttention.git
          cd SageAttention
          # Fall back to main if the requested version tag does not exist.
          git checkout v${{ inputs.sageattention_version || '2.2.0' }} || git checkout main
          git submodule update --init --recursive
          echo "Building from commit: $(git rev-parse HEAD)"

      # NOTE(review): these sed patches target specific patterns in upstream
      # setup.py and will silently no-op if upstream changes shape — the
      # version-suffix grep/echo below at least reports what happened.
      - name: Patch SageAttention setup.py for cross-compilation and version
        run: |
          cd SageAttention
          # Remove any SM target assertions that may fail without GPU
          if grep -q "assert.*sm_targets" setup.py; then
            sed -i '/assert.*sm_targets/d' setup.py
          fi
          # Force SM targets from environment if setup.py doesn't respect TORCH_CUDA_ARCH_LIST
          if grep -q "sm_targets = \[\]" setup.py; then
            sed -i 's/sm_targets = \[\]/sm_targets = [line.strip() for line in os.environ.get("TORCH_CUDA_ARCH_LIST", "").split(",") if line.strip()] if os.environ.get("TORCH_CUDA_ARCH_LIST") else []/' setup.py
          fi
          # Inject CUDA version, GPU arch, and PyTorch version into package version string
          PYTORCH_VERSION="${{ inputs.pytorch_version || '2.10.0' }}"
          CUDA_SHORT_VERSION="${{ env.CUDA_VERSION }}"
          CUDA_SHORT_VERSION="${CUDA_SHORT_VERSION//./}"  # Remove dots: 13.0 -> 130
          CUDA_ARCH="${{ env.TORCH_CUDA_ARCH_LIST }}"
          CUDA_ARCH="${CUDA_ARCH//./}"  # Remove dots: 8.9 -> 89
          VERSION_SUFFIX="+cu${CUDA_SHORT_VERSION}sm${CUDA_ARCH}torch${PYTORCH_VERSION}"
          # Check if version already has the correct metadata
          if grep -q "${VERSION_SUFFIX}" setup.py; then
            echo "Version already has correct metadata: ${VERSION_SUFFIX}"
          else
            # Remove any existing version metadata to avoid duplicates
            # This handles cases like: 2.2.0+something -> 2.2.0
            sed -i 's/version=\(['\''"][^+]*\)+[^'\''"]*/version=\1/' setup.py
            # Now add our version metadata
            sed -i "s/version=\(['\"][^'\"]*['\"\]\)/version=\1.replace('\"', '').replace(\"'\", '') + '${VERSION_SUFFIX}'/" setup.py
            echo "Patched version to include: ${VERSION_SUFFIX}"
          fi

      - name: Build SageAttention wheel
        run: |
          cd SageAttention
          # Cross-compile for the target arch; the runner has no GPU.
          export TORCH_CUDA_ARCH_LIST="${{ env.TORCH_CUDA_ARCH_LIST }}"
          export MAX_JOBS=4
          python setup.py bdist_wheel
        env:
          # Redundant with the GITHUB_ENV export above, kept as a safety net.
          CUDA_HOME: ${{ env.CUDA_HOME }}

      - name: List built wheels
        run: |
          ls -lh SageAttention/dist/

      # Smoke test: import only. A GPU-less runner can import the extension
      # but cannot exercise kernels — presumably sufficient here; confirm.
      - name: Test wheel installation
        run: |
          pip install SageAttention/dist/*.whl
          python -c "import sageattention; print('SageAttention imported successfully')"

      - name: Get wheel name
        id: wheel_name
        run: |
          WHEEL_PATH=$(ls SageAttention/dist/*.whl)
          WHEEL_NAME=$(basename "$WHEEL_PATH")
          echo "wheel_name=$WHEEL_NAME" >> $GITHUB_OUTPUT
          echo "wheel_path=$WHEEL_PATH" >> $GITHUB_OUTPUT

      - name: Create Release
        id: create_release
        uses: softprops/action-gh-release@v1
        with:
          tag_name: sageattention-v${{ inputs.sageattention_version || '2.2.0' }}-py314-torch${{ inputs.pytorch_version || '2.10.0' }}-cu130
          name: SageAttention v${{ inputs.sageattention_version || '2.2.0' }} Wheel for Python ${{ env.PYTHON_VERSION }} + PyTorch ${{ inputs.pytorch_version || '2.10.0' }} + CUDA ${{ env.CUDA_VERSION }}
          body: |
            Built with:
            - Python: ${{ env.PYTHON_VERSION }}
            - CUDA: ${{ env.CUDA_VERSION }}
            - PyTorch: ${{ inputs.pytorch_version || '2.10.0' }}
            - CUDA Arch: ${{ env.TORCH_CUDA_ARCH_LIST }}

            ## Installation
            ```bash
            pip install ${{ steps.wheel_name.outputs.wheel_name }}
            ```

            Or directly from this release:
            ```bash
            pip install https://github.com/${{ github.repository }}/releases/download/sageattention-v${{ inputs.sageattention_version || '2.2.0' }}-py314-torch${{ inputs.pytorch_version || '2.10.0' }}-cu130/${{ steps.wheel_name.outputs.wheel_name }}
            ```

            ## Notes
            SageAttention provides optimized attention mechanisms with improved throughput and reduced memory usage.
            Compatible with PyTorch ${{ inputs.pytorch_version || '2.10.0' }} and CUDA ${{ env.CUDA_VERSION }}.
          files: ${{ steps.wheel_name.outputs.wheel_path }}
          draft: false
          prerelease: false
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      # Keep the wheel even when the release step fails (e.g. tag collision).
      - name: Upload wheel as artifact
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: sageattention-py${{ env.PYTHON_VERSION }}-cu${{ env.CUDA_VERSION }}
          path: ${{ steps.wheel_name.outputs.wheel_path }}
          retention-days: 30