---
# Build SageAttention Wheel (Python 3.14)
#
# Builds a SageAttention CUDA wheel for Python 3.14 / PyTorch / CUDA 13.0
# on a CPU-only runner (cross-compiled for SM 8.9), then publishes it as a
# GitHub release asset and as a workflow artifact.
name: Build SageAttention Wheel (Python 3.14)

on:
  workflow_dispatch:
    inputs:
      sageattention_version:
        description: 'SageAttention version to build'
        required: true
        default: '2.2.0'
      pytorch_version:
        description: 'PyTorch version'
        required: true
        default: '2.10.0'
  schedule:
    # Run weekly on Monday at 05:00 UTC.
    # Scheduled runs carry no `inputs`, so every `inputs.*` use below
    # falls back with `|| '<default>'`.
    - cron: '0 5 * * 1'

env:
  PYTHON_VERSION: '3.14'
  CUDA_VERSION: '13.0'
  TORCH_CUDA_ARCH_LIST: '8.9'  # RTX 4090 (Ada, SM 8.9)

permissions:
  contents: write  # required to create the release/tag below

jobs:
  build:
    # Pinned to 22.04 so the runner matches the `ubuntu2204` NVIDIA CUDA
    # apt repository added below; `ubuntu-latest` now resolves to 24.04.
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Free disk space
        # The CUDA toolkit is large; reclaim space from preinstalled SDKs.
        run: |
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /opt/ghc
          sudo rm -rf /usr/local/share/boost
          df -h

      - name: Set up Python ${{ env.PYTHON_VERSION }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install CUDA 13.0 Toolkit
        run: |
          # Add NVIDIA CUDA repository (Ubuntu 22.04)
          wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
          sudo dpkg -i cuda-keyring_1.1-1_all.deb
          sudo apt-get update
          # Install CUDA 13.0 packages (both dev and runtime)
          sudo apt-get install -y \
            cuda-toolkit-13-0 \
            cuda-runtime-13-0 \
            cuda-libraries-13-0 \
            cuda-libraries-dev-13-0
          # Export for all subsequent steps (including the build step).
          echo "CUDA_HOME=/usr/local/cuda-13.0" >> $GITHUB_ENV
          echo "PATH=/usr/local/cuda-13.0/bin:$PATH" >> $GITHUB_ENV
          echo "LD_LIBRARY_PATH=/usr/local/cuda-13.0/lib64:$LD_LIBRARY_PATH" >> $GITHUB_ENV

      - name: Verify CUDA installation
        run: |
          /usr/local/cuda-13.0/bin/nvcc --version
          echo "CUDA_HOME=$CUDA_HOME"
          echo ""
          echo "Compiler versions:"
          gcc --version | head -1
          g++ --version | head -1

      - name: Install build dependencies
        run: |
          python -m pip install --upgrade pip wheel setuptools ninja
          pip install torch==${{ inputs.pytorch_version || '2.10.0' }} --index-url https://download.pytorch.org/whl/cu130

      - name: Clone SageAttention repository
        run: |
          git clone --recursive https://github.com/thu-ml/SageAttention.git
          cd SageAttention
          # Fall back to main when the requested tag does not exist.
          git checkout v${{ inputs.sageattention_version || '2.2.0' }} || git checkout main
          git submodule update --init --recursive
          echo "Building from commit: $(git rev-parse HEAD)"

      - name: Patch SageAttention setup.py for cross-compilation and version
        # NOTE(review): the sed expressions below are tightly coupled to the
        # exact quoting in upstream setup.py; they are kept verbatim.
        # Presumably they no-op harmlessly when the patterns don't match —
        # verify against the pinned SageAttention tag when bumping versions.
        run: |
          cd SageAttention
          # Remove any SM target assertions that may fail without GPU
          if grep -q "assert.*sm_targets" setup.py; then
            sed -i '/assert.*sm_targets/d' setup.py
          fi
          # Force SM targets from environment if setup.py doesn't respect TORCH_CUDA_ARCH_LIST
          if grep -q "sm_targets = \[\]" setup.py; then
            sed -i 's/sm_targets = \[\]/sm_targets = [line.strip() for line in os.environ.get("TORCH_CUDA_ARCH_LIST", "").split(",") if line.strip()] if os.environ.get("TORCH_CUDA_ARCH_LIST") else []/' setup.py
          fi
          # Inject CUDA version, GPU arch, and PyTorch version into package version string
          PYTORCH_VERSION="${{ inputs.pytorch_version || '2.10.0' }}"
          CUDA_SHORT_VERSION="${{ env.CUDA_VERSION }}"
          CUDA_SHORT_VERSION="${CUDA_SHORT_VERSION//./}" # Remove dots: 13.0 -> 130
          CUDA_ARCH="${{ env.TORCH_CUDA_ARCH_LIST }}"
          CUDA_ARCH="${CUDA_ARCH//./}" # Remove dots: 8.9 -> 89
          VERSION_SUFFIX="+cu${CUDA_SHORT_VERSION}sm${CUDA_ARCH}torch${PYTORCH_VERSION}"
          # Check if version already has the correct metadata
          if grep -q "${VERSION_SUFFIX}" setup.py; then
            echo "Version already has correct metadata: ${VERSION_SUFFIX}"
          else
            # Remove any existing version metadata to avoid duplicates
            # This handles cases like: 2.2.0+something -> 2.2.0
            sed -i 's/version=\(['\''"][^+]*\)+[^'\''"]*/version=\1/' setup.py
            # Now add our version metadata
            sed -i "s/version=\(['\"][^'\"]*['\"\]\)/version=\1.replace('\"', '').replace(\"'\", '') + '${VERSION_SUFFIX}'/" setup.py
            echo "Patched version to include: ${VERSION_SUFFIX}"
          fi

      - name: Build SageAttention wheel
        # CUDA_HOME/PATH/LD_LIBRARY_PATH are already in the step environment
        # via GITHUB_ENV above — no step-level env mapping needed.
        run: |
          cd SageAttention
          export TORCH_CUDA_ARCH_LIST="${{ env.TORCH_CUDA_ARCH_LIST }}"
          export MAX_JOBS=4
          # `setup.py bdist_wheel` is deprecated; `pip wheel` is the
          # supported path. --no-build-isolation reuses the torch/ninja
          # installed above (isolation would build in a fresh env without
          # torch and fail); --no-deps keeps only the project wheel in dist/.
          python -m pip wheel . --no-deps --no-build-isolation -w dist

      - name: List built wheels
        run: |
          ls -lh SageAttention/dist/

      - name: Test wheel installation
        run: |
          pip install SageAttention/dist/*.whl
          python -c "import sageattention; print(f'SageAttention imported successfully')"

      - name: Get wheel name
        id: wheel_name
        run: |
          WHEEL_PATH=$(ls SageAttention/dist/*.whl)
          WHEEL_NAME=$(basename $WHEEL_PATH)
          echo "wheel_name=$WHEEL_NAME" >> $GITHUB_OUTPUT
          echo "wheel_path=$WHEEL_PATH" >> $GITHUB_OUTPUT

      - name: Create Release
        id: create_release
        uses: softprops/action-gh-release@v1
        with:
          tag_name: sageattention-v${{ inputs.sageattention_version || '2.2.0' }}-py314-torch${{ inputs.pytorch_version || '2.10.0' }}-cu130
          name: SageAttention v${{ inputs.sageattention_version || '2.2.0' }} Wheel for Python ${{ env.PYTHON_VERSION }} + PyTorch ${{ inputs.pytorch_version || '2.10.0' }} + CUDA ${{ env.CUDA_VERSION }}
          body: |
            Built with:
            - Python: ${{ env.PYTHON_VERSION }}
            - CUDA: ${{ env.CUDA_VERSION }}
            - PyTorch: ${{ inputs.pytorch_version || '2.10.0' }}
            - CUDA Arch: ${{ env.TORCH_CUDA_ARCH_LIST }}

            ## Installation
            ```bash
            pip install ${{ steps.wheel_name.outputs.wheel_name }}
            ```

            Or directly from this release:
            ```bash
            pip install https://github.com/${{ github.repository }}/releases/download/sageattention-v${{ inputs.sageattention_version || '2.2.0' }}-py314-torch${{ inputs.pytorch_version || '2.10.0' }}-cu130/${{ steps.wheel_name.outputs.wheel_name }}
            ```

            ## Notes
            SageAttention provides optimized attention mechanisms with improved throughput and reduced memory usage.
            Compatible with PyTorch ${{ inputs.pytorch_version || '2.10.0' }} and CUDA ${{ env.CUDA_VERSION }}.
          files: ${{ steps.wheel_name.outputs.wheel_path }}
          draft: false
          prerelease: false
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Upload wheel as artifact
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: sageattention-py${{ env.PYTHON_VERSION }}-cu${{ env.CUDA_VERSION }}
          # Glob rather than the `wheel_name` step output: on a failed run
          # that output is empty, which would make this `always()` step
          # error instead of uploading whatever was built.
          path: SageAttention/dist/*.whl
          if-no-files-found: warn
          retention-days: 30