# Workflow: Build SageAttention Wheel (Python 3.14)
# NOTE(review): the original paste carried GitHub web-UI chrome ("Skip to
# content", duplicated run titles) above the YAML document; it has been
# converted to this comment header so the file is valid YAML.
---
name: Build SageAttention Wheel (Python 3.14)

on:
  # Manual trigger with overridable versions. On `schedule` runs the `inputs`
  # context is empty, so every consumer below must (and does) use the
  # `inputs.x || 'default'` fallback pattern.
  workflow_dispatch:
    inputs:
      sageattention_version:
        description: 'SageAttention version to build'
        required: true
        default: '2.2.0'
      pytorch_version:
        description: 'PyTorch version'
        required: true
        default: '2.10.0'
  schedule:
    # Run weekly on Monday at 05:00 UTC
    - cron: '0 5 * * 1'

env:
  # Quoted so YAML does not read them as floats (3.14 / 13.0).
  PYTHON_VERSION: '3.14'
  CUDA_VERSION: '13.0'
  TORCH_CUDA_ARCH_LIST: '8.9'  # RTX 4090 (SM 8.9)

# `contents: write` is required to create releases with the GITHUB_TOKEN.
permissions:
  contents: write
jobs:
  build:
    # Pinned to 22.04: the CUDA apt keyring installed below is the ubuntu2204
    # one, so `ubuntu-latest` (currently 24.04) would mix repo generations.
    runs-on: ubuntu-22.04

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # CUDA toolkit + PyTorch are large; reclaim space before installing.
      - name: Free disk space
        run: |
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /opt/ghc
          sudo rm -rf /usr/local/share/boost
          df -h

      - name: Set up Python ${{ env.PYTHON_VERSION }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install CUDA 13.0 Toolkit
        run: |
          # Add NVIDIA CUDA repository (Ubuntu 22.04)
          wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
          sudo dpkg -i cuda-keyring_1.1-1_all.deb
          sudo apt-get update
          # Install CUDA 13.0 packages (both dev and runtime)
          sudo apt-get install -y \
            cuda-toolkit-13-0 \
            cuda-runtime-13-0 \
            cuda-libraries-13-0 \
            cuda-libraries-dev-13-0
          # Export toolchain location for all subsequent steps.
          echo "CUDA_HOME=/usr/local/cuda-13.0" >> $GITHUB_ENV
          echo "PATH=/usr/local/cuda-13.0/bin:$PATH" >> $GITHUB_ENV
          echo "LD_LIBRARY_PATH=/usr/local/cuda-13.0/lib64:$LD_LIBRARY_PATH" >> $GITHUB_ENV

      - name: Verify CUDA installation
        run: |
          /usr/local/cuda-13.0/bin/nvcc --version
          echo "CUDA_HOME=$CUDA_HOME"
          echo ""
          echo "Compiler versions:"
          gcc --version | head -1
          g++ --version | head -1

      - name: Install build dependencies
        run: |
          python -m pip install --upgrade pip wheel setuptools ninja
          pip install torch==${{ inputs.pytorch_version || '2.10.0' }} --index-url https://download.pytorch.org/whl/cu130

      - name: Clone SageAttention repository
        run: |
          git clone --recursive https://github.com/thu-ml/SageAttention.git
          cd SageAttention
          # Fall back to main if the requested version tag does not exist.
          git checkout v${{ inputs.sageattention_version || '2.2.0' }} || git checkout main
          git submodule update --init --recursive
          echo "Building from commit: $(git rev-parse HEAD)"

      # NOTE(review): these sed patches target specific patterns in upstream
      # setup.py and will silently no-op if upstream changes shape — the
      # version-suffix grep/echo below at least reports what happened.
      - name: Patch SageAttention setup.py for cross-compilation and version
        run: |
          cd SageAttention
          # Remove any SM target assertions that may fail without GPU
          if grep -q "assert.*sm_targets" setup.py; then
            sed -i '/assert.*sm_targets/d' setup.py
          fi
          # Force SM targets from environment if setup.py doesn't respect TORCH_CUDA_ARCH_LIST
          if grep -q "sm_targets = \[\]" setup.py; then
            sed -i 's/sm_targets = \[\]/sm_targets = [line.strip() for line in os.environ.get("TORCH_CUDA_ARCH_LIST", "").split(",") if line.strip()] if os.environ.get("TORCH_CUDA_ARCH_LIST") else []/' setup.py
          fi
          # Inject CUDA version, GPU arch, and PyTorch version into package version string
          PYTORCH_VERSION="${{ inputs.pytorch_version || '2.10.0' }}"
          CUDA_SHORT_VERSION="${{ env.CUDA_VERSION }}"
          CUDA_SHORT_VERSION="${CUDA_SHORT_VERSION//./}"  # Remove dots: 13.0 -> 130
          CUDA_ARCH="${{ env.TORCH_CUDA_ARCH_LIST }}"
          CUDA_ARCH="${CUDA_ARCH//./}"  # Remove dots: 8.9 -> 89
          VERSION_SUFFIX="+cu${CUDA_SHORT_VERSION}sm${CUDA_ARCH}torch${PYTORCH_VERSION}"
          # Check if version already has the correct metadata
          if grep -q "${VERSION_SUFFIX}" setup.py; then
            echo "Version already has correct metadata: ${VERSION_SUFFIX}"
          else
            # Remove any existing version metadata to avoid duplicates
            # This handles cases like: 2.2.0+something -> 2.2.0
            sed -i 's/version=\(['\''"][^+]*\)+[^'\''"]*/version=\1/' setup.py
            # Now add our version metadata
            sed -i "s/version=\(['\"][^'\"]*['\"\]\)/version=\1.replace('\"', '').replace(\"'\", '') + '${VERSION_SUFFIX}'/" setup.py
            echo "Patched version to include: ${VERSION_SUFFIX}"
          fi

      - name: Build SageAttention wheel
        run: |
          cd SageAttention
          # Cross-compile for the target arch; the runner has no GPU.
          export TORCH_CUDA_ARCH_LIST="${{ env.TORCH_CUDA_ARCH_LIST }}"
          export MAX_JOBS=4
          python setup.py bdist_wheel
        env:
          # Redundant with the GITHUB_ENV export above, kept as a safety net.
          CUDA_HOME: ${{ env.CUDA_HOME }}

      - name: List built wheels
        run: |
          ls -lh SageAttention/dist/

      # Smoke test: import only. A GPU-less runner can import the extension
      # but cannot exercise kernels — presumably sufficient here; confirm.
      - name: Test wheel installation
        run: |
          pip install SageAttention/dist/*.whl
          python -c "import sageattention; print('SageAttention imported successfully')"

      - name: Get wheel name
        id: wheel_name
        run: |
          WHEEL_PATH=$(ls SageAttention/dist/*.whl)
          WHEEL_NAME=$(basename "$WHEEL_PATH")
          echo "wheel_name=$WHEEL_NAME" >> $GITHUB_OUTPUT
          echo "wheel_path=$WHEEL_PATH" >> $GITHUB_OUTPUT

      - name: Create Release
        id: create_release
        uses: softprops/action-gh-release@v1
        with:
          tag_name: sageattention-v${{ inputs.sageattention_version || '2.2.0' }}-py314-torch${{ inputs.pytorch_version || '2.10.0' }}-cu130
          name: SageAttention v${{ inputs.sageattention_version || '2.2.0' }} Wheel for Python ${{ env.PYTHON_VERSION }} + PyTorch ${{ inputs.pytorch_version || '2.10.0' }} + CUDA ${{ env.CUDA_VERSION }}
          body: |
            Built with:
            - Python: ${{ env.PYTHON_VERSION }}
            - CUDA: ${{ env.CUDA_VERSION }}
            - PyTorch: ${{ inputs.pytorch_version || '2.10.0' }}
            - CUDA Arch: ${{ env.TORCH_CUDA_ARCH_LIST }}

            ## Installation
            ```bash
            pip install ${{ steps.wheel_name.outputs.wheel_name }}
            ```

            Or directly from this release:
            ```bash
            pip install https://github.com/${{ github.repository }}/releases/download/sageattention-v${{ inputs.sageattention_version || '2.2.0' }}-py314-torch${{ inputs.pytorch_version || '2.10.0' }}-cu130/${{ steps.wheel_name.outputs.wheel_name }}
            ```

            ## Notes
            SageAttention provides optimized attention mechanisms with improved throughput and reduced memory usage.
            Compatible with PyTorch ${{ inputs.pytorch_version || '2.10.0' }} and CUDA ${{ env.CUDA_VERSION }}.
          files: ${{ steps.wheel_name.outputs.wheel_path }}
          draft: false
          prerelease: false
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      # Keep the wheel even when the release step fails (e.g. tag collision).
      - name: Upload wheel as artifact
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: sageattention-py${{ env.PYTHON_VERSION }}-cu${{ env.CUDA_VERSION }}
          path: ${{ steps.wheel_name.outputs.wheel_path }}
          retention-days: 30