Skip to content

Commit 5ee91d2

Browse files
committed
build wheels with CUDA 13.0.x, test wheels against mix of CTK versions
1 parent 9b9f23d commit 5ee91d2

5 files changed

Lines changed: 141 additions & 30 deletions

File tree

.github/workflows/pr.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ jobs:
258258
wheel-build-cpp:
259259
needs: checks
260260
secrets: inherit
261-
uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14
261+
uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@build-wheels-old-ctk
262262
with:
263263
matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber)))
264264
build_type: pull-request
@@ -269,7 +269,7 @@ jobs:
269269
wheel-build-python:
270270
needs: wheel-build-cpp
271271
secrets: inherit
272-
uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14
272+
uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@build-wheels-old-ctk
273273
with:
274274
# Build a wheel for each CUDA x ARCH x minimum supported Python version
275275
matrix_filter: group_by({CUDA_VER, ARCH}) | map(min_by(.PY_VER | split(".") | map(tonumber)))

ci/download-torch-wheels.sh

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
#!/bin/bash
2+
# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
# [description]
6+
#
7+
# Downloads a CUDA variant of 'torch' from the correct index, based on CUDA major version.
8+
#
9+
# This exists to avoid using 'pip --extra-index-url', which could allow for CPU-only 'torch'
10+
# to be downloaded from pypi.org.
11+
#
12+
13+
set -e -u -o pipefail
14+
15+
TORCH_WHEEL_DIR="${1}"
16+
17+
# Ensure CUDA-enabled 'torch' packages are always used.
18+
#
19+
# Downloading + passing the downloaded file as a requirement forces the use of this
20+
# package, so we don't accidentally end up with a CPU-only 'torch' from 'pypi.org'
21+
# (which can happen because pip doesn't support index priority).
22+
#
23+
# Not appending this to PIP_CONSTRAINT, because we don't want the torch '--extra-index-url'
24+
# to leak outside of this script into other 'pip {download,install}' calls.
25+
rapids-dependency-file-generator \
26+
--output requirements \
27+
--file-key "torch_only" \
28+
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \
29+
| tee ./torch-constraints.txt
30+
31+
rapids-pip-retry download \
32+
--isolated \
33+
--prefer-binary \
34+
--no-deps \
35+
-d "${TORCH_WHEEL_DIR}" \
36+
--constraint "${PIP_CONSTRAINT}" \
37+
--constraint ./torch-constraints.txt \
38+
'torch'

ci/test_wheel.sh

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,13 @@
44

55
set -eou pipefail
66

7+
# TODO(jameslamb): revert before merging
8+
git clone --branch generate-pip-constraints \
9+
https://github.com/rapidsai/gha-tools.git \
10+
/tmp/gha-tools
11+
12+
export PATH="/tmp/gha-tools/tools:${PATH}"
13+
714
source rapids-init-pip
815

916
RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen "${RAPIDS_CUDA_VERSION}")"
@@ -12,17 +19,16 @@ LIBRMM_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="librmm_${RAPIDS_PY_CUDA_SUFFIX}" rapid
1219
RMM_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" rmm --stable --cuda "$RAPIDS_CUDA_VERSION")")
1320

1421
# generate constraints (possibly pinning to oldest supported versions of dependencies)
15-
rapids-generate-pip-constraints test_python ./constraints.txt
22+
rapids-generate-pip-constraints test_python "${PIP_CONSTRAINT}"
1623

1724
# notes:
1825
#
1926
# * echo to expand wildcard before adding `[test]` requires for pip
20-
# * need to provide --constraint="${PIP_CONSTRAINT}" because that environment variable is
21-
# ignored if any other --constraint are passed via the CLI
27+
# * just providing --constraint="${PIP_CONSTRAINT}" to be explicit, and because
28+
# that environment variable is ignored if any other --constraint options are passed via the CLI
2229
#
2330
rapids-pip-retry install \
2431
-v \
25-
--constraint ./constraints.txt \
2632
--constraint "${PIP_CONSTRAINT}" \
2733
"$(echo "${LIBRMM_WHEELHOUSE}"/librmm_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)" \
2834
"$(echo "${RMM_WHEELHOUSE}"/rmm_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test]"

ci/test_wheel_integrations.sh

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,25 +4,30 @@
44

55
set -eou pipefail
66

7-
RAPIDS_INIT_PIP_REMOVE_NVIDIA_INDEX="true"
8-
export RAPIDS_INIT_PIP_REMOVE_NVIDIA_INDEX
7+
# TODO(jameslamb): revert before merging
8+
git clone --branch generate-pip-constraints \
9+
https://github.com/rapidsai/gha-tools.git \
10+
/tmp/gha-tools
11+
12+
export PATH="/tmp/gha-tools/tools:${PATH}"
13+
914
source rapids-init-pip
1015

1116
RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen "${RAPIDS_CUDA_VERSION}")"
1217
LIBRMM_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="librmm_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp)
1318
RMM_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" rmm --stable --cuda "$RAPIDS_CUDA_VERSION")")
1419

1520
# generate constraints (possibly pinning to oldest supported versions of dependencies)
16-
rapids-generate-pip-constraints test_python ./constraints.txt
21+
rapids-generate-pip-constraints test_python "${PIP_CONSTRAINT}"
1722

1823
# notes:
1924
#
2025
# * echo to expand wildcard before adding `[test]` requires for pip
21-
# * need to provide --constraint="${PIP_CONSTRAINT}" because that environment variable is
22-
# ignored if any other --constraint are passed via the CLI
26+
# * just providing --constraint="${PIP_CONSTRAINT}" to be explicit, and because
27+
# that environment variable is ignored if any other --constraint options are passed via the CLI
2328
#
2429
PIP_INSTALL_SHARED_ARGS=(
25-
--constraint=./constraints.txt
30+
--prefer-binary
2631
--constraint="${PIP_CONSTRAINT}"
2732
"$(echo "${LIBRMM_WHEELHOUSE}"/librmm_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)"
2833
"$(echo "${RMM_WHEELHOUSE}"/rmm_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test]"
@@ -40,18 +45,18 @@ CUDA_MINOR=$(echo "${RAPIDS_CUDA_VERSION}" | cut -d'.' -f2)
4045
echo "::group::PyTorch Tests"
4146

4247
if [ "${CUDA_MAJOR}" -gt 12 ] || { [ "${CUDA_MAJOR}" -eq 12 ] && [ "${CUDA_MINOR}" -ge 8 ]; }; then
43-
rapids-logger "Generating PyTorch test requirements"
44-
rapids-dependency-file-generator \
45-
--output requirements \
46-
--file-key test_wheels_pytorch \
47-
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" \
48-
| tee test-pytorch-requirements.txt
48+
49+
# ensure a CUDA variant of 'torch' is used
50+
rapids-logger "Downloading PyTorch CUDA wheels"
51+
TORCH_WHEEL_DIR="$(mktemp -d)"
52+
./ci/download-torch-wheels.sh "${TORCH_WHEEL_DIR}"
4953

5054
rapids-logger "Installing PyTorch test requirements"
5155
rapids-pip-retry install \
5256
-v \
5357
"${PIP_INSTALL_SHARED_ARGS[@]}" \
54-
-r test-pytorch-requirements.txt
58+
-r test-pytorch-requirements.txt \
59+
"${TORCH_WHEEL_DIR}"/torch-*.whl
5560

5661
timeout 15m python -m pytest -k "torch" ./python/rmm/rmm/tests \
5762
&& EXITCODE_PYTORCH=$? || EXITCODE_PYTORCH=$?
@@ -71,7 +76,7 @@ rapids-logger "Generating CuPy test requirements"
7176
rapids-dependency-file-generator \
7277
--output requirements \
7378
--file-key test_wheels_cupy \
74-
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" \
79+
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};use_cuda_wheels=true" \
7580
| tee test-cupy-requirements.txt
7681

7782
rapids-logger "Installing CuPy test requirements"

dependencies.yaml

Lines changed: 72 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,6 @@ files:
4646
- depends_on_cupy
4747
- depends_on_librmm
4848
- depends_on_rmm
49-
test_wheels_pytorch:
50-
output: none
51-
includes:
52-
- depends_on_pytorch
5349
test_wheels_cupy:
5450
output: none
5551
includes:
@@ -131,6 +127,10 @@ files:
131127
key: test
132128
includes:
133129
- test_python
130+
torch_only:
131+
output: none
132+
includes:
133+
- depends_on_pytorch
134134
channels:
135135
- rapidsai-nightly
136136
- rapidsai
@@ -238,6 +238,8 @@ dependencies:
238238
- output_types: conda
239239
packages:
240240
- &doxygen doxygen=1.9.1
241+
# 'cuda_version' intentionally does not contain fallback entries... we want
242+
# a loud error if an unsupported 'cuda' value is passed
241243
cuda_version:
242244
specific:
243245
- output_types: conda
@@ -270,6 +272,50 @@ dependencies:
270272
cuda: "13.1"
271273
packages:
272274
- cuda-version=13.1
275+
- output_types: requirements
276+
matrices:
277+
# if use_cuda_wheels=false is provided, do not add dependencies on any CUDA wheels
278+
# (e.g. for DLFW and pip devcontainers)
279+
- matrix:
280+
use_cuda_wheels: "false"
281+
packages:
282+
- matrix:
283+
arch: aarch64
284+
cuda: "12.2"
285+
use_cuda_wheels: "true"
286+
packages:
287+
# nvidia-cublas-cu12 didn't get aarch64 wheels until CTK 12.3
288+
- cuda-toolkit>=12.2,<12.4
289+
- matrix:
290+
cuda: "12.2"
291+
use_cuda_wheels: "true"
292+
packages:
293+
- cuda-toolkit==12.2.*
294+
- matrix:
295+
cuda: "12.5"
296+
use_cuda_wheels: "true"
297+
packages:
298+
- cuda-toolkit==12.5.*
299+
- matrix:
300+
cuda: "12.8"
301+
use_cuda_wheels: "true"
302+
packages:
303+
- cuda-toolkit==12.8.*
304+
- matrix:
305+
cuda: "12.9"
306+
use_cuda_wheels: "true"
307+
packages:
308+
- cuda-toolkit==12.9.*
309+
- matrix:
310+
cuda: "13.0"
311+
use_cuda_wheels: "true"
312+
packages:
313+
- cuda-toolkit==13.0.*
314+
- matrix:
315+
cuda: "13.1"
316+
use_cuda_wheels: "true"
317+
packages:
318+
- cuda-toolkit==13.1.*
273319
develop:
274320
common:
275321
- output_types: conda
@@ -402,20 +448,36 @@ dependencies:
402448
packages:
403449
- pytorch-gpu>=2.10.0
404450
specific:
405-
- output_types: [requirements, pyproject]
451+
# For [requirements], include --extra-index-url and CUDA-specific version modifiers
452+
# to ensure we get CUDA builds at test time.
453+
#
454+
# These 'pytorch.org' indices host CPU-only variants too, so requirements like '>=' are not safe.
455+
# Using '==' and a version with the CUDA specifier like '+cu130' is the most reliable way to ensure
456+
# the packages we want are pulled (at the expense of needing to maintain this list).
457+
- output_types: requirements
406458
matrices:
459+
- matrix:
460+
cuda: "12.2"
461+
dependencies: "oldest"
462+
packages:
463+
- --extra-index-url=https://download.pytorch.org/whl/cu124
464+
- torch==2.4.0+cu124
407465
- matrix:
408466
cuda: "12.*"
409467
packages:
410-
- --extra-index-url=https://download.pytorch.org/whl/cu128
468+
- --extra-index-url=https://download.pytorch.org/whl/cu129
469+
- torch==2.9.0+cu129
411470
- matrix:
471+
cuda: "13.0"
472+
dependencies: "oldest"
412473
packages:
413-
- --extra-index-url=https://download.pytorch.org/whl/cu130
414-
- output_types: [requirements, pyproject]
415-
matrices:
474+
- &torch_index_cu13 --extra-index-url=https://download.pytorch.org/whl/cu130
475+
- torch==2.9.0+cu130
416476
- matrix:
477+
cuda: "13.*"
417478
packages:
418-
- torch>=2.10.0
479+
- *torch_index_cu13
480+
- torch==2.10.0+cu130
419481
depends_on_cupy:
420482
common:
421483
- output_types: conda

0 commit comments

Comments
 (0)