build wheels with CUDA 13.0.x, test wheels against mix of CTK versions

jameslamb · jameslamb · commit 5ee91d2d24cd · 2026-03-03T14:55:46.000-06:00
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
@@ -258,7 +258,7 @@ jobs:
   wheel-build-cpp:
     needs: checks
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@build-wheels-old-ctk
     with:
       matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber)))
       build_type: pull-request
@@ -269,7 +269,7 @@ jobs:
   wheel-build-python:
     needs: wheel-build-cpp
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@build-wheels-old-ctk
     with:
       # Build a wheel for each CUDA x ARCH x minimum supported Python version
       matrix_filter: group_by({CUDA_VER, ARCH}) | map(min_by(.PY_VER | split(".") | map(tonumber)))
diff --git a/ci/download-torch-wheels.sh b/ci/download-torch-wheels.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# [description]
+#
+#   Downloads a CUDA variant of 'torch' from the correct index, based on the
+#   CUDA version and 'dependencies' values passed via '--matrix' below.
+#
+#   This exists to keep 'pip --extra-index-url' out of the main 'pip install' calls,
+#   which could allow for CPU-only 'torch' to be downloaded from pypi.org.
+#
+# [usage]
+#
+#   ./ci/download-torch-wheels.sh <torch-wheel-dir>
+#
+# [environment]
+#
+#   Reads RAPIDS_CUDA_VERSION, RAPIDS_PY_VERSION, RAPIDS_DEPENDENCIES, and PIP_CONSTRAINT.
+#   Writes the generated requirements to './torch-constraints.txt' in the working directory.
+#
+
+set -e -u -o pipefail
+
+# fail with a usage message (instead of a bare 'unbound variable' error)
+# if the output directory is not provided
+TORCH_WHEEL_DIR="${1:?usage: download-torch-wheels.sh <torch-wheel-dir>}"
+
+# Ensure CUDA-enabled 'torch' packages are always used.
+#
+# Downloading + passing the downloaded file as a requirement forces the use of this
+# package, so we don't accidentally end up with a CPU-only 'torch' from 'pypi.org'
+# (which can happen because pip doesn't support index priority).
+#
+# Not appending this to PIP_CONSTRAINT, because we don't want the torch '--extra-index-url'
+# to leak outside of this script into other 'pip {download,install}' calls.
+rapids-dependency-file-generator \
+    --output requirements \
+    --file-key "torch_only" \
+    --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \
+| tee ./torch-constraints.txt
+
+# '--isolated' makes pip ignore environment variables and user configuration,
+# so constraints are passed explicitly via '--constraint'.
+rapids-pip-retry download \
+  --isolated \
+  --prefer-binary \
+  --no-deps \
+  -d "${TORCH_WHEEL_DIR}" \
+  --constraint "${PIP_CONSTRAINT}" \
+  --constraint ./torch-constraints.txt \
+  'torch'
diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh
@@ -4,6 +4,13 @@
 
 set -eou pipefail
 
+# TODO(jameslamb): revert before merging
+git clone --branch generate-pip-constraints \
+    https://github.com/rapidsai/gha-tools.git \
+    /tmp/gha-tools
+
+export PATH="/tmp/gha-tools/tools:${PATH}"
+
 source rapids-init-pip
 
 RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen "${RAPIDS_CUDA_VERSION}")"
@@ -12,17 +19,16 @@ LIBRMM_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="librmm_${RAPIDS_PY_CUDA_SUFFIX}" rapid
 RMM_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" rmm --stable --cuda "$RAPIDS_CUDA_VERSION")")
 
 # generate constraints (possibly pinning to oldest support versions of dependencies)
-rapids-generate-pip-constraints test_python ./constraints.txt
+rapids-generate-pip-constraints test_python "${PIP_CONSTRAINT}"
 
 # notes:
 #
 #   * echo to expand wildcard before adding `[test]` requires for pip
-#   * need to provide --constraint="${PIP_CONSTRAINT}" because that environment variable is
-#     ignored if any other --constraint are passed via the CLI
+#   * passing --constraint="${PIP_CONSTRAINT}" explicitly, both for clarity and because
+#     that environment variable is ignored if any other --constraint is passed via the CLI
 #
 rapids-pip-retry install \
     -v \
-    --constraint ./constraints.txt \
     --constraint "${PIP_CONSTRAINT}" \
     "$(echo "${LIBRMM_WHEELHOUSE}"/librmm_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)" \
     "$(echo "${RMM_WHEELHOUSE}"/rmm_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test]"
diff --git a/ci/test_wheel_integrations.sh b/ci/test_wheel_integrations.sh
@@ -4,25 +4,30 @@
 
 set -eou pipefail
 
-RAPIDS_INIT_PIP_REMOVE_NVIDIA_INDEX="true"
-export RAPIDS_INIT_PIP_REMOVE_NVIDIA_INDEX
+# TODO(jameslamb): revert before merging
+git clone --branch generate-pip-constraints \
+    https://github.com/rapidsai/gha-tools.git \
+    /tmp/gha-tools
+
+export PATH="/tmp/gha-tools/tools:${PATH}"
+
 source rapids-init-pip
 
 RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen "${RAPIDS_CUDA_VERSION}")"
 LIBRMM_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="librmm_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp)
 RMM_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" rmm --stable --cuda "$RAPIDS_CUDA_VERSION")")
 
 # generate constraints (possibly pinning to oldest support versions of dependencies)
-rapids-generate-pip-constraints test_python ./constraints.txt
+rapids-generate-pip-constraints test_python "${PIP_CONSTRAINT}"
 
 # notes:
 #
 #   * echo to expand wildcard before adding `[test]` requires for pip
-#   * need to provide --constraint="${PIP_CONSTRAINT}" because that environment variable is
-#     ignored if any other --constraint are passed via the CLI
+#   * passing --constraint="${PIP_CONSTRAINT}" explicitly, both for clarity and because
+#     that environment variable is ignored if any other --constraint is passed via the CLI
 #
 PIP_INSTALL_SHARED_ARGS=(
-    --constraint=./constraints.txt
+    --prefer-binary
     --constraint="${PIP_CONSTRAINT}"
     "$(echo "${LIBRMM_WHEELHOUSE}"/librmm_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)"
     "$(echo "${RMM_WHEELHOUSE}"/rmm_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test]"
@@ -40,18 +45,18 @@ CUDA_MINOR=$(echo "${RAPIDS_CUDA_VERSION}" | cut -d'.' -f2)
 echo "::group::PyTorch Tests"
 
 if [ "${CUDA_MAJOR}" -gt 12 ] || { [ "${CUDA_MAJOR}" -eq 12 ] && [ "${CUDA_MINOR}" -ge 8 ]; }; then
-    rapids-logger "Generating PyTorch test requirements"
-    rapids-dependency-file-generator \
-        --output requirements \
-        --file-key test_wheels_pytorch \
-        --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" \
-        | tee test-pytorch-requirements.txt
+
+    # ensure a CUDA variant of 'torch' is used
+    rapids-logger "Downloading PyTorch CUDA wheels"
+    TORCH_WHEEL_DIR="$(mktemp -d)"
+    ./ci/download-torch-wheels.sh "${TORCH_WHEEL_DIR}"
 
     rapids-logger "Installing PyTorch test requirements"
+    # install the downloaded wheel by path, so pip cannot swap in a CPU-only 'torch' from pypi.org
     rapids-pip-retry install \
         -v \
         "${PIP_INSTALL_SHARED_ARGS[@]}" \
-        -r test-pytorch-requirements.txt
+        "${TORCH_WHEEL_DIR}"/torch-*.whl
 
     timeout 15m python -m pytest -k "torch" ./python/rmm/rmm/tests \
         && EXITCODE_PYTORCH=$? || EXITCODE_PYTORCH=$?
@@ -71,7 +76,7 @@ rapids-logger "Generating CuPy test requirements"
 rapids-dependency-file-generator \
     --output requirements \
     --file-key test_wheels_cupy \
-    --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" \
+    --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};use_cuda_wheels=true" \
     | tee test-cupy-requirements.txt
 
 rapids-logger "Installing CuPy test requirements"
diff --git a/dependencies.yaml b/dependencies.yaml
@@ -46,10 +46,6 @@ files:
       - depends_on_cupy
       - depends_on_librmm
       - depends_on_rmm
-  test_wheels_pytorch:
-    output: none
-    includes:
-      - depends_on_pytorch
   test_wheels_cupy:
     output: none
     includes:
@@ -131,6 +127,10 @@ files:
       key: test
     includes:
       - test_python
+  torch_only:
+    output: none
+    includes:
+      - depends_on_pytorch
 channels:
   - rapidsai-nightly
   - rapidsai
@@ -238,6 +238,8 @@ dependencies:
       - output_types: conda
         packages:
           - &doxygen doxygen=1.9.1
+  # 'cuda_version' intentionally does not contain fallback entries... we want
+  # a loud error if an unsupported 'cuda' value is passed
   cuda_version:
     specific:
       - output_types: conda
@@ -270,6 +272,50 @@ dependencies:
               cuda: "13.1"
             packages:
               - cuda-version=13.1
+      - output_types: requirements
+        matrices:
+          # if use_cuda_wheels=false is provided, do not add dependencies on any CUDA wheels
+          # (e.g. for DLFW and pip devcontainers)
+          - matrix:
+              use_cuda_wheels: "false"
+            packages:
+          - matrix:
+              arch: aarch64
+              cuda: "12.2"
+              use_cuda_wheels: "true"
+            packages:
+              # nvidia-cublas-cu12 didn't get aarch64 wheels until CTK 12.3
+              - cuda-toolkit>=12.2,<12.4
+          - matrix:
+              cuda: "12.2"
+              use_cuda_wheels: "true"
+            packages:
+              - cuda-toolkit==12.2.*
+          - matrix:
+              cuda: "12.5"
+              use_cuda_wheels: "true"
+            packages:
+              - cuda-toolkit==12.5.*
+          - matrix:
+              cuda: "12.8"
+              use_cuda_wheels: "true"
+            packages:
+              - cuda-toolkit==12.8.*
+          - matrix:
+              cuda: "12.9"
+              use_cuda_wheels: "true"
+            packages:
+              - cuda-toolkit==12.9.*
+          - matrix:
+              cuda: "13.0"
+              use_cuda_wheels: "true"
+            packages:
+              - cuda-toolkit==13.0.*
+          - matrix:
+              cuda: "13.1"
+              use_cuda_wheels: "true"
+            packages:
+              - cuda-toolkit==13.1.*
   develop:
     common:
       - output_types: conda
@@ -402,20 +448,36 @@ dependencies:
         packages:
           - pytorch-gpu>=2.10.0
     specific:
-      - output_types: [requirements, pyproject]
+      # For [requirements], include --extra-index-url and CUDA-specific version modifiers
+      # to ensure we get CUDA builds at test time.
+      #
+      # These 'pytorch.org' indices host CPU-only variants too, so requirements like '>=' are not safe.
+      # Using '==' and a version with the CUDA specifier like '+cu130' is the most reliable way to ensure
+      # the packages we want are pulled (at the expense of needing to maintain this list).
+      - output_types: requirements
         matrices:
+          - matrix:
+              cuda: "12.2"
+              dependencies: "oldest"
+            packages:
+              - --extra-index-url=https://download.pytorch.org/whl/cu124
+              - torch==2.4.0+cu124
           - matrix:
               cuda: "12.*"
             packages:
-              - --extra-index-url=https://download.pytorch.org/whl/cu128
+              - --extra-index-url=https://download.pytorch.org/whl/cu129
+              - torch==2.9.0+cu129
           - matrix:
+              cuda: "13.0"
+              dependencies: "oldest"
             packages:
-              - --extra-index-url=https://download.pytorch.org/whl/cu130
-      - output_types: [requirements, pyproject]
-        matrices:
+              - &torch_index_cu13 --extra-index-url=https://download.pytorch.org/whl/cu130
+              - torch==2.9.0+cu130
           - matrix:
+              cuda: "13.*"
             packages:
-              - torch>=2.10.0
+              - *torch_index_cu13
+              - torch==2.10.0+cu130
   depends_on_cupy:
     common:
       - output_types: conda