-
Notifications
You must be signed in to change notification settings - Fork 36
ensure 'torch' CUDA wheels are installed in CI, test that 'torch' is an optional dependency #425
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
13825b5
698f115
1c457b8
066d5c4
6f73e44
271eb7e
5a4064e
0d7215e
fc30204
97e2c02
355d5aa
4aad5b4
426c5ff
7ac88d3
104b8bf
4b479f7
d055f9d
7bbf218
4cacebf
36843b6
4952952
11ed00e
b1cb02c
ca6e314
2f3d4f8
22fb749
79b7854
2633d4f
6039796
005a890
22ded28
bbe4c97
72779bd
2cf1e1c
2192089
b827cc2
6a958e6
2c3d0d0
40cdfa8
41c5277
eed447c
79a6efe
a61a427
96201b6
b8276e2
456857a
0afcdd7
27f8fdd
abf7313
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,52 @@ | ||
| #!/bin/bash | ||
| # SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
|
|
||
| # [description] | ||
| # | ||
| # Downloads a CUDA variant of 'torch' from the correct index, based on CUDA major version. | ||
| # | ||
| # This exists to avoid using 'pip --extra-index-url', which has these undesirable properties: | ||
| # | ||
| # - allows for CPU-only 'torch' to be downloaded from pypi.org | ||
| # - allows for other non-torch packages like 'numpy' to be downloaded from the PyTorch indices | ||
| # - increases solve complexity for 'pip' | ||
| # | ||
|
|
||
| set -e -u -o pipefail | ||
|
|
||
| TORCH_WHEEL_DIR="${1}" | ||
|
|
||
| # skip download attempt on CUDA versions where we know there isn't a 'torch' CUDA wheel. | ||
| CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" | ||
| CUDA_MINOR=$(echo "${RAPIDS_CUDA_VERSION}" | cut -d'.' -f2) | ||
| if \ | ||
| { [ "${CUDA_MAJOR}" -eq 12 ] && [ "${CUDA_MINOR}" -lt 9 ]; } \ | ||
| || { [ "${CUDA_MAJOR}" -eq 13 ] && [ "${CUDA_MINOR}" -gt 0 ]; } \ | ||
| || [ "${CUDA_MAJOR}" -gt 13 ]; | ||
| then | ||
| rapids-logger "Skipping 'torch' wheel download. (requires CUDA 12.9+ or 13.0, found ${RAPIDS_CUDA_VERSION})" | ||
| exit 0 | ||
| fi | ||
|
|
||
| # Ensure CUDA-enabled 'torch' packages are always used. | ||
| # | ||
| # Downloading + passing the downloaded file as a requirement forces the use of this | ||
| # package and ensures 'pip' considers all of its requirements. | ||
| # | ||
| # Not appending this to PIP_CONSTRAINT, because we don't want the torch '--extra-index-url' | ||
| # to leak outside of this script into other 'pip {download,install}' calls. | ||
| rapids-dependency-file-generator \ | ||
| --output requirements \ | ||
| --file-key "torch_only" \ | ||
| --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES};require_gpu=true" \ | ||
| | tee ./torch-constraints.txt | ||
|
|
||
| rapids-pip-retry download \ | ||
| --isolated \ | ||
| --prefer-binary \ | ||
| --no-deps \ | ||
| -d "${TORCH_WHEEL_DIR}" \ | ||
| --constraint "${PIP_CONSTRAINT}" \ | ||
| --constraint ./torch-constraints.txt \ | ||
| 'torch' | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,10 +1,10 @@ | ||
| #!/bin/bash | ||
| # SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. | ||
| # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
|
|
||
| set -euo pipefail | ||
|
|
||
| # Support invoking run_pytests.sh outside the script directory | ||
| cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/pylibwholegraph/pylibwholegraph/ | ||
|
|
||
| pytest --cache-clear --forked --import-mode=append "$@" tests | ||
| pytest -rs --cache-clear --forked --import-mode=append "$@" tests |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,12 +15,30 @@ LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_ | |
| PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") | ||
| CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) | ||
|
|
||
| CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" | ||
| # generate constraints (possibly pinning to oldest supported versions of dependencies) | ||
| rapids-generate-pip-constraints test_cugraph_pyg "${PIP_CONSTRAINT}" | ||
|
|
||
| if [[ "${CUDA_MAJOR}" == "12" ]]; then | ||
| PYTORCH_INDEX="https://download.pytorch.org/whl/cu126" | ||
| PIP_INSTALL_ARGS=( | ||
| --prefer-binary | ||
| --constraint "${PIP_CONSTRAINT}" | ||
| --extra-index-url 'https://pypi.nvidia.com' | ||
| "${LIBWHOLEGRAPH_WHEELHOUSE}"/*.whl | ||
| "$(echo "${PYLIBWHOLEGRAPH_WHEELHOUSE}"/pylibwholegraph_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)" | ||
| "$(echo "${CUGRAPH_PYG_WHEELHOUSE}"/cugraph_pyg_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test]" | ||
| ) | ||
|
|
||
| # ensure a CUDA variant of 'torch' is used (if one is available) | ||
| TORCH_WHEEL_DIR="$(mktemp -d)" | ||
| ./ci/download-torch-wheels.sh "${TORCH_WHEEL_DIR}" | ||
|
|
||
| # 'cugraph-pyg' is still expected to be importable | ||
| # and testable in an environment where 'torch' isn't installed. | ||
| torch_downloaded=true | ||
| if [ -z "$(ls -A ${TORCH_WHEEL_DIR} 2>/dev/null)" ]; then | ||
| rapids-echo-stderr "No 'torch' wheels downloaded." | ||
| torch_downloaded=false | ||
| else | ||
| PYTORCH_INDEX="https://download.pytorch.org/whl/cu130" | ||
| PIP_INSTALL_ARGS+=("${TORCH_WHEEL_DIR}"/torch-*.whl) | ||
| fi | ||
|
|
||
| # notes: | ||
|
|
@@ -30,12 +48,7 @@ fi | |
| # its dependencies are available from pypi.org | ||
| # | ||
| rapids-pip-retry install \ | ||
| -v \ | ||
| --extra-index-url "${PYTORCH_INDEX}" \ | ||
| --extra-index-url 'https://pypi.nvidia.com' \ | ||
| "${LIBWHOLEGRAPH_WHEELHOUSE}"/*.whl \ | ||
| "$(echo "${PYLIBWHOLEGRAPH_WHEELHOUSE}"/pylibwholegraph_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)" \ | ||
| "$(echo "${CUGRAPH_PYG_WHEELHOUSE}"/cugraph_pyg_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test]" | ||
| "${PIP_INSTALL_ARGS[@]}" | ||
|
|
||
| # RAPIDS_DATASET_ROOT_DIR is used by test scripts | ||
| export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" | ||
|
|
@@ -47,5 +60,34 @@ popd | |
| # Enable legacy behavior of torch.load for examples relying on ogb | ||
| export TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD=1 | ||
|
|
||
| rapids-logger "pytest cugraph-pyg (single GPU)" | ||
| if [[ "${torch_downloaded}" == "true" ]]; then | ||
| # TODO: remove this when RAPIDS wheels and 'torch' CUDA wheels have compatible package requirements | ||
| # | ||
| # * https://github.com/rapidsai/cugraph/issues/5443 | ||
| # * https://github.com/rapidsai/build-planning/issues/257 | ||
| # * https://github.com/rapidsai/build-planning/issues/255 | ||
| # | ||
| CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" | ||
| CUDA_MINOR=$(echo "${RAPIDS_CUDA_VERSION}" | cut -d'.' -f2) | ||
| if [[ "${CUDA_MAJOR}" == "13" ]]; then | ||
| pip install \ | ||
| --upgrade \ | ||
| "nvidia-nvjitlink>=${CUDA_MAJOR}.${CUDA_MINOR}" | ||
| fi | ||
|
|
||
| # 'torch' is an optional dependency of 'cugraph-pyg'... confirm that it's actually | ||
| # installed here and that we've installed a package with CUDA support. | ||
| rapids-logger "Confirming that PyTorch is installed" | ||
| python -c "import torch; assert torch.cuda.is_available()" | ||
|
|
||
| rapids-logger "pytest cugraph-pyg (single GPU, with 'torch')" | ||
| ./ci/run_cugraph_pyg_pytests.sh | ||
| fi | ||
|
|
||
| rapids-logger "import cugraph-pyg (no 'torch')" | ||
| ./ci/uninstall-torch-wheels.sh | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is there any reason you couldn't do the no-torch tests before the torch tests? That would save you the trouble of uninstalling torch.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Because to run the tests without Imperative code that force-uninstalls |
||
|
|
||
| python -c "import cugraph_pyg; print(f'cugraph-pyg version: {cugraph_pyg.__version__}')" | ||
|
|
||
| rapids-logger "pytest cugraph-pyg (no 'torch')" | ||
| ./ci/run_cugraph_pyg_pytests.sh | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,9 +2,7 @@ | |
| # SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
|
|
||
| set -e # abort the script on error | ||
| set -o pipefail # piped commands propagate their error | ||
| set -E # ERR traps are inherited by subcommands | ||
| set -euo pipefail | ||
|
|
||
| # Delete system libnccl.so to ensure the wheel is used. | ||
| # (but only do this in CI, to avoid breaking local dev environments) | ||
|
|
@@ -18,23 +16,68 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" | |
| LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) | ||
| PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") | ||
|
|
||
| # determine pytorch source | ||
| if [[ "${CUDA_MAJOR}" == "12" ]]; then | ||
| PYTORCH_INDEX="https://download.pytorch.org/whl/cu126" | ||
| else | ||
| PYTORCH_INDEX="https://download.pytorch.org/whl/cu130" | ||
| fi | ||
| RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"} | ||
| RAPIDS_COVERAGE_DIR=${RAPIDS_COVERAGE_DIR:-"${PWD}/coverage-results"} | ||
| mkdir -p "${RAPIDS_TESTS_DIR}" "${RAPIDS_COVERAGE_DIR}" | ||
|
|
||
| # generate constraints (possibly pinning to oldest supported versions of dependencies) | ||
| rapids-generate-pip-constraints test_pylibwholegraph "${PIP_CONSTRAINT}" | ||
|
|
||
| PIP_INSTALL_ARGS=( | ||
| --prefer-binary | ||
| --constraint "${PIP_CONSTRAINT}" | ||
| "$(echo "${PYLIBWHOLEGRAPH_WHEELHOUSE}"/pylibwholegraph*.whl)[test]" | ||
| "${LIBWHOLEGRAPH_WHEELHOUSE}"/*.whl | ||
| ) | ||
|
|
||
| # ensure a CUDA variant of 'torch' is used (if one is available) | ||
| TORCH_WHEEL_DIR="$(mktemp -d)" | ||
| ./ci/download-torch-wheels.sh "${TORCH_WHEEL_DIR}" | ||
|
|
||
| # 'pylibwholegraph' is still expected to be importable | ||
| # and testable in an environment where 'torch' isn't installed. | ||
| torch_downloaded=true | ||
| if [ -z "$(ls -A ${TORCH_WHEEL_DIR} 2>/dev/null)" ]; then | ||
| rapids-echo-stderr "No 'torch' wheels downloaded." | ||
| torch_downloaded=false | ||
| else | ||
| PIP_INSTALL_ARGS+=("${TORCH_WHEEL_DIR}"/torch-*.whl) | ||
| fi | ||
|
|
||
| # use 'echo' to expand the wildcard before appending the `[extra]` requirement specifier for pip | ||
| rapids-logger "Installing Packages" | ||
| rapids-pip-retry install \ | ||
| --extra-index-url ${PYTORCH_INDEX} \ | ||
| "$(echo "${PYLIBWHOLEGRAPH_WHEELHOUSE}"/pylibwholegraph*.whl)[test]" \ | ||
| "${LIBWHOLEGRAPH_WHEELHOUSE}"/*.whl \ | ||
| 'torch>=2.3' | ||
| "${PIP_INSTALL_ARGS[@]}" | ||
|
|
||
|
|
||
| if [[ "${torch_downloaded}" == "true" ]]; then | ||
| # TODO: remove this when RAPIDS wheels and 'torch' CUDA wheels have compatible package requirements | ||
| # | ||
| # * https://github.com/rapidsai/cugraph/issues/5443 | ||
| # * https://github.com/rapidsai/build-planning/issues/257 | ||
| # * https://github.com/rapidsai/build-planning/issues/255 | ||
| # | ||
| CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" | ||
| CUDA_MINOR=$(echo "${RAPIDS_CUDA_VERSION}" | cut -d'.' -f2) | ||
| if [[ "${CUDA_MAJOR}" == "13" ]]; then | ||
| pip install \ | ||
| --upgrade \ | ||
| "nvidia-nvjitlink>=${CUDA_MAJOR}.${CUDA_MINOR}" | ||
| fi | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
We know it's safe to use a newer |
||
|
|
||
| # 'torch' is an optional dependency of 'pylibwholegraph'... confirm that it's actually | ||
| # installed here and that we've installed a package with CUDA support. | ||
| rapids-logger "Confirming that PyTorch is installed" | ||
| python -c "import torch; assert torch.cuda.is_available()" | ||
|
|
||
| rapids-logger "pytest pylibwholegraph (with 'torch')" | ||
| ./ci/run_pylibwholegraph_pytests.sh | ||
| fi | ||
|
|
||
| rapids-logger "import pylibwholegraph (no 'torch')" | ||
| ./ci/uninstall-torch-wheels.sh | ||
|
|
||
| python -c "import pylibwholegraph; print(f'pylibwholegraph version: {pylibwholegraph.__version__}')" | ||
|
|
||
| rapids-logger "pytest pylibwholegraph" | ||
| rapids-logger "pytest pylibwholegraph (no 'torch')" | ||
| ./ci/run_pylibwholegraph_pytests.sh | ||
jameslamb marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,16 @@ | ||
| #!/bin/bash | ||
| # SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
|
|
||
| set -euo pipefail | ||
|
|
||
| pip uninstall --yes 'torch' | ||
|
|
||
| # 'pytest' leaves behind some pycache files in site-packages/torch that make 'import torch' | ||
| # seem to "work" even though there's not really a package there, leading to errors like | ||
| # "module 'torch' has no attribute 'distributed'" | ||
| # | ||
| # For the sake of testing, just fully delete 'torch' from site-packages to simulate an environment | ||
| # where it was never installed. | ||
| SITE_PACKAGES=$(python -c "import site; print(site.getsitepackages()[0])") | ||
| rm -rf "${SITE_PACKAGES}/torch" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -43,3 +43,22 @@ rapids-logger "validate packages with 'twine'" | |
| twine check \ | ||
| --strict \ | ||
| "$(echo ${wheel_dir_relative_path}/*.whl)" | ||
|
|
||
| rapids-logger "validating that the wheel doesn't depend on 'torch' (even in an extra)" | ||
| WHEEL_FILE="$(echo ${wheel_dir_relative_path}/*.whl)" | ||
|
|
||
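| # NOTE: require at least one version-specifier character (or a space) right after 'torch', to avoid a false positive like 'torch-geometric' | ||
| # (consequently, a bare 'torch' requirement with no specifier, or 'torch;<marker>', is not matched by this pattern) | ||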
| # Use '|| true' so grep not finding any matches (exit 1) does not kill the script under set -e | ||
| unzip -p "${WHEEL_FILE}" '*.dist-info/METADATA' \ | ||
| | grep -E '^Requires-Dist:.*torch[><=!~ ]+.*' \ | ||
| | tee matches.txt || true | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why the
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
|
|
||
| if [[ -s ./matches.txt ]]; then | ||
| echo -n "Wheel '${WHEEL_FILE}' appears to depend on 'torch'. Remove that dependency. " | ||
| echo -n "We prefer to not declare a 'torch' dependency and allow it to be managed separately, " | ||
| echo "to ensure tight control over the variants installed (including for DLFW builds)." | ||
| exit 1 | ||
| else | ||
| echo "No dependency on 'torch' found" | ||
| exit 0 | ||
| fi | ||
Uh oh!
There was an error while loading. Please reload this page.