diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 92ad7ba00..99482f1d2 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -23,6 +23,7 @@ jobs: - build-wheels - test-wheels - test-wheels-pynvjitlink + - test-wheels-deps-wheels - build-docs secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.04 @@ -94,6 +95,14 @@ jobs: script: "ci/test_wheel_pynvjitlink.sh" # This selects "ARCH=amd64 and CUDA >=12, with the latest supported Python for each CUDA major version". matrix_filter: map(select(.ARCH == "amd64" and (.CUDA_VER | split(".") | .[0] | tonumber >= 12))) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) + test-wheels-deps-wheels: + needs: + - build-wheels + uses: ./.github/workflows/wheels-test.yaml + with: + build_type: pull-request + script: "ci/test_wheel_deps_wheels.sh" + matrix_filter: map(select(.ARCH == "amd64" and (.CUDA_VER | split(".") | .[0] | tonumber >= 12))) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) build-docs: needs: - build-conda diff --git a/ci/test_conda.sh b/ci/test_conda.sh index ad8a13050..2f86e4f1e 100755 --- a/ci/test_conda.sh +++ b/ci/test_conda.sh @@ -9,8 +9,13 @@ if [ "${CUDA_VER%.*.*}" = "11" ]; then CTK_PACKAGES="cudatoolkit" else CTK_PACKAGES="cuda-cccl cuda-nvcc-impl cuda-nvrtc libcurand-dev" + apt-get update + apt remove --purge `dpkg --get-selections | grep cuda-nvvm | awk '{print $1}'` -y + apt remove --purge `dpkg --get-selections | grep cuda-nvrtc | awk '{print $1}'` -y fi + + rapids-logger "Install testing dependencies" # TODO: Replace with rapids-dependency-file-generator rapids-mamba-retry create -n test \ diff --git a/ci/test_wheel_deps_wheels.sh b/ci/test_wheel_deps_wheels.sh new file mode 100755 index 000000000..c8c135b49 --- /dev/null +++ b/ci/test_wheel_deps_wheels.sh @@ -0,0 +1,56 @@ +#!/bin/bash +# Copyright (c) 2023-2024, NVIDIA CORPORATION + +set -euo pipefail + +rapids-logger "Install testing dependencies" +# TODO: Replace with rapids-dependency-file-generator +python -m pip install \ + psutil \ + cffi \ + cuda-python \ + nvidia-cuda-runtime-cu12 \ + nvidia-curand-cu12 \ + nvidia-cuda-nvcc-cu12 \ + nvidia-cuda-nvrtc-cu12 \ + pynvjitlink-cu12 \ + pytest + + +rapids-logger "Build tests" +PY_SCRIPT=" +import numba_cuda +root = numba_cuda.__file__.rstrip('__init__.py') +test_dir = root + \"numba/cuda/tests/test_binary_generation/\" +print(test_dir) +" + +NUMBA_CUDA_TEST_BIN_DIR=$(python -c "$PY_SCRIPT") +pushd $NUMBA_CUDA_TEST_BIN_DIR +make +popd + +rapids-logger "Install wheel" +package=$(realpath wheel/numba_cuda*.whl) +echo "Package path: $package" +python -m pip install $package + +rapids-logger "Check GPU usage" +nvidia-smi + +RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}/ +mkdir -p "${RAPIDS_TESTS_DIR}" +pushd "${RAPIDS_TESTS_DIR}" + +rapids-logger "Show Numba system info" +python -m numba --sysinfo + +# remove cuda-nvvm-12-5 leaving libnvvm.so from nvidia-cuda-nvcc-cu12 only +apt-get update +apt remove --purge `dpkg --get-selections | grep cuda-nvvm | awk '{print $1}'` -y +apt remove --purge `dpkg --get-selections | grep cuda-nvrtc | awk '{print $1}'` -y + +rapids-logger "Run Tests" +NUMBA_CUDA_ENABLE_PYNVJITLINK=1 NUMBA_CUDA_TEST_BIN_DIR=$NUMBA_CUDA_TEST_BIN_DIR python -m numba.runtests numba.cuda.tests -v + +popd diff --git a/numba_cuda/numba/cuda/cuda_paths.py b/numba_cuda/numba/cuda/cuda_paths.py index 7d7f7ce6f..bfb418c26 100644 --- a/numba_cuda/numba/cuda/cuda_paths.py +++ b/numba_cuda/numba/cuda/cuda_paths.py @@ -3,14 +3,48 @@ import os from collections import namedtuple import platform - +import site +from pathlib import Path from numba.core.config import IS_WIN32 -from numba.misc.findlib import find_lib, find_file +from numba.misc.findlib import find_lib from numba import config - +import ctypes _env_path_tuple = namedtuple("_env_path_tuple", ["by", "info"]) +SEARCH_PRIORITY = [ + "Conda environment", + "Conda environment (NVIDIA package)", + "NVIDIA NVCC Wheel", + "CUDA_HOME", + "System", + "Debian package", +] + + +def _priority_index(label): + if label in SEARCH_PRIORITY: + return SEARCH_PRIORITY.index(label) + else: + raise ValueError(f"Can't determine search priority for {label}") + + +def _find_first_valid_lazy(options): + sorted_options = sorted(options, key=lambda x: _priority_index(x[0])) + for label, fn in sorted_options: + value = fn() + if value: + return label, value + return "", None + + +def _build_options(pairs): + """Sorts and returns a list of (label, value) tuples according to SEARCH_PRIORITY.""" + priority_index = {label: i for i, label in enumerate(SEARCH_PRIORITY)} + return sorted( + pairs, key=lambda pair: priority_index.get(pair[0], float("inf")) + ) + def _find_valid_path(options): """Find valid path from *options*, which is a list of 2-tuple of @@ -25,15 +59,17 @@ def _find_valid_path(options): def _get_libdevice_path_decision(): - options = [ - ("Conda environment", get_conda_ctk()), - ("Conda environment (NVIDIA package)", get_nvidia_libdevice_ctk()), - ("CUDA_HOME", get_cuda_home("nvvm", "libdevice")), - ("System", get_system_ctk("nvvm", "libdevice")), - ("Debian package", get_debian_pkg_libdevice()), - ] - by, libdir = _find_valid_path(options) - return by, libdir + options = _build_options( + [ + ("Conda environment", get_conda_ctk), + ("Conda environment (NVIDIA package)", get_nvidia_libdevice_ctk), + ("CUDA_HOME", lambda: get_cuda_home("nvvm", "libdevice")), + ("NVIDIA NVCC Wheel", get_libdevice_wheel), + ("System", lambda: get_system_ctk("nvvm", "libdevice")), + ("Debian package", get_debian_pkg_libdevice), + ] + ) + return _find_first_valid_lazy(options) def _nvvm_lib_dir(): @@ -45,22 +81,127 @@ def _nvvm_lib_dir(): def _get_nvvm_path_decision(): options = [ - ("Conda environment", get_conda_ctk()), - ("Conda environment (NVIDIA package)", get_nvidia_nvvm_ctk()), - ("CUDA_HOME", get_cuda_home(*_nvvm_lib_dir())), - ("System", get_system_ctk(*_nvvm_lib_dir())), + ("Conda environment", get_conda_ctk), + ("Conda environment (NVIDIA package)", get_nvidia_nvvm_ctk), + ("NVIDIA NVCC Wheel", _get_nvvm_wheel), + ("CUDA_HOME", lambda: get_cuda_home(*_nvvm_lib_dir())), + ("System", lambda: get_system_ctk(*_nvvm_lib_dir())), ] - by, path = _find_valid_path(options) - return by, path + return _find_first_valid_lazy(options) + + +def _get_nvrtc_system_ctk(): + sys_path = get_system_ctk("bin" if IS_WIN32 else "lib64") + candidates = find_lib("nvrtc", sys_path) + if candidates: + return max(candidates) + + +def _get_nvrtc_path_decision(): + options = _build_options( + [ + ("CUDA_HOME", lambda: get_cuda_home("nvrtc")), + ("Conda environment", get_conda_ctk), + ("Conda environment (NVIDIA package)", get_nvidia_cudalib_ctk), + ("NVIDIA NVCC Wheel", _get_nvrtc_wheel), + ("System", _get_nvrtc_system_ctk), + ] + ) + return _find_first_valid_lazy(options) + + +def _get_nvvm_wheel(): + platform_map = { + "linux": ("lib64", "libnvvm.so"), + "win32": ("bin", "nvvm64_40_0.dll"), + } + + for plat, (dso_dir, dso_path) in platform_map.items(): + if sys.platform.startswith(plat): + break + else: + raise NotImplementedError("Unsupported platform") + + site_paths = [site.getusersitepackages()] + site.getsitepackages() + + for sp in filter(None, site_paths): + nvvm_path = Path(sp, "nvidia", "cuda_nvcc", "nvvm", dso_dir, dso_path) + if nvvm_path.exists(): + return str(nvvm_path.parent) + + return None + + +def get_major_cuda_version(): + # TODO: remove once cuda-python is + # a hard dependency + from numba.cuda.cudadrv.runtime import get_version + + return get_version()[0] + + +def get_nvrtc_dso_path(): + site_paths = [site.getusersitepackages()] + site.getsitepackages() + for sp in site_paths: + lib_dir = os.path.join( + sp, + "nvidia", + "cuda_nvrtc", + ("bin" if IS_WIN32 else "lib") if sp else None, + ) + if lib_dir and os.path.exists(lib_dir): + try: + major = get_major_cuda_version() + if major == 11: + cu_ver = "112" if IS_WIN32 else "11.2" + elif major == 12: + cu_ver = "120" if IS_WIN32 else "12" + else: + raise NotImplementedError(f"CUDA {major} is not supported") + + return os.path.join( + lib_dir, + f"nvrtc64_{cu_ver}_0.dll" + if IS_WIN32 + else f"libnvrtc.so.{cu_ver}", + ) + except RuntimeError: + continue + + +def _get_nvrtc_wheel(): + dso_path = get_nvrtc_dso_path() + if dso_path: + try: + result = ctypes.CDLL(dso_path, mode=ctypes.RTLD_GLOBAL) + except OSError: + pass + else: + if IS_WIN32: + import win32api + + # This absolute path will + # always be correct regardless of the package source + nvrtc_path = win32api.GetModuleFileNameW(result._handle) + dso_dir = os.path.dirname(nvrtc_path) + builtins_path = os.path.join( + dso_dir, + [ + f + for f in os.listdir(dso_dir) + if re.match("^nvrtc-builtins.*.dll$", f) + ][0], + ) + if not os.path.exists(builtins_path): + raise RuntimeError( + f'Path does not exist: "{builtins_path}"' + ) + return Path(dso_path) def _get_libdevice_paths(): by, libdir = _get_libdevice_path_decision() - # Search for pattern - pat = r"libdevice(\.\d+)*\.bc$" - candidates = find_file(re.compile(pat), libdir) - # Keep only the max (most recent version) of the bitcode files. - out = max(candidates, default=None) + out = os.path.join(libdir, "libdevice.10.bc") return _env_path_tuple(by, out) @@ -78,26 +219,46 @@ def _cuda_home_static_cudalib_path(): return ("lib64",) +def _get_cudalib_wheel(): + """Get the cudalib path from the NVCC wheel.""" + site_paths = [site.getusersitepackages()] + site.getsitepackages() + libdir = "bin" if IS_WIN32 else "lib" + for sp in filter(None, site_paths): + cudalib_path = Path(sp, "nvidia", "cuda_runtime", libdir) + if cudalib_path.exists(): + return str(cudalib_path) + return None + + def _get_cudalib_dir_path_decision(): - options = [ - ("Conda environment", get_conda_ctk()), - ("Conda environment (NVIDIA package)", get_nvidia_cudalib_ctk()), - ("CUDA_HOME", get_cuda_home(_cudalib_path())), - ("System", get_system_ctk(_cudalib_path())), - ] - by, libdir = _find_valid_path(options) - return by, libdir + options = _build_options( + [ + ("Conda environment", get_conda_ctk), + ("Conda environment (NVIDIA package)", get_nvidia_cudalib_ctk), + ("NVIDIA NVCC Wheel", _get_cudalib_wheel), + ("CUDA_HOME", lambda: get_cuda_home(_cudalib_path())), + ("System", lambda: get_system_ctk(_cudalib_path())), + ] + ) + return _find_first_valid_lazy(options) def _get_static_cudalib_dir_path_decision(): - options = [ - ("Conda environment", get_conda_ctk()), - ("Conda environment (NVIDIA package)", get_nvidia_static_cudalib_ctk()), - ("CUDA_HOME", get_cuda_home(*_cuda_home_static_cudalib_path())), - ("System", get_system_ctk(_cudalib_path())), - ] - by, libdir = _find_valid_path(options) - return by, libdir + options = _build_options( + [ + ("Conda environment", get_conda_ctk), + ( + "Conda environment (NVIDIA package)", + get_nvidia_static_cudalib_ctk, + ), + ( + "CUDA_HOME", + lambda: get_cuda_home(*_cuda_home_static_cudalib_path()), + ), + ("System", lambda: get_system_ctk(_cudalib_path())), + ] + ) + return _find_first_valid_lazy(options) def _get_cudalib_dir(): @@ -113,12 +274,12 @@ def _get_static_cudalib_dir(): def get_system_ctk(*subdirs): """Return path to system-wide cudatoolkit; or, None if it doesn't exist.""" # Linux? - if sys.platform.startswith("linux"): + if not IS_WIN32: # Is cuda alias to /usr/local/cuda? # We are intentionally not getting versioned cuda installation. - base = "/usr/local/cuda" - if os.path.exists(base): - return os.path.join(base, *subdirs) + result = os.path.join("/usr/local/cuda", *subdirs) + if os.path.exists(result): + return result def get_conda_ctk(): @@ -211,8 +372,35 @@ def get_cuda_home(*subdirs): def _get_nvvm_path(): by, path = _get_nvvm_path_decision() - candidates = find_lib("nvvm", path) - path = max(candidates) if candidates else None + + if by == "NVIDIA NVCC Wheel": + platform_map = { + "linux": "libnvvm.so", + "win32": "nvvm64_40_0.dll", + } + + for plat, dso_name in platform_map.items(): + if sys.platform.startswith(plat): + break + else: + raise NotImplementedError("Unsupported platform") + + path = os.path.join(path, dso_name) + else: + candidates = find_lib("nvvm", path) + path = max(candidates) if candidates else None + return _env_path_tuple(by, path) + + +def _get_nvrtc_path(): + by, path = _get_nvrtc_path_decision() + if by == "NVIDIA NVCC Wheel": + path = str(path) + elif by == "System": + return _env_path_tuple(by, path) + else: + candidates = find_lib("nvrtc", path) + path = max(candidates) if candidates else None return _env_path_tuple(by, path) @@ -234,6 +422,7 @@ def get_cuda_paths(): # Not in cache d = { "nvvm": _get_nvvm_path(), + "nvrtc": _get_nvrtc_path(), "libdevice": _get_libdevice_paths(), "cudalib_dir": _get_cudalib_dir(), "static_cudalib_dir": _get_static_cudalib_dir(), @@ -255,6 +444,16 @@ def get_debian_pkg_libdevice(): return pkg_libdevice_location +def get_libdevice_wheel(): + nvvm_path = _get_nvvm_wheel() + if nvvm_path is None: + return None + nvvm_path = Path(nvvm_path) + libdevice_path = nvvm_path.parent / "libdevice" + + return str(libdevice_path) + + def get_current_cuda_target_name(): """Determine conda's CTK target folder based on system and machine arch. diff --git a/numba_cuda/numba/cuda/cudadrv/libs.py b/numba_cuda/numba/cuda/cudadrv/libs.py index 7388db898..ce4d99113 100644 --- a/numba_cuda/numba/cuda/cudadrv/libs.py +++ b/numba_cuda/numba/cuda/cudadrv/libs.py @@ -50,8 +50,8 @@ def get_cudalib(lib, static=False): 'libnvvm.so' for 'nvvm') so that we may attempt to load it using the system loader's search mechanism. """ - if lib == "nvvm": - return get_cuda_paths()["nvvm"].info or _dllnamepattern % "nvvm" + if lib in {"nvrtc", "nvvm"}: + return get_cuda_paths()[lib].info or _dllnamepattern % lib else: dir_type = "static_cudalib_dir" if static else "cudalib_dir" libdir = get_cuda_paths()[dir_type].info @@ -92,6 +92,8 @@ def check_static_lib(path): def _get_source_variable(lib, static=False): if lib == "nvvm": return get_cuda_paths()["nvvm"].by + elif lib == "nvrtc": + return get_cuda_paths()["nvrtc"].by elif lib == "libdevice": return get_cuda_paths()["libdevice"].by elif lib == "include_dir":