Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 25 additions & 2 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,21 +66,44 @@ jobs:
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
wheel-build-cuml:
wheel-build-libcuml:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.04
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
sha: ${{ inputs.sha }}
date: ${{ inputs.date }}
script: ci/build_wheel.sh
script: ci/build_wheel_libcuml.sh
# Note that this approach to cloning repos obviates any modification to
# the CMake variables in get_cumlprims_mg.cmake since CMake will just use
# the clone as is.
extra-repo: rapidsai/cumlprims_mg
extra-repo-sha: branch-25.04
extra-repo-deploy-key: CUMLPRIMS_SSH_PRIVATE_DEPLOY_KEY
# build for every combination of arch and CUDA version, but only for the latest Python
matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber)))
# Publishes the libcuml C++ wheel once the wheel-build-libcuml job has finished.
wheel-publish-libcuml:
  needs: wheel-build-libcuml
  secrets: inherit
  # Keep this ref on the same shared-workflows branch as the wheels-build.yaml
  # ref used by wheel-build-libcuml (branch-25.04).
  uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.04
  with:
    build_type: ${{ inputs.build_type || 'branch' }}
    branch: ${{ inputs.branch }}
    sha: ${{ inputs.sha }}
    date: ${{ inputs.date }}
    package-name: libcuml
    package-type: cpp
# Builds the cuml Python wheel. Runs after wheel-build-libcuml because
# ci/build_wheel_cuml.sh consumes the libcuml wheel produced by that job.
wheel-build-cuml:
  needs: wheel-build-libcuml
  secrets: inherit
  # Keep this ref on the same shared-workflows branch as the wheels-build.yaml
  # ref used by wheel-build-libcuml (branch-25.04).
  uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.04
  with:
    build_type: ${{ inputs.build_type || 'branch' }}
    branch: ${{ inputs.branch }}
    sha: ${{ inputs.sha }}
    date: ${{ inputs.date }}
    script: ci/build_wheel_cuml.sh
wheel-publish-cuml:
needs: wheel-build-cuml
secrets: inherit
Expand Down
17 changes: 15 additions & 2 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ jobs:
- conda-notebook-tests
- docs-build
- telemetry-setup
- wheel-build-libcuml
- wheel-build-cuml
- wheel-tests-cuml
- devcontainer
Expand Down Expand Up @@ -166,16 +167,28 @@ jobs:
arch: "amd64"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/build_docs.sh"
wheel-build-cuml:
wheel-build-libcuml:
needs: checks
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.04
with:
build_type: pull-request
script: ci/build_wheel.sh
branch: ${{ inputs.branch }}
sha: ${{ inputs.sha }}
date: ${{ inputs.date }}
script: ci/build_wheel_libcuml.sh
extra-repo: rapidsai/cumlprims_mg
extra-repo-sha: branch-25.04
extra-repo-deploy-key: CUMLPRIMS_SSH_PRIVATE_DEPLOY_KEY
# build for every combination of arch and CUDA version, but only for the latest Python
matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber)))
# Builds the cuml Python wheel for pull requests. Waits for both the style
# checks and the libcuml wheel build that ci/build_wheel_cuml.sh depends on.
wheel-build-cuml:
  needs: [checks, wheel-build-libcuml]
  secrets: inherit
  # Keep this ref on the same shared-workflows branch as the wheels-build.yaml
  # ref used by wheel-build-libcuml (branch-25.04).
  uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.04
  with:
    build_type: pull-request
    script: ci/build_wheel_cuml.sh
wheel-tests-cuml:
needs: [wheel-build-cuml, changed-files]
secrets: inherit
Expand Down
7 changes: 1 addition & 6 deletions build.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash

# Copyright (c) 2019-2024, NVIDIA CORPORATION.
# Copyright (c) 2019-2025, NVIDIA CORPORATION.

# cuml build script

Expand Down Expand Up @@ -288,11 +288,6 @@ if (! hasArg --configure-only) && (completeBuild || hasArg cuml || hasArg pydocs
# Replace spaces with semicolons in SKBUILD_EXTRA_CMAKE_ARGS
SKBUILD_EXTRA_CMAKE_ARGS=$(echo ${SKBUILD_EXTRA_CMAKE_ARGS} | sed 's/ /;/g')

# Append `-DFIND_CUML_CPP=ON` to CUML_EXTRA_CMAKE_ARGS unless a user specified the option.
if [[ "${SKBUILD_EXTRA_CMAKE_ARGS}" != *"DFIND_CUML_CPP"* ]]; then
SKBUILD_EXTRA_CMAKE_ARGS="${SKBUILD_EXTRA_CMAKE_ARGS};-DFIND_CUML_CPP=ON"
fi

SKBUILD_CMAKE_ARGS="-DCMAKE_MESSAGE_LOG_LEVEL=${CMAKE_LOG_LEVEL};${SKBUILD_EXTRA_CMAKE_ARGS}" \
python -m pip install --no-build-isolation --no-deps --config-settings rapidsai.disable-cuda=true ${REPODIR}/python/cuml

Expand Down
45 changes: 8 additions & 37 deletions ci/build_wheel.sh
Original file line number Diff line number Diff line change
@@ -1,55 +1,26 @@
#!/bin/bash
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Copyright (c) 2023-2025, NVIDIA CORPORATION.

set -euo pipefail

package_dir="python/cuml"
package_name=$1
package_dir=$2

source rapids-configure-sccache
source rapids-date-string

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"

rapids-generate-version > ./VERSION

cd ${package_dir}

case "${RAPIDS_CUDA_VERSION}" in
12.*)
EXCLUDE_ARGS=(
--exclude "libcuvs.so"
--exclude "libcublas.so.12"
--exclude "libcublasLt.so.12"
--exclude "libcufft.so.11"
--exclude "libcurand.so.10"
--exclude "libcusolver.so.11"
--exclude "libcusparse.so.12"
--exclude "libnvJitLink.so.12"
)
EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=ON"
;;
11.*)
EXCLUDE_ARGS=(
--exclude "libcuvs.so"
)
EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=OFF"
;;
esac
cd "${package_dir}"

sccache --zero-stats

SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DDISABLE_DEPRECATION_WARNINGS=ON;-DCPM_cumlprims_mg_SOURCE=${GITHUB_WORKSPACE}/cumlprims_mg/;-DUSE_CUVS_WHEEL=ON${EXTRA_CMAKE_ARGS}" \
python -m pip wheel . \
rapids-logger "Building '${package_name}' wheel"
python -m pip wheel \
-w dist \
-v \
--no-deps \
--disable-pip-version-check
--disable-pip-version-check \
.

sccache --show-adv-stats

mkdir -p final_dist
python -m auditwheel repair -w final_dist "${EXCLUDE_ARGS[@]}" dist/*

../../ci/validate_wheel.sh final_dist

RAPIDS_PY_WHEEL_NAME="cuml_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python final_dist
42 changes: 42 additions & 0 deletions ci/build_wheel_cuml.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/bin/bash
# Copyright (c) 2023-2025, NVIDIA CORPORATION.

set -euo pipefail

# Builds, repairs, validates and uploads the 'cuml' Python wheel.
#
# Reads RAPIDS_CUDA_VERSION from the environment and must be run from the
# repository root, since it invokes ./ci/build_wheel.sh and
# ./ci/validate_wheel.sh by relative path.

package_name="cuml"
package_dir="python/cuml"

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen "${RAPIDS_CUDA_VERSION}")"

# Download the libcuml wheel built in the previous step and make it
# available for pip to find.
RAPIDS_PY_WHEEL_NAME="libcuml_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp /tmp/libcuml_dist
# The inner 'echo' expands the glob so the constraint records a concrete wheel path.
echo "libcuml-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/libcuml_dist/libcuml_*.whl)" >> /tmp/constraints.txt
export PIP_CONSTRAINT="/tmp/constraints.txt"

# Shared libraries passed to 'auditwheel repair' via --exclude.
EXCLUDE_ARGS=(
  --exclude "libcuml++.so"
  --exclude "libcumlprims_mg.so"
  --exclude "libcuvs.so"
  --exclude "libraft.so"
  --exclude "libcublas.so.*"
  --exclude "libcublasLt.so.*"
  --exclude "libcufft.so.*"
  --exclude "libcurand.so.*"
  --exclude "libcusolver.so.*"
  --exclude "libcusparse.so.*"
  --exclude "libnvJitLink.so.*"
)

export SKBUILD_CMAKE_ARGS="-DDISABLE_DEPRECATION_WARNINGS=ON;-DSINGLEGPU=OFF;-DUSE_LIBCUML_WHEEL=ON"
./ci/build_wheel.sh "${package_name}" "${package_dir}"

mkdir -p "${package_dir}/final_dist"
# Only the directory part is quoted: the trailing 'dist/*' glob must still expand.
python -m auditwheel repair \
  "${EXCLUDE_ARGS[@]}" \
  -w "${package_dir}/final_dist" \
  "${package_dir}"/dist/*

./ci/validate_wheel.sh "${package_dir}" final_dist

RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python "${package_dir}/final_dist"
56 changes: 56 additions & 0 deletions ci/build_wheel_libcuml.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/bin/bash
# Copyright (c) 2025, NVIDIA CORPORATION.

set -euo pipefail

# Builds, repairs, validates and uploads the 'libcuml' C++ wheel.
#
# Reads RAPIDS_CUDA_VERSION, RAPIDS_PY_VERSION and GITHUB_WORKSPACE from the
# environment and must be run from the repository root, since it invokes
# ./ci/build_wheel.sh and ./ci/validate_wheel.sh by relative path.

package_name="libcuml"
package_dir="python/libcuml"

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen "${RAPIDS_CUDA_VERSION}")"

rapids-logger "Generating build requirements"

rapids-dependency-file-generator \
  --output requirements \
  --file-key "py_build_${package_name}" \
  --file-key "py_rapids_build_${package_name}" \
  --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};cuda_suffixed=true" \
| tee /tmp/requirements-build.txt

rapids-logger "Installing build requirements"
python -m pip install \
  -v \
  --prefer-binary \
  -r /tmp/requirements-build.txt

# build with '--no-build-isolation', for better sccache hit rate
# 0 really means "add --no-build-isolation" (ref: https://github.com/pypa/pip/issues/5735)
export PIP_NO_BUILD_ISOLATION=0

# NOTE: 'libcumlprims_mg.so' is marked as '--exclude' here because auditwheel doesn't detect it,
#       but it really is intentionally included in 'libcuml' wheels
EXCLUDE_ARGS=(
  --exclude "libcumlprims_mg.so"
  --exclude "libcuvs.so"
  --exclude "libraft.so"
  --exclude "libcublas.so.*"
  --exclude "libcublasLt.so.*"
  --exclude "libcufft.so.*"
  --exclude "libcurand.so.*"
  --exclude "libcusolver.so.*"
  --exclude "libcusparse.so.*"
  --exclude "libnvJitLink.so.*"
)

# Point CPM at the cumlprims_mg checkout under GITHUB_WORKSPACE.
export SKBUILD_CMAKE_ARGS="-DDISABLE_DEPRECATION_WARNINGS=ON;-DCPM_cumlprims_mg_SOURCE=${GITHUB_WORKSPACE}/cumlprims_mg/"
./ci/build_wheel.sh "${package_name}" "${package_dir}"

mkdir -p "${package_dir}/final_dist"
# Only the directory part is quoted: the trailing 'dist/*' glob must still expand.
python -m auditwheel repair \
  "${EXCLUDE_ARGS[@]}" \
  -w "${package_dir}/final_dist" \
  "${package_dir}"/dist/*

./ci/validate_wheel.sh "${package_dir}" final_dist

RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp "${package_dir}/final_dist"
2 changes: 2 additions & 0 deletions ci/test_wheel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@ set -euo pipefail
mkdir -p ./dist
RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="cuml_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist
RAPIDS_PY_WHEEL_NAME="libcuml_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist

# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install \
./dist/libcuml*.whl \
"$(echo ./dist/cuml*.whl)[test]"

RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}
Expand Down
7 changes: 5 additions & 2 deletions ci/validate_wheel.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
#!/bin/bash
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.

set -euo pipefail

wheel_dir_relative_path=$1
package_dir=$1
wheel_dir_relative_path=$2

cd "${package_dir}"

rapids-logger "validate packages with 'pydistcheck'"

Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ dependencies:
- libcusparse-dev=11.7.5.86
- libcusparse=11.7.5.86
- libcuvs==25.4.*,>=0.0.0a0
- libraft-headers==25.4.*,>=0.0.0a0
- libraft==25.4.*,>=0.0.0a0
- librmm==25.4.*,>=0.0.0a0
- nbsphinx
- ninja
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ dependencies:
- libcusolver-dev
- libcusparse-dev
- libcuvs==25.4.*,>=0.0.0a0
- libraft-headers==25.4.*,>=0.0.0a0
- libraft==25.4.*,>=0.0.0a0
- librmm==25.4.*,>=0.0.0a0
- nbsphinx
- ninja
Expand Down
2 changes: 1 addition & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -724,7 +724,7 @@ endif()

if(BUILD_CUML_TESTS)
include(CTest)
add_subdirectory(test)
add_subdirectory(tests)
endif()

##############################################################################
Expand Down
8 changes: 7 additions & 1 deletion cpp/cmake/modules/ConfigureCUDA.cmake
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#=============================================================================
# Copyright (c) 2018-2024, NVIDIA CORPORATION.
# Copyright (c) 2018-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -35,6 +35,12 @@ if(DISABLE_DEPRECATION_WARNINGS)
list(APPEND CUML_CUDA_FLAGS -Wno-deprecated-declarations -Xcompiler=-Wno-deprecated-declarations -DRAFT_HIDE_DEPRECATION_WARNINGS)
endif()

# Short-term workaround: with a CUDA compiler at version 12.8.0 or newer, add
# -static-global-template-stub=false to the CUDA flags so that kernels which
# are currently invalid are still allowed.
if(NOT CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 12.8.0)
  list(APPEND CUML_CUDA_FLAGS -static-global-template-stub=false)
endif()


# make sure we produce smallest binary size
list(APPEND CUML_CUDA_FLAGS -Xfatbin=-compress-all)

Expand Down
2 changes: 1 addition & 1 deletion cpp/cmake/thirdparty/get_cuvs.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ function(find_and_configure_cuvs)
EXCLUDE_FROM_ALL ${PKG_EXCLUDE_FROM_ALL}
OPTIONS
"BUILD_TESTS OFF"
"BUILD_BENCH OFF"
"BUILD_CAGRA_HNSWLIB OFF"
"BUILD_CUVS_BENCH OFF"
"BUILD_MG_ALGOS ${CUVS_BUILD_MG_ALGOS}"

)
Expand Down
3 changes: 2 additions & 1 deletion cpp/cmake/thirdparty/get_treelite.cmake
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#=============================================================================
# Copyright (c) 2021-2024, NVIDIA CORPORATION.
# Copyright (c) 2021-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -29,6 +29,7 @@ function(find_and_configure_treelite)

rapids_cpm_find(Treelite ${PKG_VERSION}
GLOBAL_TARGETS ${TREELITE_LIBS}
BUILD_EXPORT_SET cuml-exports
INSTALL_EXPORT_SET cuml-exports
CPM_ARGS
GIT_REPOSITORY https://github.com/dmlc/treelite.git
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
* Copyright (c) 2023-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -104,7 +104,7 @@ auto constexpr static const WARP_SIZE = index_type{32};
auto constexpr static const MAX_THREADS_PER_BLOCK = index_type{256};
#ifdef __CUDACC__
#if __CUDA_ARCH__ == 720 || __CUDA_ARCH__ == 750 || __CUDA_ARCH__ == 860 || \
__CUDA_ARCH__ == 870 || __CUDA_ARCH__ == 890
__CUDA_ARCH__ == 870 || __CUDA_ARCH__ == 890 || __CUDA_ARCH__ == 1200
auto constexpr static const MAX_THREADS_PER_SM = index_type{1024};
#else
auto constexpr static const MAX_THREADS_PER_SM = index_type{2048};
Expand Down
Loading