Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion conda/recipes/libcuml/recipe.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ outputs:
build:
script:
content: |
cmake --install cpp/build
cmake --install cpp/build --component cuml
Comment thread
csadorf marked this conversation as resolved.
dynamic_linking:
overlinking_behavior: "error"
prefix_detection:
Expand Down
34 changes: 24 additions & 10 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# =============================================================================
# cmake-format: off
# SPDX-FileCopyrightText: Copyright (c) 2018-2025, NVIDIA CORPORATION.
# SPDX-FileCopyrightText: Copyright (c) 2018-2026, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0
# cmake-format: on
# =============================================================================
Expand Down Expand Up @@ -269,7 +269,8 @@ if(BUILD_CUML_TESTS OR BUILD_PRIMS_TESTS)
endif()

# CCCL before RMM, and RMM before RAFT
include(cmake/thirdparty/get_cccl.cmake)
include(${rapids-cmake-dir}/cpm/cccl.cmake)
Comment thread
divyegala marked this conversation as resolved.
Outdated
rapids_cpm_cccl()
Comment thread
csadorf marked this conversation as resolved.
Outdated
Comment thread
divyegala marked this conversation as resolved.
Outdated
include(cmake/thirdparty/get_rmm.cmake)
include(cmake/thirdparty/get_raft.cmake)
if(LINK_CUVS)
Expand All @@ -282,11 +283,6 @@ endif()

if(all_algo OR treeshap_algo)
include(cmake/thirdparty/get_gputreeshap.cmake)
# Workaround until https://github.com/rapidsai/rapids-cmake/issues/176 is resolved
if(NOT BUILD_SHARED_LIBS)
rapids_export_package(BUILD GPUTreeShap cuml-exports)
rapids_export_package(INSTALL GPUTreeShap cuml-exports)
endif()
Comment thread
csadorf marked this conversation as resolved.
endif()

# cumlprims_mg functionality has been moved into cuML directly
Expand Down Expand Up @@ -338,7 +334,10 @@ if(BUILD_CUML_CPP_LIBRARY)
endif()

if(all_algo OR arima_algo)
target_sources(cuml_objs PRIVATE src/arima/batched_arima.cu src/arima/batched_kalman.cu)
target_sources(
cuml_objs PRIVATE src/arima/arima_common.cu src/arima/batched_arima.cu
src/arima/batched_kalman.cu
)
endif()

if(all_algo OR datasets_algo)
Expand Down Expand Up @@ -842,11 +841,26 @@ install(
TARGETS ${_cuml_lib_targets}
DESTINATION ${lib_dir}
EXPORT cuml-exports
COMPONENT cuml
)

install(
DIRECTORY include/cuml/
DESTINATION include/cuml
COMPONENT cuml
)

install(DIRECTORY include/cuml/ DESTINATION include/cuml)
install(
FILES ${CMAKE_CURRENT_BINARY_DIR}/include/cuml/version_config.hpp
DESTINATION include/cuml
COMPONENT cuml
)

install(FILES ${CMAKE_CURRENT_BINARY_DIR}/include/cuml/version_config.hpp DESTINATION include/cuml)
install(
FILES ${CMAKE_CURRENT_BINARY_DIR}/include/cuml/common/logger_macros.hpp
DESTINATION include/cuml/common
COMPONENT cuml
)

# ##################################################################################################
# # - install export -------------------------------------------------------------------------------
Expand Down
15 changes: 0 additions & 15 deletions cpp/cmake/thirdparty/get_cccl.cmake

This file was deleted.

10 changes: 1 addition & 9 deletions cpp/cmake/thirdparty/get_gputreeshap.cmake
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#=============================================================================
# cmake-format: off
# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION.
# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0
# cmake-format: on
#=============================================================================
Expand Down Expand Up @@ -47,14 +47,6 @@ function(find_and_configure_gputreeshap)

endif()

# do `find_dependency(GPUTreeShap) in build and install`
rapids_export_package(BUILD GPUTreeShap cuml-exports)
rapids_export_package(INSTALL GPUTreeShap cuml-exports)

# Tell cmake where it can find the generated gputreeshap-config.cmake we wrote.
include("${rapids-cmake-dir}/export/find_package_root.cmake")
rapids_export_find_package_root(BUILD GPUTreeShap [=[${CMAKE_CURRENT_LIST_DIR}]=] EXPORT_SET cuml-exports)

set(GPUTreeShap_ADDED ${GPUTreeShap_ADDED} PARENT_SCOPE)

endfunction()
Expand Down
81 changes: 3 additions & 78 deletions cpp/include/cuml/tsa/arima_common.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
* SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

Expand All @@ -12,9 +12,6 @@
#include <rmm/resource_ref.hpp>

#include <cuda_runtime.h>
#include <thrust/execution_policy.h>
#include <thrust/for_each.h>
#include <thrust/iterator/counting_iterator.h>

#include <algorithm>

Expand Down Expand Up @@ -112,43 +109,7 @@ struct ARIMAParams {
* [mu, ar, ma, sar, sma, sigma2] (device)
* @param[in] stream CUDA stream
*/
void pack(const ARIMAOrder& order, int batch_size, DataT* param_vec, cudaStream_t stream) const
{
int N = order.complexity();
auto counting = thrust::make_counting_iterator(0);
// The device lambda can't capture structure members...
const DataT *_mu = mu, *_beta = beta, *_ar = ar, *_ma = ma, *_sar = sar, *_sma = sma,
*_sigma2 = sigma2;
thrust::for_each(
thrust::cuda::par.on(stream), counting, counting + batch_size, [=] __device__(int bid) {
DataT* param = param_vec + bid * N;
if (order.k) {
*param = _mu[bid];
param++;
}
for (int i = 0; i < order.n_exog; i++) {
param[i] = _beta[order.n_exog * bid + i];
}
param += order.n_exog;
for (int ip = 0; ip < order.p; ip++) {
param[ip] = _ar[order.p * bid + ip];
}
param += order.p;
for (int iq = 0; iq < order.q; iq++) {
param[iq] = _ma[order.q * bid + iq];
}
param += order.q;
for (int iP = 0; iP < order.P; iP++) {
param[iP] = _sar[order.P * bid + iP];
}
param += order.P;
for (int iQ = 0; iQ < order.Q; iQ++) {
param[iQ] = _sma[order.Q * bid + iQ];
}
param += order.Q;
*param = _sigma2[bid];
});
}
void pack(const ARIMAOrder& order, int batch_size, DataT* param_vec, cudaStream_t stream) const;

/**
* Unpack a parameter vector into separate arrays of parameters.
Expand All @@ -159,43 +120,7 @@ struct ARIMAParams {
* [mu, ar, ma, sar, sma, sigma2] (device)
* @param[in] stream CUDA stream
*/
void unpack(const ARIMAOrder& order, int batch_size, const DataT* param_vec, cudaStream_t stream)
{
int N = order.complexity();
auto counting = thrust::make_counting_iterator(0);
// The device lambda can't capture structure members...
DataT *_mu = mu, *_beta = beta, *_ar = ar, *_ma = ma, *_sar = sar, *_sma = sma,
*_sigma2 = sigma2;
thrust::for_each(
thrust::cuda::par.on(stream), counting, counting + batch_size, [=] __device__(int bid) {
const DataT* param = param_vec + bid * N;
if (order.k) {
_mu[bid] = *param;
param++;
}
for (int i = 0; i < order.n_exog; i++) {
_beta[order.n_exog * bid + i] = param[i];
}
param += order.n_exog;
for (int ip = 0; ip < order.p; ip++) {
_ar[order.p * bid + ip] = param[ip];
}
param += order.p;
for (int iq = 0; iq < order.q; iq++) {
_ma[order.q * bid + iq] = param[iq];
}
param += order.q;
for (int iP = 0; iP < order.P; iP++) {
_sar[order.P * bid + iP] = param[iP];
}
param += order.P;
for (int iQ = 0; iQ < order.Q; iQ++) {
_sma[order.Q * bid + iQ] = param[iQ];
}
param += order.Q;
_sigma2[bid] = *param;
});
}
void unpack(const ARIMAOrder& order, int batch_size, const DataT* param_vec, cudaStream_t stream);
};

/**
Expand Down
100 changes: 100 additions & 0 deletions cpp/src/arima/arima_common.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

#include <cuml/tsa/arima_common.h>

#include <thrust/execution_policy.h>
#include <thrust/for_each.h>
#include <thrust/iterator/counting_iterator.h>

namespace ML {

/**
 * Gather the separate parameter arrays of this structure into a single
 * parameter vector.
 *
 * The slice belonging to batch index `bid` starts at
 * `param_vec + bid * order.complexity()` and is filled in this order:
 * mu (only when `order.k` is non-zero), beta (`order.n_exog` values),
 * ar (`order.p`), ma (`order.q`), sar (`order.P`), sma (`order.Q`), sigma2.
 *
 * @param[in]  order      ARIMA order
 * @param[in]  batch_size Number of batch indices to process
 * @param[out] param_vec  Destination parameter vector
 * @param[in]  stream     CUDA stream passed to the thrust execution policy
 */
template <typename DataT>
void ARIMAParams<DataT>::pack(const ARIMAOrder& order,
                              int batch_size,
                              DataT* param_vec,
                              cudaStream_t stream) const
{
  // Distance (in elements) between the slices of two consecutive batch indices.
  int stride = order.complexity();

  // Copy the member pointers into locals: the device lambda can't capture
  // structure members.
  const DataT* src_mu     = mu;
  const DataT* src_beta   = beta;
  const DataT* src_ar     = ar;
  const DataT* src_ma     = ma;
  const DataT* src_sar    = sar;
  const DataT* src_sma    = sma;
  const DataT* src_sigma2 = sigma2;

  auto first_bid = thrust::make_counting_iterator(0);
  thrust::for_each(
    thrust::cuda::par.on(stream), first_bid, first_bid + batch_size, [=] __device__(int bid) {
      DataT* dst = param_vec + bid * stride;
      // Offset of the next group of parameters inside this batch member's slice.
      int pos = 0;

      if (order.k) {
        dst[pos] = src_mu[bid];
        pos += 1;
      }

      for (int j = 0; j < order.n_exog; j++) {
        dst[pos + j] = src_beta[order.n_exog * bid + j];
      }
      pos += order.n_exog;

      for (int j = 0; j < order.p; j++) {
        dst[pos + j] = src_ar[order.p * bid + j];
      }
      pos += order.p;

      for (int j = 0; j < order.q; j++) {
        dst[pos + j] = src_ma[order.q * bid + j];
      }
      pos += order.q;

      for (int j = 0; j < order.P; j++) {
        dst[pos + j] = src_sar[order.P * bid + j];
      }
      pos += order.P;

      for (int j = 0; j < order.Q; j++) {
        dst[pos + j] = src_sma[order.Q * bid + j];
      }
      pos += order.Q;

      // sigma2 is always the last value of the slice.
      dst[pos] = src_sigma2[bid];
    });
}

/**
 * Scatter a parameter vector into the separate parameter arrays of this
 * structure.
 *
 * The slice belonging to batch index `bid` starts at
 * `param_vec + bid * order.complexity()` and is read in this order:
 * mu (only when `order.k` is non-zero), beta (`order.n_exog` values),
 * ar (`order.p`), ma (`order.q`), sar (`order.P`), sma (`order.Q`), sigma2.
 *
 * @param[in] order      ARIMA order
 * @param[in] batch_size Number of batch indices to process
 * @param[in] param_vec  Source parameter vector
 * @param[in] stream     CUDA stream passed to the thrust execution policy
 */
template <typename DataT>
void ARIMAParams<DataT>::unpack(const ARIMAOrder& order,
                                int batch_size,
                                const DataT* param_vec,
                                cudaStream_t stream)
{
  // Distance (in elements) between the slices of two consecutive batch indices.
  int stride = order.complexity();

  // Copy the member pointers into locals: the device lambda can't capture
  // structure members.
  DataT* dst_mu     = mu;
  DataT* dst_beta   = beta;
  DataT* dst_ar     = ar;
  DataT* dst_ma     = ma;
  DataT* dst_sar    = sar;
  DataT* dst_sma    = sma;
  DataT* dst_sigma2 = sigma2;

  auto first_bid = thrust::make_counting_iterator(0);
  thrust::for_each(
    thrust::cuda::par.on(stream), first_bid, first_bid + batch_size, [=] __device__(int bid) {
      const DataT* src = param_vec + bid * stride;
      // Offset of the next group of parameters inside this batch member's slice.
      int pos = 0;

      if (order.k) {
        dst_mu[bid] = src[pos];
        pos += 1;
      }

      for (int j = 0; j < order.n_exog; j++) {
        dst_beta[order.n_exog * bid + j] = src[pos + j];
      }
      pos += order.n_exog;

      for (int j = 0; j < order.p; j++) {
        dst_ar[order.p * bid + j] = src[pos + j];
      }
      pos += order.p;

      for (int j = 0; j < order.q; j++) {
        dst_ma[order.q * bid + j] = src[pos + j];
      }
      pos += order.q;

      for (int j = 0; j < order.P; j++) {
        dst_sar[order.P * bid + j] = src[pos + j];
      }
      pos += order.P;

      for (int j = 0; j < order.Q; j++) {
        dst_sma[order.Q * bid + j] = src[pos + j];
      }
      pos += order.Q;

      // sigma2 is always the last value of the slice.
      dst_sigma2[bid] = src[pos];
    });
}

// Explicit template instantiation: pack() and unpack() are defined in this
// translation unit, so their code is only emitted for the types listed here.
// NOTE(review): only `double` is instantiated; confirm that no caller relies on
// pack()/unpack() for another DataT (e.g. float).
template struct ARIMAParams<double>;
Comment thread
csadorf marked this conversation as resolved.

} // namespace ML
4 changes: 2 additions & 2 deletions python/cuml/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# =============================================================================
# cmake-format: off
# SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION.
# SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0
# cmake-format: on
# =============================================================================
Expand Down Expand Up @@ -69,7 +69,7 @@ set(CUML_EXCLUDE_TREELITE_FROM_ALL ON)
include(${CUML_CPP_SRC}/cmake/thirdparty/get_treelite.cmake)

# --- libcuml --- #
find_package(cuml "${RAPIDS_VERSION}" REQUIRED)
find_package(cuml "${RAPIDS_VERSION}" REQUIRED COMPONENTS cuml)

set(cuml_sg_libraries cuml::${CUML_CPP_TARGET})
set(cuml_mg_libraries cuml::${CUML_CPP_TARGET})
Expand Down
1 change: 1 addition & 0 deletions python/libcuml/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ sdist.reproducible = true
wheel.packages = ["libcuml"]
wheel.install-dir = "libcuml"
wheel.py-api = "py3"
install.components = ["cuml"]

[tool.scikit-build.metadata.version]
provider = "scikit_build_core.metadata.regex"
Expand Down
Loading