rapidsai · rapids-bot · Jan 14, 2026 · Jan 12, 2026 · Jan 12, 2026 · Jan 12, 2026
@@ -90,7 +90,7 @@ outputs:
     build:
       script:
         content: |
-          cmake --install cpp/build
+          cmake --install cpp/build --component cuml
       dynamic_linking:
         overlinking_behavior: "error"
       prefix_detection:

@@ -1,6 +1,6 @@
 # =============================================================================
 # cmake-format: off
-# SPDX-FileCopyrightText: Copyright (c) 2018-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2018-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 # cmake-format: on
 # =============================================================================
@@ -269,7 +269,8 @@ if(BUILD_CUML_TESTS OR BUILD_PRIMS_TESTS)
 endif()
 
 # CCCL before RMM, and RMM before RAFT
-include(cmake/thirdparty/get_cccl.cmake)
+include(${rapids-cmake-dir}/cpm/cccl.cmake)
+rapids_cpm_cccl()
 include(cmake/thirdparty/get_rmm.cmake)
 include(cmake/thirdparty/get_raft.cmake)
 if(LINK_CUVS)
@@ -282,11 +283,6 @@ endif()
 
 if(all_algo OR treeshap_algo)
   include(cmake/thirdparty/get_gputreeshap.cmake)
-  # Workaround until https://github.com/rapidsai/rapids-cmake/issues/176 is resolved
-  if(NOT BUILD_SHARED_LIBS)
-    rapids_export_package(BUILD GPUTreeShap cuml-exports)
-    rapids_export_package(INSTALL GPUTreeShap cuml-exports)
-  endif()
 endif()
 
 # cumlprims_mg functionality has been moved into cuML directly
@@ -338,7 +334,10 @@ if(BUILD_CUML_CPP_LIBRARY)
   endif()
 
   if(all_algo OR arima_algo)
-    target_sources(cuml_objs PRIVATE src/arima/batched_arima.cu src/arima/batched_kalman.cu)
+    target_sources(
+      cuml_objs PRIVATE src/arima/arima_common.cu src/arima/batched_arima.cu
+                        src/arima/batched_kalman.cu
+    )
   endif()
 
   if(all_algo OR datasets_algo)
@@ -842,11 +841,26 @@ install(
   TARGETS ${_cuml_lib_targets}
   DESTINATION ${lib_dir}
   EXPORT cuml-exports
+  COMPONENT cuml
+)
+
+install(
+  DIRECTORY include/cuml/
+  DESTINATION include/cuml
+  COMPONENT cuml
 )
 
-install(DIRECTORY include/cuml/ DESTINATION include/cuml)
+install(
+  FILES ${CMAKE_CURRENT_BINARY_DIR}/include/cuml/version_config.hpp
+  DESTINATION include/cuml
+  COMPONENT cuml
+)
 
-install(FILES ${CMAKE_CURRENT_BINARY_DIR}/include/cuml/version_config.hpp DESTINATION include/cuml)
+install(
+  FILES ${CMAKE_CURRENT_BINARY_DIR}/include/cuml/common/logger_macros.hpp
+  DESTINATION include/cuml/common
+  COMPONENT cuml
+)
 
 # ##################################################################################################
 # # - install export -------------------------------------------------------------------------------

@@ -1,6 +1,6 @@
 #=============================================================================
 # cmake-format: off
-# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 # cmake-format: on
 #=============================================================================
@@ -47,14 +47,6 @@ function(find_and_configure_gputreeshap)
 
     endif()
 
-    # do `find_dependency(GPUTreeShap) in build and install`
-    rapids_export_package(BUILD GPUTreeShap cuml-exports)
-    rapids_export_package(INSTALL GPUTreeShap cuml-exports)
-
-    # Tell cmake where it can find the generated gputreeshap-config.cmake we wrote.
-    include("${rapids-cmake-dir}/export/find_package_root.cmake")
-    rapids_export_find_package_root(BUILD GPUTreeShap [=[${CMAKE_CURRENT_LIST_DIR}]=] EXPORT_SET cuml-exports)
-
     set(GPUTreeShap_ADDED ${GPUTreeShap_ADDED} PARENT_SCOPE)
 
 endfunction()

@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 
@@ -12,9 +12,6 @@
 #include <rmm/resource_ref.hpp>
 
 #include <cuda_runtime.h>
-#include <thrust/execution_policy.h>
-#include <thrust/for_each.h>
-#include <thrust/iterator/counting_iterator.h>
 
 #include <algorithm>
 
@@ -112,43 +109,7 @@ struct ARIMAParams {
   *                         [mu, beta, ar, ma, sar, sma, sigma2] (device)
    * @param[in]  stream      CUDA stream
    */
-  void pack(const ARIMAOrder& order, int batch_size, DataT* param_vec, cudaStream_t stream) const
-  {
-    int N         = order.complexity();
-    auto counting = thrust::make_counting_iterator(0);
-    // The device lambda can't capture structure members...
-    const DataT *_mu = mu, *_beta = beta, *_ar = ar, *_ma = ma, *_sar = sar, *_sma = sma,
-                *_sigma2 = sigma2;
-    thrust::for_each(
-      thrust::cuda::par.on(stream), counting, counting + batch_size, [=] __device__(int bid) {
-        DataT* param = param_vec + bid * N;
-        if (order.k) {
-          *param = _mu[bid];
-          param++;
-        }
-        for (int i = 0; i < order.n_exog; i++) {
-          param[i] = _beta[order.n_exog * bid + i];
-        }
-        param += order.n_exog;
-        for (int ip = 0; ip < order.p; ip++) {
-          param[ip] = _ar[order.p * bid + ip];
-        }
-        param += order.p;
-        for (int iq = 0; iq < order.q; iq++) {
-          param[iq] = _ma[order.q * bid + iq];
-        }
-        param += order.q;
-        for (int iP = 0; iP < order.P; iP++) {
-          param[iP] = _sar[order.P * bid + iP];
-        }
-        param += order.P;
-        for (int iQ = 0; iQ < order.Q; iQ++) {
-          param[iQ] = _sma[order.Q * bid + iQ];
-        }
-        param += order.Q;
-        *param = _sigma2[bid];
-      });
-  }
+  void pack(const ARIMAOrder& order, int batch_size, DataT* param_vec, cudaStream_t stream) const;
 
   /**
    * Unpack a parameter vector into separate arrays of parameters.
@@ -159,43 +120,7 @@ struct ARIMAParams {
    *                        [mu, beta, ar, ma, sar, sma, sigma2] (device)
    * @param[in]  stream     CUDA stream
    */
-  void unpack(const ARIMAOrder& order, int batch_size, const DataT* param_vec, cudaStream_t stream)
-  {
-    int N         = order.complexity();
-    auto counting = thrust::make_counting_iterator(0);
-    // The device lambda can't capture structure members...
-    DataT *_mu = mu, *_beta = beta, *_ar = ar, *_ma = ma, *_sar = sar, *_sma = sma,
-          *_sigma2 = sigma2;
-    thrust::for_each(
-      thrust::cuda::par.on(stream), counting, counting + batch_size, [=] __device__(int bid) {
-        const DataT* param = param_vec + bid * N;
-        if (order.k) {
-          _mu[bid] = *param;
-          param++;
-        }
-        for (int i = 0; i < order.n_exog; i++) {
-          _beta[order.n_exog * bid + i] = param[i];
-        }
-        param += order.n_exog;
-        for (int ip = 0; ip < order.p; ip++) {
-          _ar[order.p * bid + ip] = param[ip];
-        }
-        param += order.p;
-        for (int iq = 0; iq < order.q; iq++) {
-          _ma[order.q * bid + iq] = param[iq];
-        }
-        param += order.q;
-        for (int iP = 0; iP < order.P; iP++) {
-          _sar[order.P * bid + iP] = param[iP];
-        }
-        param += order.P;
-        for (int iQ = 0; iQ < order.Q; iQ++) {
-          _sma[order.Q * bid + iQ] = param[iQ];
-        }
-        param += order.Q;
-        _sigma2[bid] = *param;
-      });
-  }
+  void unpack(const ARIMAOrder& order, int batch_size, const DataT* param_vec, cudaStream_t stream);
 };
 
 /**

@@ -0,0 +1,100 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <cuml/tsa/arima_common.h>
+
+#include <thrust/execution_policy.h>
+#include <thrust/for_each.h>
+#include <thrust/iterator/counting_iterator.h>
+
+namespace ML {
+
+template <typename DataT>
+void ARIMAParams<DataT>::pack(const ARIMAOrder& order,
+                              int batch_size,
+                              DataT* param_vec,
+                              cudaStream_t stream) const
+{
+  int N         = order.complexity();  // stride of param_vec between consecutive batch members
+  auto counting = thrust::make_counting_iterator(0);  // yields the batch ids 0, 1, 2, ...
+  // Copy the member pointers into locals: [=] would capture `this`, not the members themselves.
+  const DataT *_mu = mu, *_beta = beta, *_ar = ar, *_ma = ma, *_sar = sar, *_sma = sma,
+              *_sigma2 = sigma2;
+  thrust::for_each(  // runs the lambda once per batch id in [0, batch_size) on `stream`
+    thrust::cuda::par.on(stream), counting, counting + batch_size, [=] __device__(int bid) {
+      DataT* param = param_vec + bid * N;  // write cursor into this member's slice of param_vec
+      if (order.k) {  // mu is written only when order.k is non-zero
+        *param = _mu[bid];
+        param++;
+      }
+      for (int i = 0; i < order.n_exog; i++) {
+        param[i] = _beta[order.n_exog * bid + i];  // beta is read as n_exog values per member
+      }
+      param += order.n_exog;
+      for (int ip = 0; ip < order.p; ip++) {
+        param[ip] = _ar[order.p * bid + ip];  // ar is read as p values per member
+      }
+      param += order.p;
+      for (int iq = 0; iq < order.q; iq++) {
+        param[iq] = _ma[order.q * bid + iq];  // ma is read as q values per member
+      }
+      param += order.q;
+      for (int iP = 0; iP < order.P; iP++) {
+        param[iP] = _sar[order.P * bid + iP];  // sar is read as P values per member
+      }
+      param += order.P;
+      for (int iQ = 0; iQ < order.Q; iQ++) {
+        param[iQ] = _sma[order.Q * bid + iQ];  // sma is read as Q values per member
+      }
+      param += order.Q;
+      *param = _sigma2[bid];  // sigma2 is the last value written for each member
+    });
+}
+
+template <typename DataT>
+void ARIMAParams<DataT>::unpack(const ARIMAOrder& order,
+                                int batch_size,
+                                const DataT* param_vec,
+                                cudaStream_t stream)
+{
+  int N         = order.complexity();  // stride of param_vec between consecutive batch members
+  auto counting = thrust::make_counting_iterator(0);  // yields the batch ids 0, 1, 2, ...
+  // Copy the member pointers into locals: [=] would capture `this`, not the members themselves.
+  DataT *_mu = mu, *_beta = beta, *_ar = ar, *_ma = ma, *_sar = sar, *_sma = sma, *_sigma2 = sigma2;
+  thrust::for_each(  // runs the lambda once per batch id in [0, batch_size) on `stream`
+    thrust::cuda::par.on(stream), counting, counting + batch_size, [=] __device__(int bid) {
+      const DataT* param = param_vec + bid * N;  // read cursor into this member's slice
+      if (order.k) {  // mu is read only when order.k is non-zero
+        _mu[bid] = *param;
+        param++;
+      }
+      for (int i = 0; i < order.n_exog; i++) {
+        _beta[order.n_exog * bid + i] = param[i];  // beta is written as n_exog values per member
+      }
+      param += order.n_exog;
+      for (int ip = 0; ip < order.p; ip++) {
+        _ar[order.p * bid + ip] = param[ip];  // ar is written as p values per member
+      }
+      param += order.p;
+      for (int iq = 0; iq < order.q; iq++) {
+        _ma[order.q * bid + iq] = param[iq];  // ma is written as q values per member
+      }
+      param += order.q;
+      for (int iP = 0; iP < order.P; iP++) {
+        _sar[order.P * bid + iP] = param[iP];  // sar is written as P values per member
+      }
+      param += order.P;
+      for (int iQ = 0; iQ < order.Q; iQ++) {
+        _sma[order.Q * bid + iQ] = param[iQ];  // sma is written as Q values per member
+      }
+      param += order.Q;
+      _sigma2[bid] = *param;  // sigma2 is the last value read for each member
+    });
+}
+
+// Explicit instantiation (the header only declares pack/unpack, so they are emitted here).
+template struct ARIMAParams<double>;
+
+}  // namespace ML
@@ -1,6 +1,6 @@
 # =============================================================================
 # cmake-format: off
-# SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 # cmake-format: on
 # =============================================================================
@@ -69,7 +69,7 @@ set(CUML_EXCLUDE_TREELITE_FROM_ALL ON)
 include(${CUML_CPP_SRC}/cmake/thirdparty/get_treelite.cmake)
 
 # --- libcuml --- #
-find_package(cuml "${RAPIDS_VERSION}" REQUIRED)
+find_package(cuml "${RAPIDS_VERSION}" REQUIRED COMPONENTS cuml)
 
 set(cuml_sg_libraries cuml::${CUML_CPP_TARGET})
 set(cuml_mg_libraries cuml::${CUML_CPP_TARGET})

@@ -54,6 +54,7 @@ sdist.reproducible = true
 wheel.packages = ["libcuml"]
 wheel.install-dir = "libcuml"
 wheel.py-api = "py3"
+install.components = ["cuml"]
 
 [tool.scikit-build.metadata.version]
 provider = "scikit_build_core.metadata.regex"