rapidsai · rapids-bot · Nov 21, 2025 · Nov 19, 2025 · Nov 20, 2025 · Nov 20, 2025
@@ -40,6 +40,7 @@ ENV HISTFILE="/home/coder/.cache/._bash_history"
 ENV AWS_ROLE_ARN="arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs"
 ENV SCCACHE_REGION="us-east-2"
 ENV SCCACHE_BUCKET="rapids-sccache-devs"
+ENV SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE=true
 ENV SCCACHE_IDLE_TIMEOUT=0
 
 ###

@@ -29,6 +29,7 @@ rattler-build build --recipe conda/recipes/libcuml \
                     "${RATTLER_CHANNELS[@]}"
 
 sccache --show-adv-stats
+sccache --stop-server >/dev/null 2>&1 || true
 
 # remove build_cache directory to avoid uploading the entire source tree
 # tracked in https://github.com/prefix-dev/rattler-build/issues/1424

@@ -39,6 +39,7 @@ rattler-build build --recipe conda/recipes/cuml \
                     "${RATTLER_CHANNELS[@]}"
 
 sccache --show-adv-stats
+sccache --stop-server >/dev/null 2>&1 || true
 
 # remove build_cache directory to avoid uploading the entire source tree
 # tracked in https://github.com/prefix-dev/rattler-build/issues/1424

@@ -11,6 +11,9 @@ source rapids-configure-sccache
 source rapids-date-string
 source rapids-init-pip
 
+export SCCACHE_S3_PREPROCESSOR_CACHE_KEY_PREFIX="${package_name}/${RAPIDS_CONDA_ARCH}/cuda${RAPIDS_CUDA_VERSION%%.*}/wheel/preprocessor-cache"
+export SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE=true
+
 rapids-generate-version > ./VERSION
 
 cd "${package_dir}"
@@ -26,3 +29,4 @@ rapids-pip-retry wheel \
     .
 
 sccache --show-adv-stats
+sccache --stop-server >/dev/null 2>&1 || true
@@ -37,7 +37,7 @@ rapids-logger "notebook tests cuml"
 
 # Add notebooks that should be skipped here
 # (space-separated list of filenames without paths)
-SKIPNBS="cuml_benchmarks.ipynb hdbscan_soft_clustering_benchmark.ipynb"
+SKIPNBS="cuml_benchmarks.ipynb hdbscan_soft_clustering_benchmark.ipynb forest_inference_demo.ipynb target_encoder_walkthrough.ipynb"
 NBTEST="$(realpath "$(dirname "$0")/utils/nbtest.sh")"
 
 cd notebooks

@@ -65,7 +65,6 @@ dependencies:
 - rapids-build-backend>=0.4.0,<0.5.0.dev0
 - rapids-dask-dependency==26.2.*,>=0.0.0a0
 - rapids-logger==0.2.*,>=0.0.0a0
-- rapids-xgboost==26.2.*,>=0.0.0a0
 - recommonmark
 - rich
 - rmm==26.2.*,>=0.0.0a0

@@ -65,7 +65,6 @@ dependencies:
 - rapids-build-backend>=0.4.0,<0.5.0.dev0
 - rapids-dask-dependency==26.2.*,>=0.0.0a0
 - rapids-logger==0.2.*,>=0.0.0a0
-- rapids-xgboost==26.2.*,>=0.0.0a0
 - recommonmark
 - rich
 - rmm==26.2.*,>=0.0.0a0

@@ -65,7 +65,6 @@ dependencies:
 - rapids-build-backend>=0.4.0,<0.5.0.dev0
 - rapids-dask-dependency==26.2.*,>=0.0.0a0
 - rapids-logger==0.2.*,>=0.0.0a0
-- rapids-xgboost==26.2.*,>=0.0.0a0
 - recommonmark
 - rich
 - rmm==26.2.*,>=0.0.0a0

@@ -65,7 +65,6 @@ dependencies:
 - rapids-build-backend>=0.4.0,<0.5.0.dev0
 - rapids-dask-dependency==26.2.*,>=0.0.0a0
 - rapids-logger==0.2.*,>=0.0.0a0
-- rapids-xgboost==26.2.*,>=0.0.0a0
 - recommonmark
 - rich
 - rmm==26.2.*,>=0.0.0a0

@@ -57,6 +57,8 @@ build:
       SCCACHE_REGION: ${{ env.get("SCCACHE_REGION", default="") }}
       SCCACHE_S3_KEY_PREFIX: cuml/${{ env.get("RAPIDS_CONDA_ARCH") }}/cuda${{ cuda_major }}
       SCCACHE_S3_NO_CREDENTIALS: ${{ env.get("SCCACHE_S3_NO_CREDENTIALS", default="false") }}
+      SCCACHE_S3_PREPROCESSOR_CACHE_KEY_PREFIX: cuml/${{ env.get("RAPIDS_CONDA_ARCH") }}/cuda${{ cuda_major }}/conda/preprocessor-cache
+      SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE: ${{ env.get("SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE", default="true") }}
       SCCACHE_S3_USE_SSL: ${{ env.get("SCCACHE_S3_USE_SSL", default="true") }}
       SCCACHE_SERVER_LOG: ${{ env.get("SCCACHE_SERVER_LOG", default="sccache=debug") }}
 

@@ -55,6 +55,8 @@ cache:
         SCCACHE_REGION: ${{ env.get("SCCACHE_REGION", default="") }}
         SCCACHE_S3_KEY_PREFIX: libcuml/${{ env.get("RAPIDS_CONDA_ARCH") }}/cuda${{ cuda_major }}
         SCCACHE_S3_NO_CREDENTIALS: ${{ env.get("SCCACHE_S3_NO_CREDENTIALS", default="false") }}
+        SCCACHE_S3_PREPROCESSOR_CACHE_KEY_PREFIX: libcuml/${{ env.get("RAPIDS_CONDA_ARCH") }}/cuda${{ cuda_major }}/conda/preprocessor-cache
+        SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE: ${{ env.get("SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE", default="true") }}
         SCCACHE_S3_USE_SSL: ${{ env.get("SCCACHE_S3_USE_SSL", default="true") }}
         SCCACHE_SERVER_LOG: ${{ env.get("SCCACHE_SERVER_LOG", default="sccache=debug") }}
 

@@ -5,6 +5,8 @@
 
 #pragma once
 
+#include <raft/spatial/knn/detail/ann_utils.cuh>
+
 #include <stdint.h>
 
 namespace ML {
@@ -104,7 +106,15 @@ struct manifold_precomputed_knn_inputs_t : public manifold_inputs_t<value_t> {
 
   knn_graph<value_idx, value_t> knn_graph;
 
-  bool alloc_knn_graph() const { return false; }
+  bool alloc_knn_graph() const
+  {
+    // Return true if check_pointer_residency reports the kNN arrays as host_only or mixed
+    // (device memory must be allocated); return false otherwise (no allocation needed).
+    auto pointer_residency = raft::spatial::knn::detail::utils::check_pointer_residency(
+      knn_graph.knn_indices, knn_graph.knn_dists);
+    return pointer_residency == raft::spatial::knn::detail::utils::pointer_residency::host_only ||
+           pointer_residency == raft::spatial::knn::detail::utils::pointer_residency::mixed;
+  }
 };
 
 };  // end namespace ML
@@ -197,8 +197,14 @@ inline void launcher(const raft::handle_t& handle,
                      const ML::UMAPParams* params,
                      cudaStream_t stream)
 {
-  out.knn_indices = inputsA.knn_graph.knn_indices;
-  out.knn_dists   = inputsA.knn_graph.knn_dists;
+  if (inputsA.alloc_knn_graph()) {
+    // if new space for the knn graph is allocated, copy the data from the precomputed knn graph
+    raft::copy(out.knn_indices, inputsA.knn_graph.knn_indices, inputsA.n * n_neighbors, stream);
+    raft::copy(out.knn_dists, inputsA.knn_graph.knn_dists, inputsA.n * n_neighbors, stream);
+  } else {
+    out.knn_indices = inputsA.knn_graph.knn_indices;
+    out.knn_dists   = inputsA.knn_graph.knn_dists;
+  }
 }
 
 // Instantiation for precomputed inputs, int indices
@@ -211,8 +217,14 @@ inline void launcher(const raft::handle_t& handle,
                      const ML::UMAPParams* params,
                      cudaStream_t stream)
 {
-  out.knn_indices = inputsA.knn_graph.knn_indices;
-  out.knn_dists   = inputsA.knn_graph.knn_dists;
+  if (inputsA.alloc_knn_graph()) {
+    // if new space for the knn graph is allocated, copy the data from the precomputed knn graph
+    raft::copy(out.knn_indices, inputsA.knn_graph.knn_indices, inputsA.n * n_neighbors, stream);
+    raft::copy(out.knn_dists, inputsA.knn_graph.knn_dists, inputsA.n * n_neighbors, stream);
+  } else {
+    out.knn_indices = inputsA.knn_graph.knn_indices;
+    out.knn_dists   = inputsA.knn_graph.knn_dists;
+  }
 }
 
 }  // namespace Algo

@@ -35,7 +35,7 @@ files:
       - rapids_build_backend
       - test_python
       - test_python_dask
-      - test_python_xgboost
+      # - test_python_xgboost
   devcontainers:
     output: none
     includes:
@@ -124,7 +124,7 @@ files:
       - depends_on_libcuml
       - py_version
       - test_python
-      - test_python_xgboost
+      # - test_python_xgboost
   test_python_dask:
     output: none
     includes:
@@ -133,7 +133,7 @@ files:
       - depends_on_libcuml
       - py_version
       - test_python
-      - test_python_xgboost
+      # - test_python_xgboost
       - test_python_dask
       - depends_on_rapids_dask_dependency
       - depends_on_dask_cudf
@@ -202,7 +202,7 @@ files:
       key: test
     includes:
       - test_python
-      - test_python_xgboost
+      # - test_python_xgboost
   py_dask_cuml:
     output: pyproject
     pyproject_dir: python/cuml
@@ -498,18 +498,18 @@ dependencies:
       - output_types: [conda, requirements, pyproject]
         packages:
           - dask-ml
-  test_python_xgboost:
-    common:
-      - output_types: [conda]
-        packages:
-          # We must separate xgboost into its own list so that it is not
-          # included in the "devcontainers" key. The libxgboost package depends
-          # on librmm but we do not want to have a package depending on librmm
-          # in devcontainers since it should be built from source.
-          - &rapids-xgboost rapids-xgboost==26.2.*,>=0.0.0a0
-      - output_types: [requirements, pyproject]
-        packages:
-          - &xgboost xgboost>=2.1.0
+  #test_python_xgboost:
+    #common:
+      #- output_types: [conda]
+        #packages:
+          ## We must separate xgboost into its own list so that it is not
+          ## included in the "devcontainers" key. The libxgboost package depends
+          ## on librmm but we do not want to have a package depending on librmm
+          ## in devcontainers since it should be built from source.
+          #- &rapids-xgboost rapids-xgboost==26.2.*,>=0.0.0a0
+      #- output_types: [requirements, pyproject]
+        #packages:
+          #- &xgboost xgboost>=2.1.0
   test_notebooks:
     common:
       - output_types: [conda, requirements]
@@ -523,11 +523,11 @@ dependencies:
       - output_types: conda
         packages:
           - matplotlib-base
-          - *rapids-xgboost
+          # - *rapids-xgboost
       - output_types: requirements
         packages:
           - matplotlib
-          - *xgboost
+          # - *xgboost
   depends_on_cuda_python:
     specific:
       - output_types: [conda, requirements, pyproject]

@@ -263,7 +263,7 @@ def _determine_k_from_arrays(
     return total_elements // n_samples
 
 
-def extract_knn_graph(knn_info, n_neighbors):
+def extract_knn_graph(knn_info, n_neighbors, mem_type="device"):
     """
     Extract the nearest neighbors distances and indices
     from the knn_info parameter.
@@ -367,6 +367,7 @@ def extract_knn_graph(knn_info, n_neighbors):
         deepcopy=deepcopy,
         check_dtype=np.int64,
         convert_to_dtype=np.int64,
+        convert_to_mem_type=mem_type,
     )
 
     knn_dists_m, _, _, _ = input_to_cuml_array(
@@ -375,6 +376,7 @@ def extract_knn_graph(knn_info, n_neighbors):
         deepcopy=deepcopy,
         check_dtype=np.float32,
         convert_to_dtype=np.float32,
+        convert_to_mem_type=mem_type,
     )
 
     return knn_indices_m, knn_dists_m
@@ -5,7 +5,6 @@
 
 from cuml.internals.api_context_managers import (
     in_internal_api,
-    set_api_output_dtype,
     set_api_output_type,
 )
 from cuml.internals.api_decorators import (

@@ -61,22 +61,6 @@ def set_api_output_type(output_type: str):
     GlobalSettings().root_cm.output_type = array_type
 
 
-def set_api_output_dtype(output_dtype):
-    assert GlobalSettings().root_cm is not None
-
-    # Try to convert any array objects to their type
-    if output_dtype is not None and cuml.internals.input_utils.is_array_like(
-        output_dtype
-    ):
-        output_dtype = cuml.internals.input_utils.determine_array_dtype(
-            output_dtype
-        )
-
-        assert output_dtype is not None
-
-    GlobalSettings().root_cm.output_dtype = output_dtype
-
-
 class InternalAPIContext(contextlib.ExitStack):
     def __init__(self):
         super().__init__()
@@ -89,8 +73,6 @@ def cleanup():
         self.enter_context(cupy_using_allocator(rmm_cupy_allocator))
         self.prev_output_type = self.enter_context(_using_mirror_output_type())
 
-        self.output_dtype = None
-
         # Set the output type to the prev_output_type. If "input", set to None
         # to allow inner functions to specify the input
         self.output_type = (
@@ -124,24 +106,14 @@ def __exit__(self, *exc_details):
     def push_output_types(self):
         try:
             old_output_type = self.output_type
-            old_output_dtype = self.output_dtype
-
             self.output_type = None
-            self.output_dtype = None
-
             yield
-
         finally:
             self.output_type = (
                 old_output_type
                 if old_output_type is not None
                 else self.output_type
             )
-            self.output_dtype = (
-                old_output_dtype
-                if old_output_dtype is not None
-                else self.output_dtype
-            )
 
 
 def get_internal_context() -> InternalAPIContext:
@@ -348,10 +320,7 @@ def convert_to_outputtype(self, ret_val):
             and output_type != "input"
         ), ("Invalid root_cm.output_type: '{}'.").format(output_type)
 
-        return ret_val.to_output(
-            output_type=output_type,
-            output_dtype=self._context.root_cm.output_dtype,
-        )
+        return ret_val.to_output(output_type=output_type)
 
 
 class ProcessReturnSparseArray(ProcessReturnArray):