Skip to content

Commit aaa89f5

Browse files
authored
Merge branch 'main' into init-checks-dask-kmeans
2 parents 82a6bb2 + 178b5ca commit aaa89f5

40 files changed

Lines changed: 2413 additions & 3720 deletions

.github/workflows/pr.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,7 @@ jobs:
5353
uses: rapidsai/shared-actions/check_nightly_success/dispatch@main
5454
with:
5555
repo: cuml
56-
max_days_without_success: 7
56+
max_days_without_success: 14
5757
changed-files:
5858
secrets: inherit
5959
needs: telemetry-setup

BUILD.md

Lines changed: 105 additions & 67 deletions
Large diffs are not rendered by default.

conda/environments/all_cuda-129_arch-aarch64.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@ dependencies:
1616
- cuda-version=12.9
1717
- cudf==25.12.*,>=0.0.0a0
1818
- cupy>=13.6.0
19+
- cuvs==25.12.*,>=0.0.0a0
1920
- cxx-compiler
2021
- cython>=3.0.0
2122
- dask-cuda==25.12.*,>=0.0.0a0

conda/environments/all_cuda-129_arch-x86_64.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@ dependencies:
1616
- cuda-version=12.9
1717
- cudf==25.12.*,>=0.0.0a0
1818
- cupy>=13.6.0
19+
- cuvs==25.12.*,>=0.0.0a0
1920
- cxx-compiler
2021
- cython>=3.0.0
2122
- dask-cuda==25.12.*,>=0.0.0a0

conda/environments/all_cuda-130_arch-aarch64.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@ dependencies:
1616
- cuda-version=13.0
1717
- cudf==25.12.*,>=0.0.0a0
1818
- cupy>=13.6.0
19+
- cuvs==25.12.*,>=0.0.0a0
1920
- cxx-compiler
2021
- cython>=3.0.0
2122
- dask-cuda==25.12.*,>=0.0.0a0

conda/environments/all_cuda-130_arch-x86_64.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@ dependencies:
1616
- cuda-version=13.0
1717
- cudf==25.12.*,>=0.0.0a0
1818
- cupy>=13.6.0
19+
- cuvs==25.12.*,>=0.0.0a0
1920
- cxx-compiler
2021
- cython>=3.0.0
2122
- dask-cuda==25.12.*,>=0.0.0a0

cpp/bench/sg/linkage.cu

Lines changed: 11 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,6 @@
66
#include "benchmark.cuh"
77

88
#include <cuml/cluster/linkage.hpp>
9-
#include <cuml/cluster/single_linkage_output.hpp>
109
#include <cuml/common/distance_type.hpp>
1110
#include <cuml/common/logger.hpp>
1211

@@ -35,36 +34,32 @@ class Linkage : public BlobsFixture<D> {
3534
}
3635

3736
this->loopOnState(state, [this]() {
38-
out_arrs.labels = labels;
39-
out_arrs.children = out_children;
40-
41-
ML::single_linkage_neighbors(*this->handle,
42-
this->data.X.data(),
43-
this->params.nrows,
44-
this->params.ncols,
45-
&out_arrs,
46-
ML::distance::DistanceType::L2Unexpanded,
47-
15,
48-
50);
37+
ML::linkage::single_linkage(*this->handle,
38+
this->data.X.data(),
39+
this->params.nrows,
40+
this->params.ncols,
41+
50,
42+
ML::distance::DistanceType::L2Unexpanded,
43+
children,
44+
labels);
4945
});
5046
}
5147

5248
void allocateTempBuffers(const ::benchmark::State& state) override
5349
{
5450
this->alloc(labels, this->params.nrows);
55-
this->alloc(out_children, (this->params.nrows - 1) * 2);
51+
this->alloc(children, (this->params.nrows - 1) * 2);
5652
}
5753

5854
void deallocateTempBuffers(const ::benchmark::State& state) override
5955
{
6056
this->dealloc(labels, this->params.nrows);
61-
this->dealloc(out_children, (this->params.nrows - 1) * 2);
57+
this->dealloc(children, (this->params.nrows - 1) * 2);
6258
}
6359

6460
private:
6561
int* labels;
66-
int* out_children;
67-
ML::single_linkage_output<int> out_arrs;
62+
int* children;
6863
};
6964

7065
std::vector<Params> getInputs()

cpp/include/cuml/cluster/linkage.hpp

Lines changed: 28 additions & 61 deletions
Original file line number | Diff line number | Diff line change
@@ -5,75 +5,42 @@
55

66
#pragma once
77

8-
#include <cuml/cluster/single_linkage_output.hpp>
98
#include <cuml/common/distance_type.hpp>
109

1110
#include <raft/core/handle.hpp>
1211

13-
namespace raft {
14-
class handle_t;
15-
}
16-
1712
namespace ML {
13+
namespace linkage {
1814

1915
/**
2016
* @brief Computes single-linkage hierarchical clustering on a dense input
2117
* feature matrix and outputs the labels, dendrogram, and minimum spanning tree.
22-
* Connectivities are constructed using the full n^2 pairwise distance matrix.
23-
* This can be very fast for smaller datasets when there is enough memory
24-
* available.
25-
* @param[in] handle raft handle to encapsulate expensive resources
26-
* @param[in] X dense feature matrix on device
27-
* @param[in] m number of rows in X
28-
* @param[in] n number of columns in X
29-
* @param[in] metric distance metric to use. Must be supported by the
30-
* dense pairwise distances API.
31-
* @param[out] out container object for output arrays
32-
* @param[out] n_clusters number of clusters to cut from resulting dendrogram
33-
*/
34-
void single_linkage_pairwise(const raft::handle_t& handle,
35-
const float* X,
36-
size_t m,
37-
size_t n,
38-
ML::single_linkage_output<int>* out,
39-
ML::distance::DistanceType metric,
40-
int n_clusters = 5);
41-
42-
/**
43-
* @brief Computes single-linkage hierarchical clustering on a dense input
44-
* feature matrix and outputs the labels, dendrogram, and minimum spanning tree.
45-
* Connectivities are constructed using a k-nearest neighbors graph. While this
46-
* strategy enables the algorithm to scale to much higher numbers of rows,
47-
* it comes with the downside that additional knn steps may need to be
48-
* executed to connect an otherwise unconnected k-nn graph.
49-
* @param[in] handle raft handle to encapsulate expensive resources
50-
* @param[in] X dense feature matrix on device
51-
* @param[in] m number of rows in X
52-
* @param[in] n number of columns in X
53-
* @param[in] metric distance metric to use. Must be supported by the
54-
* dense pairwise distances API.
55-
* @param[out] out container object for output arrays
56-
* @param[out] c the optimal value of k is guaranteed to be at least log(n) + c
57-
* where c is some constant. This constant can usually be set to a fairly low
58-
* value, like 15, and still maintain good performance.
59-
* @param[out] n_clusters number of clusters to cut from resulting dendrogram
18+
*
19+
* @param[in] handle: raft handle to encapsulate expensive resources
20+
* @param[in] X: dense feature matrix on device, C contiguous
21+
* @param[in] n_rows: number of rows in X
22+
* @param[in] n_cols: number of columns in X
23+
* @param[in] n_clusters: the number of clusters to fit.
24+
* @param[in] metric: distance metric to use. Must be supported by the
25+
* dense pairwise distances API.
26+
* @param[out] children: the output dendrogram, shape=(n_rows - 1, 2), C contiguous
27+
* @param[out] labels: the output labels, shape=(n_rows,)
28+
* @param[in] use_knn: whether to construct a knn graph instead of the full
29+
* n^2 pairwise distance matrix. This can be faster for very large
30+
* datasets or in cases where lower memory usage is required.
31+
* @param[in] c: tunes the number of neighbors when `use_knn` is true, where
32+
* `n_neighbors=log(n_rows) + c`.
6033
*/
61-
void single_linkage_neighbors(
62-
const raft::handle_t& handle,
63-
const float* X,
64-
size_t m,
65-
size_t n,
66-
ML::single_linkage_output<int>* out,
67-
ML::distance::DistanceType metric = ML::distance::DistanceType::L2Unexpanded,
68-
int c = 15,
69-
int n_clusters = 5);
70-
71-
void single_linkage_pairwise(const raft::handle_t& handle,
72-
const float* X,
73-
size_t m,
74-
size_t n,
75-
ML::single_linkage_output<int64_t>* out,
76-
ML::distance::DistanceType metric,
77-
int n_clusters = 5);
78-
34+
void single_linkage(const raft::handle_t& handle,
35+
const float* X,
36+
int n_rows,
37+
int n_cols,
38+
size_t n_clusters,
39+
ML::distance::DistanceType metric,
40+
int* children,
41+
int* labels,
42+
bool use_knn = false,
43+
int c = 15);
44+
45+
}; // namespace linkage
7946
}; // namespace ML

cpp/include/cuml/cluster/single_linkage_output.hpp

Lines changed: 0 additions & 35 deletions
This file was deleted.

0 commit comments

Comments
 (0)