Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
59 commits
Select commit Hold shift + click to select a range
7e9f29a
hdbscan cpp int64
jinsolp Jul 18, 2025
91ecf2d
int64
jinsolp Jul 18, 2025
e362a3a
python bindings
jinsolp Jul 18, 2025
e1fa9ce
generic casting to prepare for int64 transition
jinsolp Jul 18, 2025
bbfc01a
Merge branch 'branch-25.08' into generic-casting
jinsolp Jul 22, 2025
f88ab01
Merge branch 'rapidsai:branch-25.08' into hdbscan-int64-ref
jinsolp Jul 23, 2025
276cb2b
Merge branch 'branch-25.08' into generic-casting
jinsolp Jul 31, 2025
56cbc07
Merge branch 'branch-25.10' into generic-casting
jinsolp Jul 31, 2025
b7d94a2
static_cast
jinsolp Aug 5, 2025
b282ea3
Merge branch 'generic-casting' of https://github.com/jinsolp/cuml int…
jinsolp Aug 5, 2025
3f1287e
Merge branch 'branch-25.10' into generic-casting
jinsolp Aug 5, 2025
afefc10
Merge branch 'branch-25.10' into generic-casting
jinsolp Aug 5, 2025
4fbe1fa
static cast
jinsolp Aug 6, 2025
33f3aa8
Merge branch 'generic-casting' of https://github.com/jinsolp/cuml int…
jinsolp Aug 6, 2025
d7b3134
Merge branch 'branch-25.10' into generic-casting
jinsolp Aug 6, 2025
3b8b4ba
Merge branch 'branch-25.10' into generic-casting
jinsolp Aug 7, 2025
716029a
Merge branch 'rapidsai:branch-25.10' into hdbscan-int64-ref
jinsolp Aug 8, 2025
4466710
merge commit
jinsolp Aug 8, 2025
5c72ea3
func sig
jinsolp Aug 8, 2025
507df40
static_cast
jinsolp Aug 8, 2025
635272c
Merge branch 'branch-25.10' into hdbscan-int64-ref
jinsolp Aug 11, 2025
8174926
Merge branch 'branch-25.10' into hdbscan-int64-ref
jinsolp Aug 19, 2025
fdb875d
get_cuvs pin
jinsolp Aug 19, 2025
f6dfdbe
Merge branch 'branch-25.10' into hdbscan-int64-ref
jinsolp Aug 25, 2025
ad2682e
Merge branch 'branch-25.10' into hdbscan-int64-ref
jinsolp Aug 27, 2025
98d1680
Merge branch 'branch-25.10' into hdbscan-int64-ref
jinsolp Sep 3, 2025
27f30f5
Merge branch 'branch-25.10' into hdbscan-int64-ref
jinsolp Sep 16, 2025
356b285
pin cmake
jinsolp Sep 17, 2025
0e53b8b
fix cmake pin
jinsolp Sep 17, 2025
dbfa33b
fix cmake pin
jinsolp Sep 17, 2025
ac75472
resolve merge conflict
jinsolp Sep 22, 2025
73d277c
Merge branch 'branch-25.10' into hdbscan-int64-ref
jinsolp Sep 22, 2025
bbd4909
static cast
jinsolp Sep 22, 2025
18f0743
Merge branch 'rapidsai:branch-25.10' into hdbscan-int64-ref
jinsolp Sep 23, 2025
7345259
Merge branch 'branch-25.10' into hdbscan-int64-ref
jinsolp Sep 24, 2025
6bf8d17
Merge branch 'branch-25.10' into hdbscan-int64-ref
jinsolp Sep 25, 2025
60a82d6
ci files for running on cuvs
jinsolp Sep 26, 2025
20fc413
ci files
jinsolp Sep 26, 2025
8195622
Merge branch 'hdbscan-int64-ref' of https://github.com/jinsolp/cuml i…
jinsolp Sep 26, 2025
cb77a6a
ci files
jinsolp Sep 26, 2025
db36d3f
Merge branch 'branch-25.10' into hdbscan-int64-ref
jinsolp Sep 26, 2025
c03777b
use conda packages from PRs in more places, try to pin to specific pa…
jameslamb Sep 26, 2025
ff9ed8a
merge conflict branch-25.12
jinsolp Oct 6, 2025
05665c6
revert ver
jinsolp Oct 6, 2025
976b5ae
debug .sh
jinsolp Oct 8, 2025
07863c5
Revert wheel .sh
jinsolp Oct 8, 2025
17895f0
Merge branch 'branch-25.12' into hdbscan-int64-ref
jinsolp Oct 8, 2025
030a5da
fix to source correct file
jinsolp Oct 8, 2025
ca7c059
debug print for conda, wheel cuml
jinsolp Oct 8, 2025
5b02e14
not delete origin .json
jinsolp Oct 8, 2025
4d8e5c7
rm source use_wheels_from_prs.sh
jinsolp Oct 8, 2025
490c916
preprend conda channels
jinsolp Oct 8, 2025
119a3b2
merge and resolve conflict
jinsolp Oct 8, 2025
0b73aeb
int32- > int64
jinsolp Oct 9, 2025
591b03d
Merge branch 'branch-25.12' into hdbscan-int64-ref
jinsolp Oct 9, 2025
fe35606
revert ci cmake files
jinsolp Oct 10, 2025
289e797
Merge branch 'branch-25.12' into hdbscan-int64-ref
jinsolp Oct 10, 2025
b7f93ed
empty commit to trigger CI
jinsolp Oct 10, 2025
9a83671
Merge branch 'branch-25.12' into hdbscan-int64-ref
jinsolp Oct 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 26 additions & 26 deletions cpp/include/cuml/cluster/hdbscan.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ class hdbscan_output : public robust_single_linkage_output<value_idx, value_t> {
CondensedHierarchy<value_idx, value_t> condensed_tree;
};

template class CondensedHierarchy<int, float>;
template class CondensedHierarchy<int64_t, float>;

/**
* Container object for computing and storing intermediate information needed later for computing
Expand Down Expand Up @@ -387,14 +387,14 @@ class PredictionData {
rmm::device_uvector<value_idx> index_into_children;
};

template class PredictionData<int, float>;
template class PredictionData<int64_t, float>;

void generate_prediction_data(const raft::handle_t& handle,
CondensedHierarchy<int, float>& condensed_tree,
int* labels,
int* inverse_label_map,
CondensedHierarchy<int64_t, float>& condensed_tree,
int64_t* labels,
int64_t* inverse_label_map,
int n_selected_clusters,
PredictionData<int, float>& prediction_data);
PredictionData<int64_t, float>& prediction_data);

}; // namespace Common
}; // namespace HDBSCAN
Expand Down Expand Up @@ -427,43 +427,43 @@ void hdbscan(const raft::handle_t& handle,
size_t n,
ML::distance::DistanceType metric,
HDBSCAN::Common::HDBSCANParams& params,
HDBSCAN::Common::hdbscan_output<int, float>& out,
HDBSCAN::Common::hdbscan_output<int64_t, float>& out,
float* core_dists);

void build_condensed_hierarchy(const raft::handle_t& handle,
const int* children,
const int64_t* children,
const float* delta,
const int* sizes,
const int64_t* sizes,
int min_cluster_size,
int n_leaves,
HDBSCAN::Common::CondensedHierarchy<int, float>& condensed_tree);
HDBSCAN::Common::CondensedHierarchy<int64_t, float>& condensed_tree);

void _extract_clusters(const raft::handle_t& handle,
size_t n_leaves,
int n_edges,
int* parents,
int* children,
int64_t* parents,
int64_t* children,
float* lambdas,
int* sizes,
int* labels,
int64_t* sizes,
int64_t* labels,
float* probabilities,
HDBSCAN::Common::CLUSTER_SELECTION_METHOD cluster_selection_method,
bool allow_single_cluster,
int max_cluster_size,
int64_t max_cluster_size,
float cluster_selection_epsilon);

void compute_all_points_membership_vectors(
const raft::handle_t& handle,
HDBSCAN::Common::CondensedHierarchy<int, float>& condensed_tree,
HDBSCAN::Common::PredictionData<int, float>& prediction_data,
HDBSCAN::Common::CondensedHierarchy<int64_t, float>& condensed_tree,
HDBSCAN::Common::PredictionData<int64_t, float>& prediction_data,
const float* X,
ML::distance::DistanceType metric,
float* membership_vec,
size_t batch_size = 4096);

void compute_membership_vector(const raft::handle_t& handle,
HDBSCAN::Common::CondensedHierarchy<int, float>& condensed_tree,
HDBSCAN::Common::PredictionData<int, float>& prediction_data,
HDBSCAN::Common::CondensedHierarchy<int64_t, float>& condensed_tree,
HDBSCAN::Common::PredictionData<int64_t, float>& prediction_data,
const float* X,
const float* points_to_predict,
size_t n_prediction_points,
Expand All @@ -473,15 +473,15 @@ void compute_membership_vector(const raft::handle_t& handle,
size_t batch_size = 4096);

void out_of_sample_predict(const raft::handle_t& handle,
HDBSCAN::Common::CondensedHierarchy<int, float>& condensed_tree,
HDBSCAN::Common::PredictionData<int, float>& prediction_data,
HDBSCAN::Common::CondensedHierarchy<int64_t, float>& condensed_tree,
HDBSCAN::Common::PredictionData<int64_t, float>& prediction_data,
const float* X,
int* labels,
int64_t* labels,
const float* points_to_predict,
size_t n_prediction_points,
ML::distance::DistanceType metric,
int min_samples,
int* out_labels,
int64_t* out_labels,
float* out_probabilities);

namespace HDBSCAN::HELPER {
Expand Down Expand Up @@ -519,12 +519,12 @@ void compute_core_dists(const raft::handle_t& handle,
* @param[in] cluster_selection_epsilon cluster selection epsilon
*/
void compute_inverse_label_map(const raft::handle_t& handle,
HDBSCAN::Common::CondensedHierarchy<int, float>& condensed_tree,
HDBSCAN::Common::CondensedHierarchy<int64_t, float>& condensed_tree,
size_t n_leaves,
HDBSCAN::Common::CLUSTER_SELECTION_METHOD cluster_selection_method,
rmm::device_uvector<int>& inverse_label_map,
rmm::device_uvector<int64_t>& inverse_label_map,
bool allow_single_cluster,
int max_cluster_size,
int64_t max_cluster_size,
float cluster_selection_epsilon);

} // namespace HDBSCAN::HELPER
Expand Down
48 changes: 24 additions & 24 deletions cpp/src/hdbscan/hdbscan.cu
Original file line number Diff line number Diff line change
Expand Up @@ -32,20 +32,20 @@ void hdbscan(const raft::handle_t& handle,
size_t n,
ML::distance::DistanceType metric,
HDBSCAN::Common::HDBSCANParams& params,
HDBSCAN::Common::hdbscan_output<int, float>& out,
HDBSCAN::Common::hdbscan_output<int64_t, float>& out,
float* core_dists)
{
rmm::device_uvector<int> labels(m, handle.get_stream());
rmm::device_uvector<int64_t> labels(m, handle.get_stream());
HDBSCAN::_fit_hdbscan(handle, X, m, n, metric, params, labels.data(), core_dists, out);
}

void build_condensed_hierarchy(const raft::handle_t& handle,
const int* children,
const int64_t* children,
const float* delta,
const int* sizes,
const int64_t* sizes,
int min_cluster_size,
int n_leaves,
HDBSCAN::Common::CondensedHierarchy<int, float>& condensed_tree)
HDBSCAN::Common::CondensedHierarchy<int64_t, float>& condensed_tree)
{
HDBSCAN::detail::Condense::build_condensed_hierarchy(
handle, children, delta, sizes, min_cluster_size, n_leaves, condensed_tree);
Expand All @@ -54,23 +54,23 @@ void build_condensed_hierarchy(const raft::handle_t& handle,
void _extract_clusters(const raft::handle_t& handle,
size_t n_leaves,
int n_edges,
int* parents,
int* children,
int64_t* parents,
int64_t* children,
float* lambdas,
int* sizes,
int* labels,
int64_t* sizes,
int64_t* labels,
float* probabilities,
HDBSCAN::Common::CLUSTER_SELECTION_METHOD cluster_selection_method,
bool allow_single_cluster,
int max_cluster_size,
int64_t max_cluster_size,
float cluster_selection_epsilon)
{
HDBSCAN::Common::CondensedHierarchy condensed_tree(
handle, n_leaves, n_edges, parents, children, lambdas, sizes);

rmm::device_uvector<float> stabilities(condensed_tree.get_n_clusters(), handle.get_stream());
rmm::device_uvector<int> label_map(condensed_tree.get_n_clusters(), handle.get_stream());
rmm::device_uvector<int> inverse_label_map(0, handle.get_stream());
rmm::device_uvector<int64_t> label_map(condensed_tree.get_n_clusters(), handle.get_stream());
rmm::device_uvector<int64_t> inverse_label_map(0, handle.get_stream());

HDBSCAN::detail::Extract::extract_clusters(handle,
condensed_tree,
Expand All @@ -88,8 +88,8 @@ void _extract_clusters(const raft::handle_t& handle,

void compute_all_points_membership_vectors(
const raft::handle_t& handle,
HDBSCAN::Common::CondensedHierarchy<int, float>& condensed_tree,
HDBSCAN::Common::PredictionData<int, float>& prediction_data,
HDBSCAN::Common::CondensedHierarchy<int64_t, float>& condensed_tree,
HDBSCAN::Common::PredictionData<int64_t, float>& prediction_data,
const float* X,
ML::distance::DistanceType metric,
float* membership_vec,
Expand All @@ -100,8 +100,8 @@ void compute_all_points_membership_vectors(
}

void compute_membership_vector(const raft::handle_t& handle,
HDBSCAN::Common::CondensedHierarchy<int, float>& condensed_tree,
HDBSCAN::Common::PredictionData<int, float>& prediction_data,
HDBSCAN::Common::CondensedHierarchy<int64_t, float>& condensed_tree,
HDBSCAN::Common::PredictionData<int64_t, float>& prediction_data,
const float* X,
const float* points_to_predict,
size_t n_prediction_points,
Expand All @@ -125,15 +125,15 @@ void compute_membership_vector(const raft::handle_t& handle,
}

void out_of_sample_predict(const raft::handle_t& handle,
HDBSCAN::Common::CondensedHierarchy<int, float>& condensed_tree,
HDBSCAN::Common::PredictionData<int, float>& prediction_data,
HDBSCAN::Common::CondensedHierarchy<int64_t, float>& condensed_tree,
HDBSCAN::Common::PredictionData<int64_t, float>& prediction_data,
const float* X,
int* labels,
int64_t* labels,
const float* points_to_predict,
size_t n_prediction_points,
ML::distance::DistanceType metric,
int min_samples,
int* out_labels,
int64_t* out_labels,
float* out_probabilities)
{
// Note that (min_samples+1) is passed to the approximate_predict function. This was done for the
Expand Down Expand Up @@ -161,17 +161,17 @@ void compute_core_dists(const raft::handle_t& handle,
ML::distance::DistanceType metric,
int min_samples)
{
HDBSCAN::detail::Reachability::_compute_core_dists<int, float>(
HDBSCAN::detail::Reachability::_compute_core_dists<int64_t, float>(
handle, X, core_dists, m, n, metric, min_samples);
}

void compute_inverse_label_map(const raft::handle_t& handle,
HDBSCAN::Common::CondensedHierarchy<int, float>& condensed_tree,
HDBSCAN::Common::CondensedHierarchy<int64_t, float>& condensed_tree,
size_t n_leaves,
HDBSCAN::Common::CLUSTER_SELECTION_METHOD cluster_selection_method,
rmm::device_uvector<int>& inverse_label_map,
rmm::device_uvector<int64_t>& inverse_label_map,
bool allow_single_cluster,
int max_cluster_size,
int64_t max_cluster_size,
float cluster_selection_epsilon)
{
HDBSCAN::detail::Extract::_compute_inverse_label_map(handle,
Expand Down
20 changes: 10 additions & 10 deletions cpp/src/hdbscan/prediction_data.cu
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,11 @@ void build_index_into_children(const raft::handle_t& handle,
* @param[in] prediction_data PredictionData object
*/
void generate_prediction_data(const raft::handle_t& handle,
CondensedHierarchy<int, float>& condensed_tree,
int* labels,
int* inverse_label_map,
CondensedHierarchy<int64_t, float>& condensed_tree,
int64_t* labels,
int64_t* inverse_label_map,
int n_selected_clusters,
PredictionData<int, float>& prediction_data)
PredictionData<int64_t, float>& prediction_data)
{
auto stream = handle.get_stream();
auto exec_policy = handle.get_thrust_policy();
Expand All @@ -120,10 +120,10 @@ void generate_prediction_data(const raft::handle_t& handle,
auto sizes = condensed_tree.get_sizes();

// first compute the death of each cluster in the condensed hierarchy
rmm::device_uvector<int> sorted_parents(n_edges, stream);
rmm::device_uvector<int64_t> sorted_parents(n_edges, stream);
raft::copy_async(sorted_parents.data(), parents, n_edges, stream);

rmm::device_uvector<int> sorted_parents_offsets(n_clusters + 1, stream);
rmm::device_uvector<int64_t> sorted_parents_offsets(n_clusters + 1, stream);
detail::Utils::parent_csr(
handle, condensed_tree, sorted_parents.data(), sorted_parents_offsets.data());

Expand All @@ -135,8 +135,8 @@ void generate_prediction_data(const raft::handle_t& handle,
const float* d_in,
float* d_out,
int num_segments,
const int* d_begin_offsets,
const int* d_end_offsets,
const int64_t* d_begin_offsets,
const int64_t* d_end_offsets,
cudaStream_t stream = 0) -> cudaError_t {
return cub::DeviceSegmentedReduce::Max(d_temp_storage,
temp_storage_bytes,
Expand Down Expand Up @@ -201,7 +201,7 @@ void generate_prediction_data(const raft::handle_t& handle,
[is_exemplar = is_exemplar.data()] __device__(auto idx) { return is_exemplar[idx]; });

// use the exemplar labels to fetch the set of selected clusters from the condensed hierarchy
rmm::device_uvector<int> exemplar_labels(n_exemplars, stream);
rmm::device_uvector<int64_t> exemplar_labels(n_exemplars, stream);

// this uses the original, pre-normalized label by
// using the inverse label_map to lookup the original labels from final labels
Expand All @@ -212,7 +212,7 @@ void generate_prediction_data(const raft::handle_t& handle,
[labels, inverse_label_map] __device__(auto idx) {
auto label = labels[idx];
if (label != -1) { return inverse_label_map[label]; }
return -1;
return static_cast<int64_t>(-1);
});

thrust::sort_by_key(exec_policy,
Expand Down
Loading