Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 8 additions & 10 deletions python/cuml/cuml/cluster/hdbscan/hdbscan.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
#

# distutils: language = c++
import warnings

import cupy as cp
import numpy as np
from pylibraft.common.handle import Handle
Expand Down Expand Up @@ -750,7 +752,7 @@ class HDBSCAN(Base, InteropMixin, ClusterMixin, CMajorInputTagMixin):
gen_min_span_tree=False,
handle=None,
verbose=False,
connectivity='knn',
connectivity='deprecated',
output_type=None,
prediction_data=False):

Expand All @@ -761,9 +763,11 @@ class HDBSCAN(Base, InteropMixin, ClusterMixin, CMajorInputTagMixin):
if min_samples is None:
min_samples = min_cluster_size

if connectivity not in ["knn", "pairwise"]:
raise ValueError("'connectivity' can only be one of "
"{'knn', 'pairwise'}")
if connectivity != "deprecated":
warnings.warn(
"The `connectivity` parameter is deprecated and will be removed in 25.10",
FutureWarning,
)

if 2 < min_samples and min_samples > 1023:
raise ValueError("'min_samples' must be a positive number "
Expand Down Expand Up @@ -888,12 +892,6 @@ class HDBSCAN(Base, InteropMixin, ClusterMixin, CMajorInputTagMixin):
params.cluster_selection_epsilon = self.cluster_selection_epsilon
params.allow_single_cluster = self.allow_single_cluster

if self.connectivity not in {"knn", "pairwise"}:
raise ValueError(
"`connectivity` must be one of {'knn', 'pairwise'}, "
f"got {self.connectivity!r}"
)

if self.cluster_selection_method == 'eom':
params.cluster_selection_method = lib.CLUSTER_SELECTION_METHOD.EOM
elif self.cluster_selection_method == 'leaf':
Expand Down
24 changes: 0 additions & 24 deletions python/cuml/cuml/tests/test_hdbscan.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,12 +154,10 @@ def assert_membership_vectors(cu_vecs, sk_vecs):
@pytest.mark.parametrize("cluster_selection_epsilon", [0.0])
@pytest.mark.parametrize("max_cluster_size", [0])
@pytest.mark.parametrize("cluster_selection_method", ["eom", "leaf"])
@pytest.mark.parametrize("connectivity", ["knn"])
def test_hdbscan_blobs(
nrows,
ncols,
nclusters,
connectivity,
cluster_selection_epsilon,
cluster_selection_method,
allow_single_cluster,
Expand Down Expand Up @@ -225,10 +223,8 @@ def test_hdbscan_blobs(
)
@pytest.mark.parametrize("allow_single_cluster", [True, False])
@pytest.mark.parametrize("cluster_selection_method", ["eom", "leaf"])
@pytest.mark.parametrize("connectivity", ["knn"])
def test_hdbscan_sklearn_datasets(
supervised_learning_dataset,
connectivity,
cluster_selection_epsilon,
cluster_selection_method,
min_samples_cluster_size_bounds,
Expand Down Expand Up @@ -289,10 +285,8 @@ def test_hdbscan_sklearn_datasets(
@pytest.mark.parametrize("max_cluster_size", [0])
@pytest.mark.parametrize("allow_single_cluster", [True, False])
@pytest.mark.parametrize("cluster_selection_method", ["eom", "leaf"])
@pytest.mark.parametrize("connectivity", ["knn"])
def test_hdbscan_sklearn_extract_clusters(
supervised_learning_dataset,
connectivity,
cluster_selection_epsilon,
cluster_selection_method,
min_samples,
Expand Down Expand Up @@ -333,11 +327,9 @@ def test_hdbscan_sklearn_extract_clusters(
@pytest.mark.parametrize("allow_single_cluster", [True, False])
@pytest.mark.parametrize("max_cluster_size", [0])
@pytest.mark.parametrize("cluster_selection_method", ["eom"])
@pytest.mark.parametrize("connectivity", ["knn"])
def test_hdbscan_cluster_patterns(
dataset,
nrows,
connectivity,
cluster_selection_epsilon,
cluster_selection_method,
min_cluster_size,
Expand Down Expand Up @@ -396,11 +388,9 @@ def test_hdbscan_cluster_patterns(
@pytest.mark.parametrize("allow_single_cluster", [True, False])
@pytest.mark.parametrize("max_cluster_size", [0])
@pytest.mark.parametrize("cluster_selection_method", ["eom", "leaf"])
@pytest.mark.parametrize("connectivity", ["knn"])
def test_hdbscan_cluster_patterns_extract_clusters(
dataset,
nrows,
connectivity,
cluster_selection_epsilon,
cluster_selection_method,
min_cluster_size,
Expand Down Expand Up @@ -589,7 +579,6 @@ def test_all_points_membership_vectors_blobs(
@pytest.mark.parametrize("allow_single_cluster", [True, False])
@pytest.mark.parametrize("max_cluster_size", [0])
@pytest.mark.parametrize("cluster_selection_method", ["eom", "leaf"])
@pytest.mark.parametrize("connectivity", ["knn"])
@pytest.mark.parametrize("batch_size", [128, 1000])
def test_all_points_membership_vectors_moons(
nrows,
Expand All @@ -599,7 +588,6 @@ def test_all_points_membership_vectors_moons(
min_cluster_size,
allow_single_cluster,
max_cluster_size,
connectivity,
batch_size,
):

Expand Down Expand Up @@ -646,7 +634,6 @@ def test_all_points_membership_vectors_moons(
@pytest.mark.parametrize("allow_single_cluster", [True, False])
@pytest.mark.parametrize("max_cluster_size", [0])
@pytest.mark.parametrize("cluster_selection_method", ["eom", "leaf"])
@pytest.mark.parametrize("connectivity", ["knn"])
@pytest.mark.parametrize("batch_size", [128, 1000])
def test_all_points_membership_vectors_circles(
nrows,
Expand All @@ -656,7 +643,6 @@ def test_all_points_membership_vectors_circles(
min_cluster_size,
allow_single_cluster,
max_cluster_size,
connectivity,
batch_size,
):
X, y = datasets.make_circles(
Expand Down Expand Up @@ -780,7 +766,6 @@ def test_approximate_predict_blobs(
@pytest.mark.parametrize("allow_single_cluster", [True, False])
@pytest.mark.parametrize("max_cluster_size", [0])
@pytest.mark.parametrize("cluster_selection_method", ["eom", "leaf"])
@pytest.mark.parametrize("connectivity", ["knn"])
def test_approximate_predict_moons(
nrows,
n_points_to_predict,
Expand All @@ -790,7 +775,6 @@ def test_approximate_predict_moons(
allow_single_cluster,
max_cluster_size,
cluster_selection_method,
connectivity,
):

X, y = datasets.make_moons(
Expand Down Expand Up @@ -845,7 +829,6 @@ def test_approximate_predict_moons(
@pytest.mark.parametrize("allow_single_cluster", [True, False])
@pytest.mark.parametrize("max_cluster_size", [0])
@pytest.mark.parametrize("cluster_selection_method", ["eom", "leaf"])
@pytest.mark.parametrize("connectivity", ["knn"])
def test_approximate_predict_circles(
nrows,
n_points_to_predict,
Expand All @@ -855,7 +838,6 @@ def test_approximate_predict_circles(
allow_single_cluster,
max_cluster_size,
cluster_selection_method,
connectivity,
):
X, y = datasets.make_circles(
n_samples=nrows + n_points_to_predict,
Expand Down Expand Up @@ -911,7 +893,6 @@ def test_approximate_predict_circles(
@pytest.mark.parametrize("allow_single_cluster", [False])
@pytest.mark.parametrize("max_cluster_size", [0])
@pytest.mark.parametrize("cluster_selection_method", ["eom"])
@pytest.mark.parametrize("connectivity", ["knn"])
def test_approximate_predict_digits(
n_points_to_predict,
min_samples,
Expand All @@ -920,7 +901,6 @@ def test_approximate_predict_digits(
allow_single_cluster,
max_cluster_size,
cluster_selection_method,
connectivity,
):
digits = datasets.load_digits()
X, y = digits.data, digits.target
Expand Down Expand Up @@ -1053,7 +1033,6 @@ def test_membership_vector_blobs(
@pytest.mark.parametrize("allow_single_cluster", [True, False])
@pytest.mark.parametrize("max_cluster_size", [0])
@pytest.mark.parametrize("cluster_selection_method", ["eom", "leaf"])
@pytest.mark.parametrize("connectivity", ["knn"])
@pytest.mark.parametrize("batch_size", [16])
def test_membership_vector_moons(
nrows,
Expand All @@ -1064,7 +1043,6 @@ def test_membership_vector_moons(
min_cluster_size,
allow_single_cluster,
max_cluster_size,
connectivity,
batch_size,
):

Expand Down Expand Up @@ -1117,7 +1095,6 @@ def test_membership_vector_moons(
@pytest.mark.parametrize("allow_single_cluster", [True, False])
@pytest.mark.parametrize("max_cluster_size", [0])
@pytest.mark.parametrize("cluster_selection_method", ["eom", "leaf"])
@pytest.mark.parametrize("connectivity", ["knn"])
@pytest.mark.parametrize("batch_size", [16])
def test_membership_vector_circles(
nrows,
Expand All @@ -1128,7 +1105,6 @@ def test_membership_vector_circles(
min_cluster_size,
allow_single_cluster,
max_cluster_size,
connectivity,
batch_size,
):
X, y = datasets.make_circles(
Expand Down