Skip to content

Commit 8bea92c

Browse files
authored
Deprecate connectivity parameter to HDBSCAN (#6936)
Found this while doing the recent `HDBSCAN` refactor. This parameter doesn't do anything and hasn't for a long time. It's undocumented and unused, so we should just rip it out.

Authors:
- Jim Crist-Harif (https://github.com/jcrist)

Approvers:
- Simon Adorf (https://github.com/csadorf)

URL: #6936
1 parent 93e0820 commit 8bea92c

2 files changed

Lines changed: 8 additions & 34 deletions

File tree

python/cuml/cuml/cluster/hdbscan/hdbscan.pyx

Lines changed: 8 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,8 @@
1414
#
1515

1616
# distutils: language = c++
17+
import warnings
18+
1719
import cupy as cp
1820
import numpy as np
1921
from pylibraft.common.handle import Handle
@@ -750,7 +752,7 @@ class HDBSCAN(Base, InteropMixin, ClusterMixin, CMajorInputTagMixin):
750752
gen_min_span_tree=False,
751753
handle=None,
752754
verbose=False,
753-
connectivity='knn',
755+
connectivity='deprecated',
754756
output_type=None,
755757
prediction_data=False):
756758

@@ -761,9 +763,11 @@ class HDBSCAN(Base, InteropMixin, ClusterMixin, CMajorInputTagMixin):
761763
if min_samples is None:
762764
min_samples = min_cluster_size
763765

764-
if connectivity not in ["knn", "pairwise"]:
765-
raise ValueError("'connectivity' can only be one of "
766-
"{'knn', 'pairwise'}")
766+
if connectivity != "deprecated":
767+
warnings.warn(
768+
"The `connectivity` parameter is deprecated and will be removed in 25.10",
769+
FutureWarning,
770+
)
767771

768772
if 2 < min_samples and min_samples > 1023:
769773
raise ValueError("'min_samples' must be a positive number "
@@ -888,12 +892,6 @@ class HDBSCAN(Base, InteropMixin, ClusterMixin, CMajorInputTagMixin):
888892
params.cluster_selection_epsilon = self.cluster_selection_epsilon
889893
params.allow_single_cluster = self.allow_single_cluster
890894

891-
if self.connectivity not in {"knn", "pairwise"}:
892-
raise ValueError(
893-
"`connectivity` must be one of {'knn', 'pairwise'}, "
894-
f"got {self.connectivity!r}"
895-
)
896-
897895
if self.cluster_selection_method == 'eom':
898896
params.cluster_selection_method = lib.CLUSTER_SELECTION_METHOD.EOM
899897
elif self.cluster_selection_method == 'leaf':

python/cuml/cuml/tests/test_hdbscan.py

Lines changed: 0 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -154,12 +154,10 @@ def assert_membership_vectors(cu_vecs, sk_vecs):
154154
@pytest.mark.parametrize("cluster_selection_epsilon", [0.0])
155155
@pytest.mark.parametrize("max_cluster_size", [0])
156156
@pytest.mark.parametrize("cluster_selection_method", ["eom", "leaf"])
157-
@pytest.mark.parametrize("connectivity", ["knn"])
158157
def test_hdbscan_blobs(
159158
nrows,
160159
ncols,
161160
nclusters,
162-
connectivity,
163161
cluster_selection_epsilon,
164162
cluster_selection_method,
165163
allow_single_cluster,
@@ -225,10 +223,8 @@ def test_hdbscan_blobs(
225223
)
226224
@pytest.mark.parametrize("allow_single_cluster", [True, False])
227225
@pytest.mark.parametrize("cluster_selection_method", ["eom", "leaf"])
228-
@pytest.mark.parametrize("connectivity", ["knn"])
229226
def test_hdbscan_sklearn_datasets(
230227
supervised_learning_dataset,
231-
connectivity,
232228
cluster_selection_epsilon,
233229
cluster_selection_method,
234230
min_samples_cluster_size_bounds,
@@ -289,10 +285,8 @@ def test_hdbscan_sklearn_datasets(
289285
@pytest.mark.parametrize("max_cluster_size", [0])
290286
@pytest.mark.parametrize("allow_single_cluster", [True, False])
291287
@pytest.mark.parametrize("cluster_selection_method", ["eom", "leaf"])
292-
@pytest.mark.parametrize("connectivity", ["knn"])
293288
def test_hdbscan_sklearn_extract_clusters(
294289
supervised_learning_dataset,
295-
connectivity,
296290
cluster_selection_epsilon,
297291
cluster_selection_method,
298292
min_samples,
@@ -333,11 +327,9 @@ def test_hdbscan_sklearn_extract_clusters(
333327
@pytest.mark.parametrize("allow_single_cluster", [True, False])
334328
@pytest.mark.parametrize("max_cluster_size", [0])
335329
@pytest.mark.parametrize("cluster_selection_method", ["eom"])
336-
@pytest.mark.parametrize("connectivity", ["knn"])
337330
def test_hdbscan_cluster_patterns(
338331
dataset,
339332
nrows,
340-
connectivity,
341333
cluster_selection_epsilon,
342334
cluster_selection_method,
343335
min_cluster_size,
@@ -396,11 +388,9 @@ def test_hdbscan_cluster_patterns(
396388
@pytest.mark.parametrize("allow_single_cluster", [True, False])
397389
@pytest.mark.parametrize("max_cluster_size", [0])
398390
@pytest.mark.parametrize("cluster_selection_method", ["eom", "leaf"])
399-
@pytest.mark.parametrize("connectivity", ["knn"])
400391
def test_hdbscan_cluster_patterns_extract_clusters(
401392
dataset,
402393
nrows,
403-
connectivity,
404394
cluster_selection_epsilon,
405395
cluster_selection_method,
406396
min_cluster_size,
@@ -589,7 +579,6 @@ def test_all_points_membership_vectors_blobs(
589579
@pytest.mark.parametrize("allow_single_cluster", [True, False])
590580
@pytest.mark.parametrize("max_cluster_size", [0])
591581
@pytest.mark.parametrize("cluster_selection_method", ["eom", "leaf"])
592-
@pytest.mark.parametrize("connectivity", ["knn"])
593582
@pytest.mark.parametrize("batch_size", [128, 1000])
594583
def test_all_points_membership_vectors_moons(
595584
nrows,
@@ -599,7 +588,6 @@ def test_all_points_membership_vectors_moons(
599588
min_cluster_size,
600589
allow_single_cluster,
601590
max_cluster_size,
602-
connectivity,
603591
batch_size,
604592
):
605593

@@ -646,7 +634,6 @@ def test_all_points_membership_vectors_moons(
646634
@pytest.mark.parametrize("allow_single_cluster", [True, False])
647635
@pytest.mark.parametrize("max_cluster_size", [0])
648636
@pytest.mark.parametrize("cluster_selection_method", ["eom", "leaf"])
649-
@pytest.mark.parametrize("connectivity", ["knn"])
650637
@pytest.mark.parametrize("batch_size", [128, 1000])
651638
def test_all_points_membership_vectors_circles(
652639
nrows,
@@ -656,7 +643,6 @@ def test_all_points_membership_vectors_circles(
656643
min_cluster_size,
657644
allow_single_cluster,
658645
max_cluster_size,
659-
connectivity,
660646
batch_size,
661647
):
662648
X, y = datasets.make_circles(
@@ -780,7 +766,6 @@ def test_approximate_predict_blobs(
780766
@pytest.mark.parametrize("allow_single_cluster", [True, False])
781767
@pytest.mark.parametrize("max_cluster_size", [0])
782768
@pytest.mark.parametrize("cluster_selection_method", ["eom", "leaf"])
783-
@pytest.mark.parametrize("connectivity", ["knn"])
784769
def test_approximate_predict_moons(
785770
nrows,
786771
n_points_to_predict,
@@ -790,7 +775,6 @@ def test_approximate_predict_moons(
790775
allow_single_cluster,
791776
max_cluster_size,
792777
cluster_selection_method,
793-
connectivity,
794778
):
795779

796780
X, y = datasets.make_moons(
@@ -845,7 +829,6 @@ def test_approximate_predict_moons(
845829
@pytest.mark.parametrize("allow_single_cluster", [True, False])
846830
@pytest.mark.parametrize("max_cluster_size", [0])
847831
@pytest.mark.parametrize("cluster_selection_method", ["eom", "leaf"])
848-
@pytest.mark.parametrize("connectivity", ["knn"])
849832
def test_approximate_predict_circles(
850833
nrows,
851834
n_points_to_predict,
@@ -855,7 +838,6 @@ def test_approximate_predict_circles(
855838
allow_single_cluster,
856839
max_cluster_size,
857840
cluster_selection_method,
858-
connectivity,
859841
):
860842
X, y = datasets.make_circles(
861843
n_samples=nrows + n_points_to_predict,
@@ -911,7 +893,6 @@ def test_approximate_predict_circles(
911893
@pytest.mark.parametrize("allow_single_cluster", [False])
912894
@pytest.mark.parametrize("max_cluster_size", [0])
913895
@pytest.mark.parametrize("cluster_selection_method", ["eom"])
914-
@pytest.mark.parametrize("connectivity", ["knn"])
915896
def test_approximate_predict_digits(
916897
n_points_to_predict,
917898
min_samples,
@@ -920,7 +901,6 @@ def test_approximate_predict_digits(
920901
allow_single_cluster,
921902
max_cluster_size,
922903
cluster_selection_method,
923-
connectivity,
924904
):
925905
digits = datasets.load_digits()
926906
X, y = digits.data, digits.target
@@ -1053,7 +1033,6 @@ def test_membership_vector_blobs(
10531033
@pytest.mark.parametrize("allow_single_cluster", [True, False])
10541034
@pytest.mark.parametrize("max_cluster_size", [0])
10551035
@pytest.mark.parametrize("cluster_selection_method", ["eom", "leaf"])
1056-
@pytest.mark.parametrize("connectivity", ["knn"])
10571036
@pytest.mark.parametrize("batch_size", [16])
10581037
def test_membership_vector_moons(
10591038
nrows,
@@ -1064,7 +1043,6 @@ def test_membership_vector_moons(
10641043
min_cluster_size,
10651044
allow_single_cluster,
10661045
max_cluster_size,
1067-
connectivity,
10681046
batch_size,
10691047
):
10701048

@@ -1117,7 +1095,6 @@ def test_membership_vector_moons(
11171095
@pytest.mark.parametrize("allow_single_cluster", [True, False])
11181096
@pytest.mark.parametrize("max_cluster_size", [0])
11191097
@pytest.mark.parametrize("cluster_selection_method", ["eom", "leaf"])
1120-
@pytest.mark.parametrize("connectivity", ["knn"])
11211098
@pytest.mark.parametrize("batch_size", [16])
11221099
def test_membership_vector_circles(
11231100
nrows,
@@ -1128,7 +1105,6 @@ def test_membership_vector_circles(
11281105
min_cluster_size,
11291106
allow_single_cluster,
11301107
max_cluster_size,
1131-
connectivity,
11321108
batch_size,
11331109
):
11341110
X, y = datasets.make_circles(

0 commit comments

Comments
 (0)