-
Notifications
You must be signed in to change notification settings - Fork 623
SpectralClustering in cuml.accel
#7804
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 10 commits
9c0d36c
ecfd2b3
3356c66
ebfdb91
178e310
afca415
8e65318
a5e3e37
23a75ed
f456595
7e9155d
e4c1143
a779f60
5ca720f
20501e2
6fa7376
eb4547c
f987713
11e9c4c
e66c72f
c5923a2
044806a
def0dba
f514de9
8333cf6
175dc91
28896cf
8c7e73b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -106,6 +106,18 @@ KMeans | |
| - If a callable ``init`` is provided. | ||
| - If ``X`` is sparse. | ||
|
|
||
| SpectralClustering | ||
| ^^^^^^^^^^^^^^^^^^ | ||
|
|
||
| ``SpectralClustering`` will fall back to CPU in the following cases: | ||
|
|
||
| - If ``assign_labels`` is not ``"kmeans"``. | ||
| - If ``affinity`` is not ``"nearest_neighbors"`` or ``"precomputed"``. | ||
|
|
||
| The following fitted attributes are currently not computed: | ||
|
|
||
| - ``affinity_matrix_`` | ||
|
coderabbitai[bot] marked this conversation as resolved.
Comment on lines
+109
to
+122
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I agree with the bot comments here, if the
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Addressed in e66c72f |
||
|
|
||
| DBSCAN | ||
| ^^^^^^ | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,149 @@ | ||
| # | ||
| # SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
| # | ||
|
|
||
| import numpy as np | ||
| import pytest | ||
| from sklearn.cluster import SpectralClustering | ||
| from sklearn.datasets import make_blobs | ||
| from sklearn.metrics import adjusted_rand_score | ||
|
|
||
|
|
||
| @pytest.fixture(scope="module") | ||
| def clustering_data(): | ||
| X, y = make_blobs( | ||
| n_samples=300, centers=3, cluster_std=1.0, random_state=42 | ||
| ) | ||
| return X.astype(np.float32), y | ||
|
|
||
|
|
||
| def test_spectral_clustering_default(clustering_data): | ||
| X, y = clustering_data | ||
| sc = SpectralClustering(affinity="nearest_neighbors", random_state=42).fit( | ||
| X | ||
| ) | ||
| assert sc.labels_.shape == y.shape | ||
|
|
||
|
|
||
| @pytest.mark.parametrize("n_clusters", [2, 3, 4, 5]) | ||
| def test_spectral_clustering_n_clusters(clustering_data, n_clusters): | ||
| X, y_true = clustering_data | ||
| sc = SpectralClustering( | ||
| n_clusters=n_clusters, | ||
| affinity="nearest_neighbors", | ||
| random_state=42, | ||
| ).fit(X) | ||
| y_pred = sc.labels_ | ||
| adjusted_rand_score(y_true, y_pred) | ||
|
aamijar marked this conversation as resolved.
Outdated
|
||
|
|
||
|
coderabbitai[bot] marked this conversation as resolved.
|
||
|
|
||
| @pytest.mark.parametrize("n_neighbors", [5, 10, 20]) | ||
| def test_spectral_clustering_n_neighbors(clustering_data, n_neighbors): | ||
| X, y_true = clustering_data | ||
| sc = SpectralClustering( | ||
| n_clusters=3, | ||
| affinity="nearest_neighbors", | ||
| n_neighbors=n_neighbors, | ||
| random_state=42, | ||
| ).fit(X) | ||
| y_pred = sc.labels_ | ||
| adjusted_rand_score(y_true, y_pred) | ||
|
aamijar marked this conversation as resolved.
Outdated
|
||
|
|
||
|
|
||
| @pytest.mark.parametrize("n_components", [2, 3, 5]) | ||
| def test_spectral_clustering_n_components(clustering_data, n_components): | ||
| X, y_true = clustering_data | ||
| sc = SpectralClustering( | ||
| n_clusters=3, | ||
| n_components=n_components, | ||
| affinity="nearest_neighbors", | ||
| random_state=42, | ||
| ).fit(X) | ||
| y_pred = sc.labels_ | ||
| adjusted_rand_score(y_true, y_pred) | ||
|
aamijar marked this conversation as resolved.
Outdated
|
||
|
|
||
|
|
||
| @pytest.mark.parametrize("n_init", [1, 5, 10]) | ||
| def test_spectral_clustering_n_init(clustering_data, n_init): | ||
| X, y_true = clustering_data | ||
| sc = SpectralClustering( | ||
| n_clusters=3, | ||
| affinity="nearest_neighbors", | ||
| n_init=n_init, | ||
| random_state=42, | ||
| ).fit(X) | ||
| y_pred = sc.labels_ | ||
| adjusted_rand_score(y_true, y_pred) | ||
|
aamijar marked this conversation as resolved.
Outdated
|
||
|
|
||
|
|
||
| @pytest.mark.parametrize("eigen_tol", ["auto", 0.0, 1e-4]) | ||
| def test_spectral_clustering_eigen_tol(clustering_data, eigen_tol): | ||
| X, y_true = clustering_data | ||
| sc = SpectralClustering( | ||
| n_clusters=3, | ||
| affinity="nearest_neighbors", | ||
| eigen_tol=eigen_tol, | ||
| random_state=42, | ||
| ).fit(X) | ||
| y_pred = sc.labels_ | ||
| adjusted_rand_score(y_true, y_pred) | ||
|
aamijar marked this conversation as resolved.
Outdated
|
||
|
|
||
|
|
||
| @pytest.mark.parametrize( | ||
| "assign_labels", ["kmeans", "discretize", "cluster_qr"] | ||
| ) | ||
| def test_spectral_clustering_assign_labels(clustering_data, assign_labels): | ||
| X, y_true = clustering_data | ||
| sc = SpectralClustering( | ||
| n_clusters=3, | ||
| affinity="nearest_neighbors", | ||
| assign_labels=assign_labels, | ||
| random_state=42, | ||
| ).fit(X) | ||
| y_pred = sc.labels_ | ||
| adjusted_rand_score(y_true, y_pred) | ||
|
aamijar marked this conversation as resolved.
Outdated
|
||
|
|
||
|
|
||
| def test_spectral_clustering_precomputed(clustering_data): | ||
| from sklearn.neighbors import kneighbors_graph | ||
|
|
||
| X, y_true = clustering_data | ||
| connectivity = kneighbors_graph(X, n_neighbors=10, include_self=True) | ||
| affinity_matrix = 0.5 * (connectivity + connectivity.T) | ||
| sc = SpectralClustering( | ||
| n_clusters=3, | ||
| affinity="precomputed", | ||
| random_state=42, | ||
| ).fit(affinity_matrix) | ||
| y_pred = sc.labels_ | ||
| adjusted_rand_score(y_true, y_pred) | ||
|
aamijar marked this conversation as resolved.
Outdated
|
||
|
|
||
|
|
||
| def test_spectral_clustering_fit_predict(clustering_data): | ||
| X, y_true = clustering_data | ||
| sc = SpectralClustering( | ||
| n_clusters=3, | ||
| affinity="nearest_neighbors", | ||
| random_state=42, | ||
| ) | ||
| labels = sc.fit_predict(X) | ||
| assert labels.shape == y_true.shape | ||
| assert np.array_equal(labels, sc.labels_) | ||
|
|
||
|
|
||
| def test_spectral_clustering_random_state(clustering_data): | ||
| X, _ = clustering_data | ||
| sc1 = SpectralClustering( | ||
| n_clusters=3, | ||
| affinity="nearest_neighbors", | ||
| random_state=42, | ||
| ).fit(X) | ||
| sc2 = SpectralClustering( | ||
| n_clusters=3, | ||
| affinity="nearest_neighbors", | ||
| random_state=42, | ||
| ).fit(X) | ||
| assert np.array_equal(sc1.labels_, sc2.labels_), ( | ||
| "Results should be consistent with the same random_state" | ||
| ) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,7 +1,7 @@ | ||
| # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
|
|
||
| from sklearn.cluster import DBSCAN, KMeans | ||
| from sklearn.cluster import DBSCAN, KMeans, SpectralClustering | ||
| from sklearn.datasets import make_blobs, make_classification, make_regression | ||
| from sklearn.decomposition import PCA, TruncatedSVD | ||
| from sklearn.linear_model import ( | ||
|
|
@@ -32,6 +32,15 @@ def test_dbscan(): | |
| clf.labels_ | ||
|
|
||
|
|
||
| def test_spectral_clustering(): | ||
| X, y_true = make_blobs(n_samples=100, centers=3, random_state=42) | ||
| X = X.astype("float32") | ||
| sc = SpectralClustering( | ||
| n_clusters=3, affinity="nearest_neighbors", random_state=42 | ||
| ).fit(X) | ||
| sc.labels_ | ||
|
Comment on lines
+35
to
+41
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Add assertions and remove unused `y_true`. Right now the test evaluates `sc.labels_` as a bare expression without asserting anything about it, and the unpacked `y_true` is never used.
✅ Suggested fix
def test_spectral_clustering():
- X, y_true = make_blobs(n_samples=100, centers=3, random_state=42)
+ X, _ = make_blobs(n_samples=100, centers=3, random_state=42)
X = X.astype("float32")
sc = SpectralClustering(
n_clusters=3, affinity="nearest_neighbors", random_state=42
).fit(X)
- sc.labels_
+ assert sc.labels_.shape == (X.shape[0],)
+ assert len(set(sc.labels_.tolist())) == 3
🧰 Tools
🪛 Ruff (0.15.1)
[warning] 36-36: Unpacked variable `y_true` is never used. Prefix it with an underscore or any other dummy variable pattern (RUF059)
[warning] 41-41: Found useless expression. Either assign it to a variable or remove it. (B018)
🤖 Prompt for AI Agents |
||
|
|
||
|
|
||
| def test_pca(): | ||
| X, _ = make_blobs(n_samples=100, centers=3, random_state=42) | ||
| pca = PCA().fit(X) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -32,7 +32,7 @@ | |
| from sklearn.utils.validation import check_is_fitted | ||
|
|
||
| import cuml | ||
| from cuml.cluster import DBSCAN, KMeans | ||
| from cuml.cluster import DBSCAN, KMeans, SpectralClustering | ||
| from cuml.decomposition import PCA, TruncatedSVD | ||
| from cuml.internals.interop import UnsupportedOnCPU, UnsupportedOnGPU | ||
| from cuml.linear_model import ( | ||
|
|
@@ -194,6 +194,23 @@ def test_dbscan(random_state): | |
| assert array_equal(original.labels_, roundtrip_model.labels_) | ||
|
|
||
|
|
||
| def test_spectral_clustering(random_state): | ||
| X, _ = make_blobs( | ||
| n_samples=100, n_features=10, centers=3, random_state=random_state | ||
| ) | ||
| X = X.astype(np.float32) | ||
| original = SpectralClustering( | ||
| n_clusters=3, | ||
| affinity="nearest_neighbors", | ||
| n_neighbors=10, | ||
| random_state=random_state, | ||
| ) | ||
| original.fit(X) | ||
| sklearn_model = original.as_sklearn() | ||
| roundtrip_model = SpectralClustering.from_sklearn(sklearn_model) | ||
| assert array_equal(original.labels_, roundtrip_model.labels_) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why not using
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Addressed in e66c72f |
||
|
|
||
|
|
||
| def test_pca(random_state): | ||
| X = np.random.RandomState(random_state).rand(50, 5) | ||
| original = PCA(n_components=2) | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.