Skip to content

Commit 550aba7

Browse files
authored
Init checks for Dask KMeans (#7391)
Closes #7389

Authors:
  - Victor Lafargue (https://github.com/viclafargue)

Approvers:
  - Divye Gala (https://github.com/divyegala)

URL: #7391
1 parent 41a6ca1 commit 550aba7

1 file changed

Lines changed: 6 additions & 3 deletions

File tree

python/cuml/cuml/cluster/kmeans.pyx

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -537,12 +537,15 @@ class KMeans(Base,
537537
"while a minimum of 1 is required by KMeans."
538538
)
539539

540-
# Skip this check if running in multigpu mode. In that case we don't care if
541-
# a single partition has fewer rows than clusters
542-
if not multigpu and n_rows < self.n_clusters:
540+
if n_rows < self.n_clusters:
543541
raise ValueError(
544542
f"n_samples={n_rows} should be >= n_clusters={self.n_clusters}."
545543
)
544+
if multigpu and (self.init == "k-means++" or self.oversampling_factor <= 0):
545+
raise ValueError(
546+
"k-means++ init or oversampling_factor=0 not supported "
547+
"for multi-GPU KMeans"
548+
)
546549

547550
# Allocate output cluster_centers_
548551
if isinstance(self.init, str):

0 commit comments

Comments
 (0)