Skip to content

Commit a009b7f

Browse files
authored
Handling all identical vectors in UMAP smooth_knn_dist_kernel (#6904)
`smooth_knn_dist_kernel` just fails if all neighbors have zero distance (i.e. are identical vectors). Instead of failing, set `rhos` and `sigmas` to specific values in such cases. Closes #7017 Authors: - Jinsol Park (https://github.com/jinsolp) - Simon Adorf (https://github.com/csadorf) Approvers: - Victor Lafargue (https://github.com/viclafargue) - Simon Adorf (https://github.com/csadorf) URL: #6904
1 parent 124608b commit a009b7f

2 files changed

Lines changed: 5 additions & 13 deletions

File tree

cpp/src/umap/fuzzy_simpl_set/naive.cuh

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,7 @@ static const float MIN_K_DIST_SCALE = 1e-3;
8080
*
8181
*/
8282
template <typename value_t, typename nnz_t, int TPB_X>
83-
CUML_KERNEL void smooth_knn_dist_kernel(bool* error_status,
84-
const value_t* knn_dists,
83+
CUML_KERNEL void smooth_knn_dist_kernel(const value_t* knn_dists,
8584
int n,
8685
float mean_dist,
8786
value_t* sigmas,
@@ -122,7 +121,9 @@ CUML_KERNEL void smooth_knn_dist_kernel(bool* error_status,
122121
}
123122

124123
if (start_nonzero == -1) {
125-
*error_status = true;
124+
// All distances are zero: identical neighbors
125+
rhos[row] = 0.0;
126+
sigmas[row] = MIN_K_DIST_SCALE * mean_dist; // i.e. 1e-3 * mean_dist (MIN_K_DIST_SCALE = 1e-3)
126127
return;
127128
}
128129

@@ -272,17 +273,9 @@ void smooth_knn_dist(nnz_t n,
272273
* Smooth kNN distances to be continuous
273274
*/
274275

275-
bool has_found_an_error = false;
276-
rmm::device_scalar<bool> error_status(stream);
277-
error_status.set_value_async(has_found_an_error, stream);
278-
279276
smooth_knn_dist_kernel<value_t, nnz_t, TPB_X><<<grid, blk, 0, stream>>>(
280-
error_status.data(), knn_dists, n, mean_dist, sigmas, rhos, n_neighbors, local_connectivity);
277+
knn_dists, n, mean_dist, sigmas, rhos, n_neighbors, local_connectivity);
281278
RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
282-
283-
has_found_an_error = error_status.value(stream);
284-
RAFT_EXPECTS(!has_found_an_error,
285-
"At least one row does not have any neighbor with non-zero distance.");
286279
}
287280

288281
template <typename value_t, typename value_idx, typename nnz_t, int TPB_X>

python/cuml/cuml_accel_tests/upstream/umap/xfail-list.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
- "umap.tests.test_umap_ops::test_umap_update"
1818
- "umap.tests.test_umap_ops::test_umap_update_large"
1919
- "umap.tests.test_umap_repeated_data::test_repeated_points_large_n"
20-
- "umap.tests.test_umap_trustworthiness::test_umap_sparse_trustworthiness"
2120
- "umap.tests.test_umap_validation_params::test_umap_bad_hellinger_data"
2221
- "umap.tests.test_umap_validation_params::test_umap_bad_metrics"
2322
- "umap.tests.test_umap_validation_params::test_umap_bad_n_components"

0 commit comments

Comments
 (0)