diff --git a/cpp/src/umap/fuzzy_simpl_set/naive.cuh b/cpp/src/umap/fuzzy_simpl_set/naive.cuh index 29f632692d..1939387308 100644 --- a/cpp/src/umap/fuzzy_simpl_set/naive.cuh +++ b/cpp/src/umap/fuzzy_simpl_set/naive.cuh @@ -80,7 +80,8 @@ static const float MIN_K_DIST_SCALE = 1e-3; * */ template -CUML_KERNEL void smooth_knn_dist_kernel(const value_t* knn_dists, +CUML_KERNEL void smooth_knn_dist_kernel(bool* error_status, + const value_t* knn_dists, int n, float mean_dist, value_t* sigmas, @@ -120,6 +121,11 @@ CUML_KERNEL void smooth_knn_dist_kernel(const value_t* knn_dists, if (cur_dist > max_nonzero) max_nonzero = cur_dist; } + if (start_nonzero == -1) { + *error_status = true; + return; + } + float ith_distances_mean = sum / float(n_neighbors); if (total_nonzero >= local_connectivity) { int index = int(floor(local_connectivity)); @@ -265,9 +271,18 @@ void smooth_knn_dist(nnz_t n, /** * Smooth kNN distances to be continuous */ + + bool has_found_an_error = false; + rmm::device_scalar error_status(stream); + error_status.set_value_async(has_found_an_error, stream); + smooth_knn_dist_kernel<<>>( - knn_dists, n, mean_dist, sigmas, rhos, n_neighbors, local_connectivity); - RAFT_CUDA_TRY(cudaPeekAtLastError()); + error_status.data(), knn_dists, n, mean_dist, sigmas, rhos, n_neighbors, local_connectivity); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); + + has_found_an_error = error_status.value(stream); + RAFT_EXPECTS(!has_found_an_error, + "At least one row does not have any neighbor with non-zero distance."); } template diff --git a/python/cuml/cuml/tests/test_umap.py b/python/cuml/cuml/tests/test_umap.py index 96d6bbf532..c68d981d16 100644 --- a/python/cuml/cuml/tests/test_umap.py +++ b/python/cuml/cuml/tests/test_umap.py @@ -783,8 +783,13 @@ def test_umap_distance_metrics_fit_transform_trust( def test_umap_distance_metrics_fit_transform_trust_on_sparse_input( metric, supported, umap_learn_supported ): + if metric == "jaccard": + n_features = 1000 + else: + n_features = 64 + data, labels = make_blobs( - n_samples=1000, n_features=64, centers=5, random_state=42 + n_samples=1000, n_features=n_features, centers=5, random_state=42 ) data_selection = np.random.RandomState(42).choice(