-
Notifications
You must be signed in to change notification settings - Fork 184
SNMG ANN build with OpenMP nested parallelism #1526
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
e166e20
c8aadaf
a234f47
fdeeb9e
666736f
03500fe
0b34169
466877e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -92,13 +92,28 @@ void build(const raft::resources& clique, | |
| RAFT_LOG_DEBUG("REPLICATED BUILD: %d*%drows", index.num_ranks_, n_rows); | ||
|
|
||
| index.ann_interfaces_.resize(index.num_ranks_); | ||
| #pragma omp parallel for | ||
|
|
||
| // Enable nested parallelism | ||
| int saved_omp_threads = cuvs::core::omp::get_max_threads(); | ||
| int threads_per_rank = std::max(1, saved_omp_threads / index.num_ranks_); | ||
| cuvs::core::omp::set_nested(1); | ||
|
|
||
| const int& requirements = index.num_ranks_; | ||
| cuvs::core::omp::check_threads(requirements); | ||
|
|
||
| #pragma omp parallel for num_threads(index.num_ranks_) | ||
|
viclafargue marked this conversation as resolved.
|
||
| for (int rank = 0; rank < index.num_ranks_; rank++) { | ||
| // Set thread limit for this rank's nested OpenMP regions | ||
| cuvs::core::omp::set_num_threads(threads_per_rank); | ||
|
viclafargue marked this conversation as resolved.
|
||
|
|
||
| const raft::resources& dev_res = raft::resource::set_current_device_to_rank(clique, rank); | ||
| auto& ann_if = index.ann_interfaces_[rank]; | ||
| cuvs::neighbors::build(dev_res, ann_if, index_params, index_dataset); | ||
| resource::sync_stream(dev_res); | ||
| } | ||
|
|
||
| // Restore original thread count | ||
| cuvs::core::omp::set_num_threads(saved_omp_threads); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This restoration is a bit confusing: the pool of threads is set inside the for loop above, but it is restored after the for loop. Can you verify that this logic is correct?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I agree. This is actually not necessary. My worry was that the main thread could be one of the threads in the loop.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I also added the same OpenMP usage for the |
||
| } else if (index.mode_ == SHARDED) { | ||
| int64_t n_rows = index_dataset.extent(0); | ||
| int64_t n_cols = index_dataset.extent(1); | ||
|
|
@@ -107,8 +122,20 @@ void build(const raft::resources& clique, | |
| RAFT_LOG_DEBUG("SHARDED BUILD: %d*%drows", index.num_ranks_, n_rows_per_shard); | ||
|
|
||
| index.ann_interfaces_.resize(index.num_ranks_); | ||
| #pragma omp parallel for | ||
|
|
||
| // Enable nested parallelism | ||
| int saved_omp_threads = cuvs::core::omp::get_max_threads(); | ||
| int threads_per_rank = std::max(1, saved_omp_threads / index.num_ranks_); | ||
| cuvs::core::omp::set_nested(1); | ||
|
|
||
| const int& requirements = index.num_ranks_; | ||
|
viclafargue marked this conversation as resolved.
Outdated
|
||
| cuvs::core::omp::check_threads(requirements); | ||
|
|
||
| #pragma omp parallel for num_threads(index.num_ranks_) | ||
| for (int rank = 0; rank < index.num_ranks_; rank++) { | ||
| // Set thread limit for this rank's nested OpenMP regions | ||
| cuvs::core::omp::set_num_threads(threads_per_rank); | ||
|
|
||
| const raft::resources& dev_res = raft::resource::set_current_device_to_rank(clique, rank); | ||
| int64_t offset = rank * n_rows_per_shard; | ||
| int64_t n_rows_of_current_shard = std::min(n_rows_per_shard, n_rows - offset); | ||
|
|
@@ -119,6 +146,9 @@ void build(const raft::resources& clique, | |
| cuvs::neighbors::build(dev_res, ann_if, index_params, partition); | ||
|
viclafargue marked this conversation as resolved.
|
||
| resource::sync_stream(dev_res); | ||
| } | ||
|
|
||
|
viclafargue marked this conversation as resolved.
|
||
| // Restore original thread count | ||
| cuvs::core::omp::set_num_threads(saved_omp_threads); | ||
| } | ||
| } | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.