Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion faiss/IndexFastScan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ void IndexFastScan::search_dispatch_implem(
}
} else {
// explicitly slice over threads
#pragma omp parallel for num_threads(nt)
#pragma omp parallel for num_threads(num_omp_threads)
for (int slice = 0; slice < nt; slice++) {
idx_t i0 = n * slice / nt;
idx_t i1 = n * (slice + 1) / nt;
Expand Down
4 changes: 2 additions & 2 deletions faiss/IndexIVF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1006,7 +1006,7 @@ void IndexIVF::search_and_reconstruct(
labels,
true /* store_pairs */,
params);
#pragma omp parallel for if (n * k > 1000)
#pragma omp parallel for if (n * k > 1000) num_threads(num_omp_threads)
for (idx_t ij = 0; ij < n * k; ij++) {
idx_t key = labels[ij];
float* reconstructed = recons + ij * d;
Expand Down Expand Up @@ -1068,7 +1068,7 @@ void IndexIVF::search_and_return_codes(
code_size_1 += coarse_code_size();
}

#pragma omp parallel for if (n * k > 1000)
#pragma omp parallel for if (n * k > 1000) num_threads(num_omp_threads)
for (idx_t ij = 0; ij < n * k; ij++) {
idx_t key = labels[ij];
uint8_t* code1 = codes + ij * code_size_1;
Expand Down
2 changes: 1 addition & 1 deletion faiss/IndexIVFFastScan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -640,7 +640,7 @@ void IndexIVFFastScan::range_search_dispatch_implem(
} else {
// explicitly slice over threads
int nslice = compute_search_nslice(this, n, cq.nprobe);
#pragma omp parallel
#pragma omp parallel num_threads(num_omp_threads)
{
RangeSearchPartialResult pres(&rres);

Expand Down
2 changes: 1 addition & 1 deletion faiss/impl/PolysemousTraining.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -779,7 +779,7 @@ void PolysemousTraining::optimize_reproduce_distances(
nt);
}

#pragma omp parallel for num_threads(nt)
#pragma omp parallel for num_threads(num_omp_threads)
for (int m = 0; m < pq.M; m++) {
std::vector<double> dis_table;

Expand Down
2 changes: 1 addition & 1 deletion faiss/impl/ProductQuantizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ void ProductQuantizer::decode(const uint8_t* code, float* x) const {
}

void ProductQuantizer::decode(const uint8_t* code, float* x, size_t n) const {
#pragma omp parallel for if (n > 100)
#pragma omp parallel for if (n > 100) num_threads(num_omp_threads)
for (int64_t i = 0; i < n; i++) {
this->decode(code + code_size * i, x + d * i);
}
Expand Down
4 changes: 2 additions & 2 deletions faiss/impl/residual_quantizer_encode_steps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ void beam_search_encode_step(
}
InterruptCallback::check();

#pragma omp parallel for if (n > 100)
#pragma omp parallel for if (n > 100) num_threads(num_omp_threads)
for (int64_t i = 0; i < n; i++) {
const int32_t* codes_i = codes + i * m * beam_size;
int32_t* new_codes_i = new_codes + i * (m + 1) * new_beam_size;
Expand Down Expand Up @@ -399,7 +399,7 @@ void beam_search_encode_step_tab(
{
FAISS_THROW_IF_NOT(ldc >= K);

#pragma omp parallel for if (n > 100) schedule(dynamic)
#pragma omp parallel for if (n > 100) schedule(dynamic) num_threads(num_omp_threads)
for (int64_t i = 0; i < n; i++) {
std::vector<float> cent_distances(beam_size * K);
std::vector<float> cd_common(K);
Expand Down
2 changes: 1 addition & 1 deletion faiss/utils/distances.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ void exhaustive_inner_product_seq(

FAISS_ASSERT(use_sel == (sel != nullptr));

#pragma omp parallel num_threads(nt)
#pragma omp parallel num_threads(num_omp_threads)
{
SingleResultHandler resi(res);
#pragma omp for
Expand Down
9 changes: 5 additions & 4 deletions faiss/utils/hamming.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ void hamming_range_search(
int radius,
size_t code_size,
RangeSearchResult* res) {
#pragma omp parallel
#pragma omp parallel num_threads(num_omp_threads)
{
RangeSearchPartialResult pres(res);

Expand Down Expand Up @@ -687,7 +687,7 @@ void pack_bitstrings(
uint8_t* packed,
size_t code_size) {
FAISS_THROW_IF_NOT(code_size >= (M * nbit + 7) / 8);
#pragma omp parallel for if (n > 1000)
#pragma omp parallel for if (n > 1000) num_threads(num_omp_threads)
for (int64_t i = 0; i < n; i++) {
const int32_t* in = unpacked + i * M;
uint8_t* out = packed + i * code_size;
Expand All @@ -710,6 +710,7 @@ void pack_bitstrings(
totbit += nbit[j];
}
FAISS_THROW_IF_NOT(code_size >= (totbit + 7) / 8);
// NOTE(review): this pragma is the only one in this changeset still missing
// num_threads(num_omp_threads) — add it here for consistency with the
// variable-nbit overload above and the unpack_bitstrings counterparts below.
#pragma omp parallel for if (n > 1000)
for (int64_t i = 0; i < n; i++) {
const int32_t* in = unpacked + i * M;
Expand All @@ -729,7 +730,7 @@ void unpack_bitstrings(
size_t code_size,
int32_t* unpacked) {
FAISS_THROW_IF_NOT(code_size >= (M * nbit + 7) / 8);
#pragma omp parallel for if (n > 1000)
#pragma omp parallel for if (n > 1000) num_threads(num_omp_threads)
for (int64_t i = 0; i < n; i++) {
const uint8_t* in = packed + i * code_size;
int32_t* out = unpacked + i * M;
Expand All @@ -752,7 +753,7 @@ void unpack_bitstrings(
totbit += nbit[j];
}
FAISS_THROW_IF_NOT(code_size >= (totbit + 7) / 8);
#pragma omp parallel for if (n > 1000)
#pragma omp parallel for if (n > 1000) num_threads(num_omp_threads)
for (int64_t i = 0; i < n; i++) {
const uint8_t* in = packed + i * code_size;
int32_t* out = unpacked + i * M;
Expand Down
21 changes: 9 additions & 12 deletions faiss/utils/sorting.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ void parallel_merge(
s2s[nt - 1].i1 = s2.i1;

// not sure parallel actually helps here
#pragma omp parallel for num_threads(nt)
#pragma omp parallel for num_threads(num_omp_threads)
for (int t = 0; t < nt; t++) {
s1s[t].i0 = s1.i0 + s1.len() * t / nt;
s1s[t].i1 = s1.i0 + s1.len() * (t + 1) / nt;
Expand Down Expand Up @@ -93,7 +93,7 @@ void parallel_merge(
assert(sws[nt - 1].i1 == s1.i1);

// do the actual merging
#pragma omp parallel for num_threads(nt)
#pragma omp parallel for num_threads(num_omp_threads)
for (int t = 0; t < nt; t++) {
SegmentS sw = sws[t];
SegmentS s1t = s1s[t];
Expand Down Expand Up @@ -176,7 +176,7 @@ void fvec_argsort_parallel(size_t n, const float* vals, size_t* perm) {
int sub_nt = nseg % 2 == 0 ? nt : nt - 1;
int sub_nseg1 = nseg / 2;

#pragma omp parallel for num_threads(nseg1)
#pragma omp parallel for num_threads(num_omp_threads)
for (int s = 0; s < nseg; s += 2) {
if (s + 1 == nseg) { // otherwise isolated segment
memcpy(permB + segs[s].i0,
Expand Down Expand Up @@ -257,7 +257,7 @@ void bucket_sort_parallel(
int64_t* perm,
int nt_in) {
memset(lims, 0, sizeof(*lims) * (vmax + 1));
#pragma omp parallel num_threads(nt_in)
#pragma omp parallel num_threads(num_omp_threads)
{
int nt = omp_get_num_threads(); // might be different from nt_in
int rank = omp_get_thread_num();
Expand Down Expand Up @@ -483,7 +483,7 @@ void bucket_sort_inplace_parallel(
nbucket); // DON'T use std::vector<bool> that cannot be accessed
// safely from multiple threads!!!

#pragma omp parallel num_threads(nt_in)
#pragma omp parallel num_threads(num_omp_threads)
{
int nt = omp_get_num_threads(); // might be different from nt_in (?)
int rank = omp_get_thread_num();
Expand Down Expand Up @@ -709,7 +709,7 @@ inline int64_t hash_function(int64_t x) {

void hashtable_int64_to_int64_init(int log2_capacity, int64_t* tab) {
size_t capacity = (size_t)1 << log2_capacity;
#pragma omp parallel for
#pragma omp parallel for num_threads(num_omp_threads)
for (int64_t i = 0; i < capacity; i++) {
tab[2 * i] = -1;
tab[2 * i + 1] = -1;
Expand All @@ -728,8 +728,7 @@ void hashtable_int64_to_int64_add(
int64_t mask = capacity - 1;
int log2_nbucket = log2_capacity_to_log2_nbucket(log2_capacity);
size_t nbucket = (size_t)1 << log2_nbucket;

#pragma omp parallel for
#pragma omp parallel for num_threads(num_omp_threads)
for (int64_t i = 0; i < n; i++) {
hk[i] = hash_function(keys[i]) & mask;
bucket_no[i] = hk[i] >> (log2_capacity - log2_nbucket);
Expand All @@ -744,9 +743,8 @@ void hashtable_int64_to_int64_add(
lims.data(),
perm.data(),
omp_get_max_threads());

int num_errors = 0;
#pragma omp parallel for reduction(+ : num_errors)
#pragma omp parallel for reduction(+ : num_errors) num_threads(num_omp_threads)
for (int64_t bucket = 0; bucket < nbucket; bucket++) {
size_t k0 = bucket << (log2_capacity - log2_nbucket);
size_t k1 = (bucket + 1) << (log2_capacity - log2_nbucket);
Expand Down Expand Up @@ -792,8 +790,7 @@ void hashtable_int64_to_int64_lookup(
std::vector<int64_t> hk(n), bucket_no(n);
int64_t mask = capacity - 1;
int log2_nbucket = log2_capacity_to_log2_nbucket(log2_capacity);

#pragma omp parallel for
#pragma omp parallel for num_threads(num_omp_threads)
for (int64_t i = 0; i < n; i++) {
int64_t k = keys[i];
int64_t hk = hash_function(k) & mask;
Expand Down
2 changes: 1 addition & 1 deletion faiss/utils/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,7 @@ void bvecs_checksum(size_t n, size_t d, const uint8_t* a, uint64_t* cs) {
// so below codes only accept n <= std::numeric_limits<ssize_t>::max()
using ssize_t = std::make_signed<std::size_t>::type;
const ssize_t size = n;
#pragma omp parallel for if (size > 1000)
#pragma omp parallel for if (size > 1000) num_threads(num_omp_threads)
for (ssize_t i_ = 0; i_ < size; i_++) {
const auto i = static_cast<std::size_t>(i_);
cs[i] = bvec_checksum(d, a + i * d);
Expand Down