Deduplicate imbalance_factor and widen its counters to 64 bits.

- faiss/Clustering.cpp: remove the file-local static
  imbalance_factor(int n, int k, int64_t* assign).
- faiss/utils/utils.{h,cpp}: the assignment overload now takes `int64_t n`
  (and loops with an int64_t index); the histogram overload now takes
  `const int64_t* hist` instead of `const int* hist`.
- faiss/invlists/InvertedLists.cpp: change the element type of the
  histogram vector built in InvertedLists::imbalance_factor().

NOTE(review): this patch was recovered from a flattened copy in which line
breaks, indentation and the std::vector template arguments were lost. The
`<int>` (removed lines) / `<int64_t>` (added lines) element types are
inferred from the `const int*` -> `const int64_t*` signature change, and
the indentation assumes 4-space blocks with 8-space continuations. Confirm
against the target tree before applying.

diff --git a/faiss/Clustering.cpp b/faiss/Clustering.cpp
index e557deaa51..33c939f088 100644
--- a/faiss/Clustering.cpp
+++ b/faiss/Clustering.cpp
@@ -33,22 +33,6 @@ Clustering::Clustering(int d, int k) : d(d), k(k) {}
 Clustering::Clustering(int d, int k, const ClusteringParameters& cp)
         : ClusteringParameters(cp), d(d), k(k) {}
 
-static double imbalance_factor(int n, int k, int64_t* assign) {
-    std::vector<int> hist(k, 0);
-    for (int i = 0; i < n; i++)
-        hist[assign[i]]++;
-
-    double tot = 0, uf = 0;
-
-    for (int i = 0; i < k; i++) {
-        tot += hist[i];
-        uf += hist[i] * (double)hist[i];
-    }
-    uf = uf * k / (tot * tot);
-
-    return uf;
-}
-
 void Clustering::post_process_centroids() {
     if (spherical) {
         fvec_renorm_L2(d, k, centroids.data());
diff --git a/faiss/invlists/InvertedLists.cpp b/faiss/invlists/InvertedLists.cpp
index f02b2d250a..20542fcf9a 100644
--- a/faiss/invlists/InvertedLists.cpp
+++ b/faiss/invlists/InvertedLists.cpp
@@ -181,7 +181,7 @@ size_t InvertedLists::copy_subset_to(
 }
 
 double InvertedLists::imbalance_factor() const {
-    std::vector<int> hist(nlist);
+    std::vector<int64_t> hist(nlist);
 
     for (size_t i = 0; i < nlist; i++) {
         hist[i] = list_size(i);
diff --git a/faiss/utils/utils.cpp b/faiss/utils/utils.cpp
index 85bf1348f2..0811cb9030 100644
--- a/faiss/utils/utils.cpp
+++ b/faiss/utils/utils.cpp
@@ -387,7 +387,7 @@ size_t ranklist_intersection_size(
     return count;
 }
 
-double imbalance_factor(int k, const int* hist) {
+double imbalance_factor(int k, const int64_t* hist) {
     double tot = 0, uf = 0;
 
     for (int i = 0; i < k; i++) {
@@ -399,9 +399,9 @@ double imbalance_factor(int k, const int* hist) {
     return uf;
 }
 
-double imbalance_factor(int n, int k, const int64_t* assign) {
-    std::vector<int> hist(k, 0);
-    for (int i = 0; i < n; i++) {
+double imbalance_factor(int64_t n, int k, const int64_t* assign) {
+    std::vector<int64_t> hist(k, 0);
+    for (int64_t i = 0; i < n; i++) {
         hist[assign[i]]++;
     }
 
diff --git a/faiss/utils/utils.h b/faiss/utils/utils.h
index 901459d1c7..7d75b3200d 100644
--- a/faiss/utils/utils.h
+++ b/faiss/utils/utils.h
@@ -92,10 +92,10 @@ size_t merge_result_table_with(
 
 /// a balanced assignment has a IF of 1, a completely unbalanced assignment has
 /// an IF = k.
-double imbalance_factor(int n, int k, const int64_t* assign);
+double imbalance_factor(int64_t n, int k, const int64_t* assign);
 
 /// same, takes a histogram as input
-double imbalance_factor(int k, const int* hist);
+double imbalance_factor(int k, const int64_t* hist);
 
 /// compute histogram on v
 int ivec_hist(size_t n, const int* v, int vmax, int* hist);