|
| 1 | +/* |
| 2 | + * Copyright (c) Meta Platforms, Inc. and affiliates. |
| 3 | + * |
| 4 | + * This source code is licensed under the MIT license found in the |
| 5 | + * LICENSE file in the root directory of this source tree. |
| 6 | + */ |
| 7 | + |
#include <faiss/utils/sharding.h>

#include <cstdint>
#include <cstdio>
#include <limits>
#include <memory>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#include <faiss/clone_index.h>
#include <faiss/index_io.h>
| 12 | + |
| 13 | +namespace faiss { |
| 14 | + |
| 15 | +std::string DefaultFilenameTemplateGenerator::operator()() { |
| 16 | + return "shard_%d.faissindex"; |
| 17 | +} |
| 18 | + |
| 19 | +int64_t DefaultShardingFunction::operator()(int64_t i, int64_t shard_count) { |
| 20 | + return i % shard_count; |
| 21 | +} |
| 22 | + |
| 23 | +std::vector<std::string> shard_ivf_index_centroids( |
| 24 | + IndexIVF* index, |
| 25 | + int64_t shard_count, |
| 26 | + std::shared_ptr<FilenameTemplateGenerator> filename_template_generator, |
| 27 | + std::shared_ptr<ShardingFunction> sharding_function) { |
| 28 | + if (index->quantizer->ntotal == 0) { |
| 29 | + return std::vector<std::string>(); |
| 30 | + } |
| 31 | + |
| 32 | + if (filename_template_generator == nullptr) { |
| 33 | + filename_template_generator = |
| 34 | + std::make_shared<DefaultFilenameTemplateGenerator>(); |
| 35 | + } |
| 36 | + if (sharding_function == nullptr) { |
| 37 | + sharding_function = std::make_shared<DefaultShardingFunction>(); |
| 38 | + } |
| 39 | + |
| 40 | + IndexIVF* sharded_indexes[shard_count]; |
| 41 | + for (int i = 0; i < shard_count; i++) { |
| 42 | + sharded_indexes[i] = static_cast<IndexIVF*>(clone_index(index)); |
| 43 | + sharded_indexes[i]->quantizer->reset(); |
| 44 | + } |
| 45 | + |
| 46 | + // assign centroids to each sharded Index based on sharding_function, and |
| 47 | + // add them to the quantizer of each sharded index |
| 48 | + std::vector<float> sharded_centroids[shard_count]; |
| 49 | + for (int i = 0; i < index->quantizer->ntotal; i++) { |
| 50 | + int shard_id = (*sharding_function)(i, shard_count); |
| 51 | + float reconstructed[index->quantizer->d]; |
| 52 | + index->quantizer->reconstruct(i, reconstructed); |
| 53 | + sharded_centroids[shard_id].insert( |
| 54 | + sharded_centroids[shard_id].end(), |
| 55 | + &reconstructed[0], |
| 56 | + &reconstructed[index->quantizer->d]); |
| 57 | + } |
| 58 | + for (int i = 0; i < shard_count; i++) { |
| 59 | + sharded_indexes[i]->quantizer->add( |
| 60 | + sharded_centroids[i].size() / index->quantizer->d, |
| 61 | + sharded_centroids[i].data()); |
| 62 | + } |
| 63 | + |
| 64 | + std::vector<std::string> result; |
| 65 | + for (int i = 0; i < shard_count; i++) { |
| 66 | + char fname[256]; |
| 67 | + std::string template_filename = (*filename_template_generator)(); |
| 68 | + snprintf(fname, 256, template_filename.c_str(), i); |
| 69 | + result.emplace_back(fname); |
| 70 | + write_index(sharded_indexes[i], fname); |
| 71 | + } |
| 72 | + |
| 73 | + for (int i = 0; i < shard_count; i++) { |
| 74 | + delete sharded_indexes[i]; |
| 75 | + } |
| 76 | + |
| 77 | + return result; |
| 78 | +} |
| 79 | + |
| 80 | +} // namespace faiss |
0 commit comments