Skip to content

Commit c3a77ec

Browse files
authored
Use random tmp names for index files in tests (#837)
Replace all hard-coded index file names in tests with randomly generated unique paths in the system temporary folder and delete the files after use. This is a slightly opinionated convenience PR to give the following benefits: - Temporary index files do not clutter the project folder - One can run multiple test instances in parallel without the danger of data corruption (e.g. for stress-testing) - The tests do not fail if the current folder is read-only Authors: - Artem M. Chirkin (https://github.com/achirkin) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: #837
1 parent b801218 commit c3a77ec

7 files changed

Lines changed: 58 additions & 23 deletions

File tree

cpp/tests/neighbors/ann_brute_force.cuh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,9 +115,10 @@ class AnnBruteForceTest : public ::testing::TestWithParam<AnnBruteForceInputs<Id
115115
stream_,
116116
true));
117117

118-
brute_force::serialize(handle_, std::string{"brute_force_index"}, idx, true);
118+
tmp_index_file index_file;
119+
brute_force::serialize(handle_, index_file.filename, idx, true);
119120
auto index_loaded = brute_force::index<DataT, T>(handle_);
120-
brute_force::deserialize(handle_, std::string{"brute_force_index"}, &index_loaded);
121+
brute_force::deserialize(handle_, index_file.filename, &index_loaded);
121122

122123
brute_force::search(handle_,
123124
index_loaded,

cpp/tests/neighbors/ann_cagra.cuh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,7 @@ class AnnCagraTest : public ::testing::TestWithParam<AnnCagraInputs> {
408408
auto database_view = raft::make_device_matrix_view<const DataT, int64_t>(
409409
(const DataT*)database.data(), ps.n_rows, ps.dim);
410410

411+
tmp_index_file index_file;
411412
{
412413
std::optional<raft::host_matrix<DataT, int64_t>> database_host{std::nullopt};
413414
cagra::index<DataT, IdxT> index(handle_, index_params.metric);
@@ -422,11 +423,11 @@ class AnnCagraTest : public ::testing::TestWithParam<AnnCagraInputs> {
422423
index = cagra::build(handle_, index_params, database_view);
423424
};
424425

425-
cagra::serialize(handle_, "cagra_index", index, ps.include_serialized_dataset);
426+
cagra::serialize(handle_, index_file.filename, index, ps.include_serialized_dataset);
426427
}
427428

428429
cagra::index<DataT, IdxT> index(handle_);
429-
cagra::deserialize(handle_, "cagra_index", &index);
430+
cagra::deserialize(handle_, index_file.filename, &index);
430431

431432
if (!ps.include_serialized_dataset) { index.update_dataset(handle_, database_view); }
432433

cpp/tests/neighbors/ann_ivf_flat.cuh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -197,10 +197,10 @@ class AnnIVFFlatTest : public ::testing::TestWithParam<AnnIvfFlatInputs<IdxT>> {
197197
indices_ivfflat_dev.data(), ps.num_queries, ps.k);
198198
auto dists_out_view = raft::make_device_matrix_view<T, IdxT>(
199199
distances_ivfflat_dev.data(), ps.num_queries, ps.k);
200-
const std::string filename = "ivf_flat_index";
201-
cuvs::neighbors::ivf_flat::serialize(handle_, filename, index_2);
200+
tmp_index_file index_file;
201+
cuvs::neighbors::ivf_flat::serialize(handle_, index_file.filename, index_2);
202202
cuvs::neighbors::ivf_flat::index<DataT, IdxT> index_loaded(handle_);
203-
cuvs::neighbors::ivf_flat::deserialize(handle_, filename, &index_loaded);
203+
cuvs::neighbors::ivf_flat::deserialize(handle_, index_file.filename, &index_loaded);
204204
ASSERT_EQ(index_2.size(), index_loaded.size());
205205

206206
cuvs::neighbors::ivf_flat::search(handle_,

cpp/tests/neighbors/ann_ivf_pq.cuh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -267,10 +267,10 @@ class ivf_pq_test : public ::testing::TestWithParam<ivf_pq_inputs> {
267267

268268
auto build_serialize()
269269
{
270-
std::string filename = "ivf_pq_index";
271-
cuvs::neighbors::ivf_pq::serialize(handle_, filename, build_only());
270+
tmp_index_file index_file;
271+
cuvs::neighbors::ivf_pq::serialize(handle_, index_file.filename, build_only());
272272
cuvs::neighbors::ivf_pq::index<IdxT> index(handle_);
273-
cuvs::neighbors::ivf_pq::deserialize(handle_, filename, &index);
273+
cuvs::neighbors::ivf_pq::deserialize(handle_, index_file.filename, &index);
274274
return index;
275275
}
276276

cpp/tests/neighbors/ann_utils.cuh

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@
3232
#include "naive_knn.cuh"
3333

3434
#include "../test_utils.cuh"
35+
#include <atomic>
36+
#include <cstdio>
37+
#include <filesystem>
3538
#include <gtest/gtest.h>
3639
#include <iostream>
3740
#include <limits>
@@ -346,4 +349,27 @@ auto eval_distances(raft::resources const& handle,
346349
}
347350
return testing::AssertionSuccess();
348351
}
352+
353+
/**
 * A helper class to create a temporary file for a cuVS index object in the system's temp directory.
 * The file will be automatically deleted when the object is destroyed.
 *
 * The object itself only picks a unique path; the file is created by whoever writes to `filename`
 * (e.g. a `serialize` call). Instances are not copyable, so exactly one object is responsible for
 * deleting a given path.
 */
struct tmp_index_file {
  // Ideally, we should use std::tmpfile() or another system-provided API to create a temporary
  // file. However, our API requires a file name, so we cannot use the file descriptors. There's no
  // recommended way to generate robust unique temp file names, so we use a combination of a
  // counter, process id, and random number.
  // NOTE(review): std::rand() is not seeded in this block; unless it is seeded elsewhere, the
  // uniqueness effectively rests on the process id and the counter.
  std::string filename = (std::filesystem::temp_directory_path() /
                          ("cuvs_" + std::to_string(getpid()) + "_" + std::to_string(counter++) +
                           "_" + std::to_string(std::rand())))
                           .string();

  tmp_index_file() = default;

  // Copying is disabled: destroying a copy would delete the file while the original object is
  // still using it.
  tmp_index_file(const tmp_index_file&)            = delete;
  tmp_index_file& operator=(const tmp_index_file&) = delete;

  ~tmp_index_file()
  {
    // Best-effort cleanup. The error_code overload never throws, so a filesystem failure cannot
    // escape the destructor and terminate the test process; a file that was never created is
    // simply reported as "nothing removed".
    std::error_code ec;
    std::filesystem::remove(filename, ec);
  }

 private:
  // Shared across all instances in the process; makes consecutive names differ.
  static inline std::atomic<uint64_t> counter = 0;
};
374+
349375
} // namespace cuvs::neighbors

cpp/tests/neighbors/ann_vamana.cuh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,8 @@ class AnnVamanaTest : public ::testing::TestWithParam<AnnVamanaInputs> {
155155

156156
CheckGraph<DataT, IdxT>(&index, ps, stream_);
157157

158-
vamana::serialize(handle_, "vamana_index", index);
158+
tmp_index_file index_file;
159+
vamana::serialize(handle_, index_file.filename, index);
159160

160161
// Test recall by searching with CAGRA search
161162
if (ps.graph_degree < 256) { // CAGRA search result buffer cannot support larger graph degree

cpp/tests/neighbors/mg.cuh

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -126,13 +126,14 @@ class AnnMGTest : public ::testing::TestWithParam<AnnMGInputs> {
126126
auto distances = raft::make_host_matrix_view<float, int64_t, row_major>(
127127
distances_snmg_ann.data(), ps.num_queries, ps.k);
128128

129+
tmp_index_file index_file;
129130
{
130131
auto index = cuvs::neighbors::ivf_flat::build(clique_, index_params, index_dataset);
131132
cuvs::neighbors::ivf_flat::extend(clique_, index, index_dataset, std::nullopt);
132-
cuvs::neighbors::ivf_flat::serialize(clique_, index, "mg_ivf_flat_index");
133+
cuvs::neighbors::ivf_flat::serialize(clique_, index, index_file.filename);
133134
}
134135
auto new_index =
135-
cuvs::neighbors::ivf_flat::deserialize<DataT, int64_t>(clique_, "mg_ivf_flat_index");
136+
cuvs::neighbors::ivf_flat::deserialize<DataT, int64_t>(clique_, index_file.filename);
136137

137138
if (ps.m_mode == m_mode_t::MERGE_ON_ROOT_RANK)
138139
search_params.merge_mode = MERGE_ON_ROOT_RANK;
@@ -187,13 +188,14 @@ class AnnMGTest : public ::testing::TestWithParam<AnnMGInputs> {
187188
auto distances = raft::make_host_matrix_view<float, int64_t, row_major>(
188189
distances_snmg_ann.data(), ps.num_queries, ps.k);
189190

191+
tmp_index_file index_file;
190192
{
191193
auto index = cuvs::neighbors::ivf_pq::build(clique_, index_params, index_dataset);
192194
cuvs::neighbors::ivf_pq::extend(clique_, index, index_dataset, std::nullopt);
193-
cuvs::neighbors::ivf_pq::serialize(clique_, index, "mg_ivf_pq_index");
195+
cuvs::neighbors::ivf_pq::serialize(clique_, index, index_file.filename);
194196
}
195197
auto new_index =
196-
cuvs::neighbors::ivf_pq::deserialize<DataT, int64_t>(clique_, "mg_ivf_pq_index");
198+
cuvs::neighbors::ivf_pq::deserialize<DataT, int64_t>(clique_, index_file.filename);
197199

198200
if (ps.m_mode == m_mode_t::MERGE_ON_ROOT_RANK)
199201
search_params.merge_mode = MERGE_ON_ROOT_RANK;
@@ -243,12 +245,13 @@ class AnnMGTest : public ::testing::TestWithParam<AnnMGInputs> {
243245
auto distances = raft::make_host_matrix_view<float, uint32_t, row_major>(
244246
distances_snmg_ann.data(), ps.num_queries, ps.k);
245247

248+
tmp_index_file index_file;
246249
{
247250
auto index = cuvs::neighbors::cagra::build(clique_, index_params, index_dataset);
248-
cuvs::neighbors::cagra::serialize(clique_, index, "mg_cagra_index");
251+
cuvs::neighbors::cagra::serialize(clique_, index, index_file.filename);
249252
}
250253
auto new_index =
251-
cuvs::neighbors::cagra::deserialize<DataT, uint32_t>(clique_, "mg_cagra_index");
254+
cuvs::neighbors::cagra::deserialize<DataT, uint32_t>(clique_, index_file.filename);
252255

253256
if (ps.m_mode == m_mode_t::MERGE_ON_ROOT_RANK)
254257
search_params.merge_mode = MERGE_ON_ROOT_RANK;
@@ -286,11 +289,12 @@ class AnnMGTest : public ::testing::TestWithParam<AnnMGInputs> {
286289
search_params.n_probes = ps.nprobe;
287290
search_params.search_mode = LOAD_BALANCER;
288291

292+
tmp_index_file index_file;
289293
{
290294
auto index_dataset = raft::make_device_matrix_view<const DataT, int64_t>(
291295
d_index_dataset.data(), ps.num_db_vecs, ps.dim);
292296
auto index = cuvs::neighbors::ivf_flat::build(clique_, index_params, index_dataset);
293-
ivf_flat::serialize(clique_, "local_ivf_flat_index", index);
297+
ivf_flat::serialize(clique_, index_file.filename, index);
294298
}
295299

296300
auto queries = raft::make_host_matrix_view<const DataT, int64_t, row_major>(
@@ -301,7 +305,7 @@ class AnnMGTest : public ::testing::TestWithParam<AnnMGInputs> {
301305
distances_snmg_ann.data(), ps.num_queries, ps.k);
302306

303307
auto distributed_index =
304-
cuvs::neighbors::ivf_flat::distribute<DataT, int64_t>(clique_, "local_ivf_flat_index");
308+
cuvs::neighbors::ivf_flat::distribute<DataT, int64_t>(clique_, index_file.filename);
305309
search_params.merge_mode = TREE_MERGE;
306310

307311
search_params.n_rows_per_batch = n_rows_per_search_batch;
@@ -335,11 +339,12 @@ class AnnMGTest : public ::testing::TestWithParam<AnnMGInputs> {
335339
search_params.n_probes = ps.nprobe;
336340
search_params.search_mode = LOAD_BALANCER;
337341

342+
tmp_index_file index_file;
338343
{
339344
auto index_dataset = raft::make_device_matrix_view<const DataT, int64_t>(
340345
d_index_dataset.data(), ps.num_db_vecs, ps.dim);
341346
auto index = cuvs::neighbors::ivf_pq::build(clique_, index_params, index_dataset);
342-
ivf_pq::serialize(clique_, "local_ivf_pq_index", index);
347+
ivf_pq::serialize(clique_, index_file.filename, index);
343348
}
344349

345350
auto queries = raft::make_host_matrix_view<const DataT, int64_t, row_major>(
@@ -350,7 +355,7 @@ class AnnMGTest : public ::testing::TestWithParam<AnnMGInputs> {
350355
distances_snmg_ann.data(), ps.num_queries, ps.k);
351356

352357
auto distributed_index =
353-
cuvs::neighbors::ivf_pq::distribute<DataT, int64_t>(clique_, "local_ivf_pq_index");
358+
cuvs::neighbors::ivf_pq::distribute<DataT, int64_t>(clique_, index_file.filename);
354359
search_params.merge_mode = TREE_MERGE;
355360

356361
search_params.n_rows_per_batch = n_rows_per_search_batch;
@@ -379,11 +384,12 @@ class AnnMGTest : public ::testing::TestWithParam<AnnMGInputs> {
379384

380385
mg_search_params<cagra::search_params> search_params;
381386

387+
tmp_index_file index_file;
382388
{
383389
auto index_dataset = raft::make_device_matrix_view<const DataT, int64_t>(
384390
d_index_dataset.data(), ps.num_db_vecs, ps.dim);
385391
auto index = cuvs::neighbors::cagra::build(clique_, index_params, index_dataset);
386-
cuvs::neighbors::cagra::serialize(clique_, "local_cagra_index", index);
392+
cuvs::neighbors::cagra::serialize(clique_, index_file.filename, index);
387393
}
388394

389395
auto queries = raft::make_host_matrix_view<const DataT, int64_t, row_major>(
@@ -394,7 +400,7 @@ class AnnMGTest : public ::testing::TestWithParam<AnnMGInputs> {
394400
distances_snmg_ann.data(), ps.num_queries, ps.k);
395401

396402
auto distributed_index =
397-
cuvs::neighbors::cagra::distribute<DataT, uint32_t>(clique_, "local_cagra_index");
403+
cuvs::neighbors::cagra::distribute<DataT, uint32_t>(clique_, index_file.filename);
398404

399405
search_params.merge_mode = TREE_MERGE;
400406

0 commit comments

Comments
 (0)