-
Notifications
You must be signed in to change notification settings - Fork 184
Diskann Benchmarking Wrapper #260
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 62 commits
8e8d3c1
e937ebd
0bbbf0d
02084e2
64f1d60
706f22e
3ea499b
e0aab8f
17c5510
f426df9
a7bdd33
d2442ca
dbc84cc
7e37218
b2aef6d
b9762d5
bf75242
a8bcdef
3818da9
cd8bfe5
ec6d70c
c9f797a
585ad53
441ab2a
33b075d
81c92e6
11545c3
ffea663
9c1cddc
96d5642
a7eb787
4cbe7b1
63621f4
a890ac5
450dcee
b0f4b57
8cd6c40
d658856
d1e4101
2e080c6
8c6a178
626dc17
3d15882
a72165c
9c202f2
58a729c
2412b70
3a56402
0b39d5b
92ec474
333539f
a13bf1a
df54939
93b2620
54385ab
c35d899
31d846a
61e00c7
0bf43e9
0a6b094
645d84b
d6897cc
396a589
d325698
03a1e09
f061f27
f95aec7
b667786
4aa513f
17f723e
6532914
1f168a8
2a5d1fb
173df8f
46e7728
1b03cf7
c131c52
3d40d2d
c3a25fc
6bebeb8
e254c9b
82e21e8
de2bf84
6d3b32d
2506ef1
16f35b3
18f26a7
663dfe0
4bb2b39
fd43711
30bcc6e
a10d834
4b396d7
c443944
ed8c9b6
f015264
6d6167d
9c2185b
2b758d3
b7ba35b
63e02ff
48a6a9d
fd429d2
7acfe51
c7765d9
62b207f
66001c8
0d63fb0
b0fd532
47b3be6
30892f9
9743560
80e0b7e
36d2ed4
7b240e6
db3baf0
24db5b3
73ea8c2
caf63e3
f76fbc5
41ded12
3b5e9e2
5d95616
f2c7450
4c84789
b51856e
f7eeff7
fe18b95
e3a9b46
e51734f
ee9c304
d79206e
d81e4ed
a4698b9
48b1e08
c056056
f2f11eb
cb8809c
a444bb5
5ca556c
0956c9f
1e11125
5e80e66
dd5a4e6
b170e58
73deda3
a143f92
d8297e4
35da13b
74d0ae9
b4f301e
d09e71e
93b22c2
1af21a9
68814ce
bc2124a
55e7504
4e40849
3bc38c6
90bd4f9
b43a6b3
3f0b06a
63e40cd
e64fcb5
9ebef01
490e2ed
2ef2e35
18e2de4
55fb122
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -135,6 +135,12 @@ void bench_build(::benchmark::State& state, | |
| } | ||
| } | ||
|
|
||
| if (index.algo == "diskann_ssd") { | ||
| make_sure_parent_dir_exists(index.file); | ||
| index.build_param["dataset_file"] = dataset->base_filename(); | ||
| index.build_param["path_to_index"] = index.file; | ||
| } | ||
|
|
||
|
achirkin marked this conversation as resolved.
Outdated
|
||
| std::unique_ptr<algo<T>> algo; | ||
| try { | ||
| algo = create_algo<T>(index.algo, dataset->distance(), dataset->dim(), index.build_param); | ||
|
|
@@ -144,7 +150,8 @@ void bench_build(::benchmark::State& state, | |
|
|
||
| const auto algo_property = parse_algo_property(algo->get_preference(), index.build_param); | ||
|
|
||
| const T* base_set = dataset->base_set(algo_property.dataset_memory_type); | ||
| const T* base_set = nullptr; | ||
| if (index.algo != "diskann_ssd") base_set = dataset->base_set(algo_property.dataset_memory_type); | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @achirkin if we do not have this line, the entire dataset will be read into the
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You can just set the |
||
| std::size_t index_size = dataset->base_set_size(); | ||
|
|
||
| cuda_timer gpu_timer{algo}; | ||
|
|
@@ -223,7 +230,12 @@ void bench_search(::benchmark::State& state, | |
|
|
||
| const T* query_set = nullptr; | ||
|
|
||
| if (!file_exists(index.file)) { | ||
| std::string filename; | ||
| if (index.algo != "diskann_ssd") | ||
| filename = index.file; | ||
| else | ||
| filename = index.file + "_disk.index"; | ||
|
achirkin marked this conversation as resolved.
Outdated
|
||
| if (!file_exists(filename)) { | ||
| state.SkipWithError("Index file is missing. Run the benchmark in the build mode first."); | ||
| return; | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,108 @@ | ||
| /* | ||
| * Copyright (c) 2023-2024, NVIDIA CORPORATION. | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
| #pragma once | ||
|
|
||
| #include "../hnswlib/hnswlib_wrapper.h" | ||
| #include "cuvs_cagra_wrapper.h" | ||
|
|
||
| #include <memory> | ||
|
|
||
| namespace cuvs::bench { | ||
|
|
||
| template <typename T, typename IdxT> | ||
| class cuvs_cagra_diskann : public algo<T>, public algo_gpu { | ||
| public: | ||
| using search_param_base = typename algo<T>::search_param; | ||
| using build_param = typename cuvs_cagra<T, IdxT>::build_param; | ||
| using search_param = typename diskann_mem<T>::search_param; | ||
|
|
||
| cuvs_cagra_diskann(Metric metric, int dim, const build_param& param) | ||
| : algo<T>(metric, dim), | ||
| cagra_build_{metric, dim, param}, | ||
| // hnsw_lib param values don't matter since we don't build with hnsw_lib | ||
| diskann_mem_search_{metric, dim, typename diskann_mem<T>::build_param{50, 100}} | ||
| { | ||
| } | ||
|
|
||
| void build(const T* dataset, size_t nrow) final; | ||
|
|
||
| void set_search_param(const search_param_base& param) override; | ||
|
|
||
| void search(const T* queries, | ||
| int batch_size, | ||
| int k, | ||
| algo_base::index_type* neighbors, | ||
| float* distances) const override; | ||
|
|
||
| [[nodiscard]] auto get_sync_stream() const noexcept -> cudaStream_t override | ||
| { | ||
| return cagra_build_.get_sync_stream(); | ||
| } | ||
|
|
||
| // to enable dataset access from GPU memory | ||
| [[nodiscard]] auto get_preference() const -> algo_property override | ||
| { | ||
| algo_property property; | ||
| property.dataset_memory_type = MemoryType::kHostMmap; | ||
| property.query_memory_type = MemoryType::kHost; | ||
| return property; | ||
| } | ||
|
|
||
| void save(const std::string& file) const override; | ||
| void load(const std::string&) override; | ||
| std::unique_ptr<algo<T>> copy() override | ||
| { | ||
| return std::make_unique<cuvs_cagra_hnswlib<T, IdxT>>(*this); | ||
| } | ||
|
|
||
| private: | ||
| cuvs_cagra<T, IdxT> cagra_build_; | ||
| hnsw_lib<T> hnswlib_search_; | ||
| }; | ||
|
|
||
| template <typename T, typename IdxT> | ||
| void cuvs_cagra_hnswlib<T, IdxT>::build(const T* dataset, size_t nrow) | ||
| { | ||
| cagra_build_.build(dataset, nrow); | ||
| } | ||
|
|
||
| template <typename T, typename IdxT> | ||
| void cuvs_cagra_hnswlib<T, IdxT>::set_search_param(const search_param_base& param_) | ||
| { | ||
| hnswlib_search_.set_search_param(param_); | ||
| } | ||
|
|
||
| template <typename T, typename IdxT> | ||
| void cuvs_cagra_hnswlib<T, IdxT>::save(const std::string& file) const | ||
|
tarang-jain marked this conversation as resolved.
Outdated
|
||
| { | ||
| cagra_build_.save_to_hnswlib(file); | ||
| } | ||
|
|
||
| template <typename T, typename IdxT> | ||
| void cuvs_cagra_hnswlib<T, IdxT>::load(const std::string& file) | ||
| { | ||
| hnswlib_search_.load(file); | ||
| hnswlib_search_.set_base_layer_only(); | ||
| } | ||
|
|
||
| template <typename T, typename IdxT> | ||
| void cuvs_cagra_hnswlib<T, IdxT>::search( | ||
| const T* queries, int batch_size, int k, algo_base::index_type* neighbors, float* distances) const | ||
| { | ||
| hnswlib_search_.search(queries, batch_size, k, neighbors, distances); | ||
| } | ||
|
|
||
| } // namespace cuvs::bench | ||
Uh oh!
There was an error while loading. Please reload this page.