Skip to content

Commit 03e887c

Browse files
mlomeli1 and facebook-github-bot
authored and committed
Cache device major version value to avoid multiple calls of getCudaDeviceProperties (facebookresearch#3950)
Summary: This diff caches the device major version value so that getCudaDeviceProperties() doesn't need to be called multiple times. Currently, the profiler output for the code looks like this: {F1933796291} Pull Request resolved: facebookresearch#3950 Test Plan: N5114369 -- provides a toy example (2) which exhibits the following timings: Average timings before change: 3.35s Average timings after change: 1.99s Reviewed By: algoriddle Differential Revision: D64047778 Pulled By: mlomeli1 fbshipit-source-id: 2f09373944237e80b96d40f35c6714c06f5741a9
1 parent 8b00d00 commit 03e887c

2 files changed

Lines changed: 19 additions & 7 deletions

File tree

faiss/gpu/GpuDistance.cu

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,12 +51,18 @@ using namespace raft::distance;
5151
using namespace raft::neighbors;
5252
#endif
5353

54+
/// Cached major compute-capability version of the first device queried (-1 until set); one value shared by all devices
55+
int device_major_version = -1;
56+
5457
bool should_use_raft(GpuDistanceParams args) {
55-
cudaDeviceProp prop;
56-
int dev = args.device >= 0 ? args.device : getCurrentDevice();
57-
cudaGetDeviceProperties(&prop, dev);
58+
if (device_major_version < 0) {
59+
cudaDeviceProp prop;
60+
int dev = args.device >= 0 ? args.device : getCurrentDevice();
61+
cudaGetDeviceProperties(&prop, dev);
62+
device_major_version = prop.major;
63+
}
5864

59-
if (prop.major < 7)
65+
if (device_major_version < 7)
6066
return false;
6167

6268
return args.use_raft;

faiss/gpu/GpuIndex.cu

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,17 @@ constexpr idx_t kAddVecSize = (idx_t)512 * 1024;
4242
// FIXME: parameterize based on algorithm need
4343
constexpr idx_t kSearchVecSize = (idx_t)32 * 1024;
4444

45+
/// Cached device major version (-1 until set); defined in GpuDistance.cu
46+
extern int device_major_version;
47+
4548
bool should_use_raft(GpuIndexConfig config_) {
46-
cudaDeviceProp prop;
47-
cudaGetDeviceProperties(&prop, config_.device);
49+
if (device_major_version < 0) {
50+
cudaDeviceProp prop;
51+
cudaGetDeviceProperties(&prop, config_.device);
52+
device_major_version = prop.major;
53+
}
4854

49-
if (prop.major < 7)
55+
if (device_major_version < 7)
5056
return false;
5157

5258
return config_.use_raft;

0 commit comments

Comments
 (0)