cuvs/cpp/bench/ann/src/common/cuda_pinned_resource.hpp at 4442a3f3f4192f3bd5ab0da882bcd324a64a9a20 · rapidsai/cuvs · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
/*
 * Copyright (c) 2023-2025, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#pragma once

#include <rmm/cuda_stream_view.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>

#include <cstddef>

namespace raft::mr {
/**
 * @brief `device_memory_resource` derived class that uses cudaMallocHost/Free for
 * allocation/deallocation.
 *
 * This is almost the same as rmm::mr::host::pinned_memory_resource, but it has
 * device_memory_resource as base class. Pinned memory can be accessed from device,
 * and using this allocator we can create device_mdarray backed by pinned allocator.
 *
 * TODO(tfeher): it would be preferred to just rely on the existing allocator from rmm
 * (pinned_memory_resource), but that is incompatible with the container_policy class
 * for device matrix, because the latter expects a device_memory_resource. We shall
 * revise this once we progress with Issue https://github.com/rapidsai/raft/issues/1819
 */
class cuda_pinned_resource final : public rmm::mr::device_memory_resource {
 public:
  cuda_pinned_resource()                                               = default;
  ~cuda_pinned_resource() override                                     = default;
  cuda_pinned_resource(cuda_pinned_resource const&)                    = default;
  cuda_pinned_resource(cuda_pinned_resource&&)                         = default;
  auto operator=(cuda_pinned_resource const&) -> cuda_pinned_resource& = default;
  auto operator=(cuda_pinned_resource&&) -> cuda_pinned_resource&      = default;

 private:
  /**
   * @brief Allocates memory of size at least `bytes` using cudaMalloc.
   *
   * The returned pointer has at least 256B alignment.
   *
   * @note Stream argument is ignored
   *
   * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled
   *
   * @param bytes The size, in bytes, of the allocation
   * @return void* Pointer to the newly allocated memory
   */
  auto do_allocate(std::size_t bytes, rmm::cuda_stream_view) -> void* override
  {
    void* ptr{nullptr};
    RMM_CUDA_TRY_ALLOC(cudaMallocHost(&ptr, bytes));
    return ptr;
  }

  /**
   * @brief Deallocate memory pointed to by \p p.
   *
   * @note Stream argument is ignored.
   *
   * @throws Nothing.
   *
   * @param p Pointer to be deallocated
   */
  void do_deallocate(void* ptr, std::size_t, rmm::cuda_stream_view) noexcept override
  {
    RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr));
  }

  /**
   * @brief Compare this resource to another.
   *
   * Two cuda_pinned_resources always compare equal, because they can each
   * deallocate memory allocated by the other.
   *
   * @throws Nothing.
   *
   * @param other The other resource to compare to
   * @return true If the two resources are equivalent
   * @return false If the two resources are not equal
   */
  [[nodiscard]] auto do_is_equal(device_memory_resource const& other) const noexcept
    -> bool override
  {
    return dynamic_cast<cuda_pinned_resource const*>(&other) != nullptr;
  }
};
}  // namespace raft::mr