|
| 1 | +/* |
| 2 | + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. |
| 3 | + * SPDX-License-Identifier: Apache-2.0 |
| 4 | + */ |
| 5 | +#pragma once |
| 6 | + |
| 7 | +#include <rmm/cuda_device.hpp> |
| 8 | +#include <rmm/cuda_stream_view.hpp> |
| 9 | +#include <rmm/detail/error.hpp> |
| 10 | +#include <rmm/detail/export.hpp> |
| 11 | +#include <rmm/detail/runtime_capabilities.hpp> |
| 12 | +#include <rmm/detail/thrust_namespace.h> |
| 13 | +#include <rmm/mr/cuda_async_view_memory_resource.hpp> |
| 14 | +#include <rmm/mr/device_memory_resource.hpp> |
| 15 | + |
| 16 | +#include <cuda/std/type_traits> |
| 17 | +#include <cuda_runtime_api.h> |
| 18 | + |
| 19 | +#include <cstddef> |
| 20 | +#include <cstdint> |
| 21 | +#include <optional> |
| 22 | + |
| 23 | +namespace RMM_NAMESPACE { |
| 24 | +namespace mr { |
| 25 | +/** |
| 26 | + * @addtogroup memory_resources |
| 27 | + * @{ |
| 28 | + * @file |
| 29 | + */ |
| 30 | + |
| 31 | +/** |
| 32 | + * @brief `device_memory_resource` derived class that uses |
| 33 | + * `cudaMallocFromPoolAsync`/`cudaFreeFromPoolAsync` with a pinned memory pool |
| 34 | + * for allocation/deallocation. |
| 35 | + */ |
/**
 * @brief `device_memory_resource` derived class that uses `cudaMallocFromPoolAsync` with a
 * pinned (host) memory pool for allocation, and `cudaFreeAsync` for deallocation.
 *
 * This is a non-owning wrapper over the device-default pinned memory pool: the pool is
 * queried from the runtime, never created or destroyed by this class.
 */
class cuda_async_pinned_memory_resource final : public device_memory_resource {
 public:
  /**
   * @brief Constructs a cuda_async_pinned_memory_resource with the default pinned memory pool for
   * the current device.
   *
   * The default pinned memory pool is the pool that is created when the device is created.
   * Pool properties such as the release threshold are not modified.
   *
   * @throws rmm::logic_error if the CUDA version does not support `cudaMallocFromPoolAsync` with
   * a pinned memory pool (requires CUDA 13.0 or higher)
   */
  cuda_async_pinned_memory_resource()
  {
    // Fail fast with a clear message when the runtime lacks pinned-pool support; the
    // preprocessor guard below additionally handles builds against older toolkits.
    RMM_EXPECTS(rmm::detail::runtime_async_pinned_alloc::is_supported(),
                "cuda_async_pinned_memory_resource requires CUDA 13.0 or higher");

#if defined(CUDA_VERSION) && CUDA_VERSION >= RMM_MIN_ASYNC_PINNED_ALLOC_CUDA_VERSION
    // Look up the device-default pinned pool for the current device and wrap it in a
    // non-owning view resource that performs the actual pool allocations.
    cudaMemPool_t pinned_pool_handle{};
    cudaMemLocation location{.type = cudaMemLocationTypeDevice,
                             .id   = rmm::get_current_cuda_device().value()};
    RMM_CUDA_TRY(
      cudaMemGetDefaultMemPool(&pinned_pool_handle, &location, cudaMemAllocationTypePinned));
    pool_ = cuda_async_view_memory_resource{pinned_pool_handle};
#endif
  }

  /**
   * @brief Returns the underlying native handle to the CUDA pool
   *
   * @return cudaMemPool_t Handle to the underlying CUDA pool
   */
  [[nodiscard]] cudaMemPool_t pool_handle() const noexcept { return pool_.pool_handle(); }

  // The pool view is non-owning, so the default destructor is sufficient; copying/moving is
  // disallowed to keep pool-handle identity semantics simple.
  ~cuda_async_pinned_memory_resource() override                                  = default;
  cuda_async_pinned_memory_resource(cuda_async_pinned_memory_resource const&)    = delete;
  cuda_async_pinned_memory_resource(cuda_async_pinned_memory_resource&&)         = delete;
  cuda_async_pinned_memory_resource& operator=(cuda_async_pinned_memory_resource const&) = delete;
  cuda_async_pinned_memory_resource& operator=(cuda_async_pinned_memory_resource&&)      = delete;

 private:
  // View of the device-default pinned pool; default-constructed (null handle) only when the
  // build-time CUDA version predates pinned-pool support, in which case the constructor has
  // already thrown before this member is ever used.
  cuda_async_view_memory_resource pool_{};

  /**
   * @brief Allocates memory of size at least \p bytes.
   *
   * The returned pointer will have at minimum 256 byte alignment.
   *
   * @param bytes The size of the allocation
   * @param stream Stream on which to perform allocation
   * @return void* Pointer to the newly allocated memory
   */
  void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override
  {
    return pool_.allocate(stream, bytes);
  }

  /**
   * @brief Deallocate memory pointed to by \p ptr.
   *
   * @param ptr Pointer to be deallocated
   * @param bytes The size in bytes of the allocation. This must be equal to the
   * value of `bytes` that was passed to the `allocate` call that returned `ptr`.
   * @param stream Stream on which to perform deallocation
   */
  void do_deallocate(void* ptr, std::size_t bytes, rmm::cuda_stream_view stream) noexcept override
  {
    pool_.deallocate(stream, ptr, bytes);
  }

  /**
   * @brief Compare this resource to another.
   *
   * Two resources are considered equal when both are `cuda_async_pinned_memory_resource`
   * instances viewing the same underlying CUDA pool handle.
   *
   * @param other The other resource to compare to
   * @return true If the two resources are equivalent
   * @return false If the two resources are not equal
   */
  [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override
  {
    auto const* async_mr = dynamic_cast<cuda_async_pinned_memory_resource const*>(&other);
    return (async_mr != nullptr) && (this->pool_handle() == async_mr->pool_handle());
  }

  // Pinned-pool allocations are accessible from both device and host, so this resource
  // advertises both cuda::mr accessibility properties.
  friend auto get_property(cuda_async_pinned_memory_resource const&,
                           cuda::mr::device_accessible) noexcept
  {
    return cuda::mr::device_accessible{};
  }
  friend auto get_property(cuda_async_pinned_memory_resource const&,
                           cuda::mr::host_accessible) noexcept
  {
    return cuda::mr::host_accessible{};
  }
};
| 131 | + |
| 132 | +// static property checks |
| 133 | +static_assert(rmm::detail::polyfill::resource<cuda_async_pinned_memory_resource>); |
| 134 | +static_assert(rmm::detail::polyfill::async_resource<cuda_async_pinned_memory_resource>); |
| 135 | +static_assert(rmm::detail::polyfill::resource_with<cuda_async_pinned_memory_resource, |
| 136 | + cuda::mr::host_accessible, |
| 137 | + cuda::mr::device_accessible>); |
| 138 | +static_assert(rmm::detail::polyfill::async_resource_with<cuda_async_pinned_memory_resource, |
| 139 | + cuda::mr::host_accessible, |
| 140 | + cuda::mr::device_accessible>); |
| 141 | +/** @} */ // end of group |
| 142 | +} // namespace mr |
| 143 | +} // namespace RMM_NAMESPACE |