Skip to content

Commit 5bd0f5b

Browse files
authored
[libcu++] Automatically bump up the release threshold of default mempools (#6718)
1 parent c54b2a6 commit 5bd0f5b

File tree

6 files changed

+245
-149
lines changed

6 files changed

+245
-149
lines changed

libcudacxx/include/cuda/__driver/driver_api.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,15 @@ __getDefaultMemPool(CUmemLocation __location, CUmemAllocationType_enum __allocat
413413
__driver_fn, "Failed to get default memory pool", &__result, &__location, __allocation_type);
414414
return __result;
415415
}
416-
# endif // _CCCL_CTK_AT_LEAST(13, 0)
416+
# else // ^^^ _CCCL_CTK_AT_LEAST(13, 0) ^^^ / vvv _CCCL_CTK_BELOW(13, 0) vvv
417+
_CCCL_HOST_API inline ::CUmemoryPool __deviceGetDefaultMemPool(::CUdevice __device)
418+
{
419+
static auto __driver_fn = _CCCLRT_GET_DRIVER_FUNCTION(cuDeviceGetDefaultMemPool);
420+
::CUmemoryPool __result = nullptr;
421+
::cuda::__driver::__call_driver_fn(__driver_fn, "Failed to get default memory pool", &__result, __device);
422+
return __result;
423+
}
424+
# endif // ^^^ _CCCL_CTK_BELOW(13, 0) ^^^
417425

418426
_CCCL_HOST_API inline ::CUdeviceptr __mallocManaged(::cuda::std::size_t __bytes, unsigned int __flags)
419427
{

libcudacxx/include/cuda/__memory_resource/device_memory_pool.h

Lines changed: 35 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -27,16 +27,16 @@
2727
#endif // _CCCL_CUDA_COMPILER(CLANG)
2828

2929
#include <cuda/__memory_resource/get_property.h>
30-
#include <cuda/__memory_resource/memory_resource_base.h>
30+
#include <cuda/__memory_resource/memory_pool_base.h>
3131
#include <cuda/__memory_resource/properties.h>
3232
#include <cuda/__runtime/api_wrapper.h>
3333
#include <cuda/std/__concepts/concept_macros.h>
3434

3535
#include <cuda/std/__cccl/prologue.h>
3636

3737
//! @file
38-
//! The \c device_memory_pool class provides an asynchronous memory resource that allocates device memory in stream
39-
//! order.
38+
//! The \c device_memory_pool class provides an asynchronous memory resource
39+
//! that allocates device memory in stream order.
4040
_CCCL_BEGIN_NAMESPACE_CUDA
4141

4242
//! @rst
@@ -45,30 +45,34 @@ _CCCL_BEGIN_NAMESPACE_CUDA
4545
//! Stream ordered memory pool
4646
//! ------------------------------
4747
//!
48-
//! ``device_memory_pool_ref`` allocates device memory using `cudaMallocFromPoolAsync / cudaFreeAsync
49-
//! <https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY__POOLS.html>`__ for allocation/deallocation. A
50-
//! ``device_memory_pool_ref`` is a thin wrapper around a \c cudaMemPool_t with the location type set to \c
51-
//! cudaMemLocationTypeDevice.
48+
//! ``device_memory_pool_ref`` allocates device memory using
49+
//! `cudaMallocFromPoolAsync / cudaFreeAsync
50+
//! <https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY__POOLS.html>`__
51+
//! for allocation/deallocation. A
52+
//! ``device_memory_pool_ref`` is a thin wrapper around a \c cudaMemPool_t with
53+
//! the location type set to \c cudaMemLocationTypeDevice.
5254
//!
5355
//! .. warning::
5456
//!
55-
//! ``device_memory_pool_ref`` does not own the pool and it is the responsibility of the user to ensure that the
56-
//! lifetime of the pool exceeds the lifetime of the ``device_memory_pool_ref``.
57+
//! ``device_memory_pool_ref`` does not own the pool and it is the
58+
//! responsibility of the user to ensure that the lifetime of the pool
59+
//! exceeds the lifetime of the ``device_memory_pool_ref``.
5760
//!
5861
//! @endrst
59-
class device_memory_pool_ref : public __memory_resource_base
62+
class device_memory_pool_ref : public __memory_pool_base
6063
{
6164
public:
6265
//! @brief Constructs the device_memory_pool_ref from a \c cudaMemPool_t.
6366
//! @param __pool The \c cudaMemPool_t used to allocate memory.
6467
_CCCL_HOST_API explicit device_memory_pool_ref(::cudaMemPool_t __pool) noexcept
65-
: __memory_resource_base(__pool)
68+
: __memory_pool_base(__pool)
6669
{}
6770

6871
device_memory_pool_ref(int) = delete;
6972
device_memory_pool_ref(::cuda::std::nullptr_t) = delete;
7073

71-
//! @brief Enables the \c device_accessible property for \c device_memory_pool_ref.
74+
//! @brief Enables the \c device_accessible property for \c
75+
//! device_memory_pool_ref.
7276
//! @relates device_memory_pool_ref
7377
_CCCL_HOST_API friend constexpr void
7478
get_property(device_memory_pool_ref const&, ::cuda::mr::device_accessible) noexcept
@@ -82,12 +86,9 @@ class device_memory_pool_ref : public __memory_resource_base
8286
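//! @returns The default memory pool of the specified device.
//! @note NOTE(review): the pool handle is stored in a function-local ``static`` that is initialized only on the first call, so a later call with a different device appears to return the first device's pool - confirm this is intended.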
8387
[[nodiscard]] inline device_memory_pool_ref device_default_memory_pool(::cuda::device_ref __device)
8488
{
85-
::cuda::__verify_device_supports_stream_ordered_allocations(__device.get());
86-
87-
::cudaMemPool_t __pool;
88-
_CCCL_TRY_CUDA_API(
89-
::cudaDeviceGetDefaultMemPool, "Failed to call cudaDeviceGetDefaultMemPool", &__pool, __device.get());
90-
return device_memory_pool_ref{__pool};
89+
static ::cudaMemPool_t __pool = ::cuda::__get_default_memory_pool(
90+
::CUmemLocation{::CU_MEM_LOCATION_TYPE_DEVICE, __device.get()}, ::CU_MEM_ALLOCATION_TYPE_PINNED);
91+
return device_memory_pool_ref(__pool);
9192
}
9293

9394
//! @rst
@@ -96,22 +97,28 @@ class device_memory_pool_ref : public __memory_resource_base
9697
//! Stream ordered memory resource
9798
//! ------------------------------
9899
//!
99-
//! ``device_memory_pool`` allocates device memory using `cudaMallocFromPoolAsync / cudaFreeAsync
100-
//! <https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY__POOLS.html>`__ for allocation/deallocation. A
101-
//! When constructed it creates an underlying \c cudaMemPool_t with the location type set to \c
102-
//! cudaMemLocationTypeDevice and owns it.
100+
//! ``device_memory_pool`` allocates device memory using
101+
//! `cudaMallocFromPoolAsync / cudaFreeAsync
102+
//! <https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY__POOLS.html>`__
103+
//! for allocation/deallocation. When constructed it creates an underlying \c
104+
//! cudaMemPool_t with the location type set to \c cudaMemLocationTypeDevice and
105+
//! owns it.
103106
//!
104107
//! @endrst
105108
struct device_memory_pool : device_memory_pool_ref
106109
{
107110
using reference_type = device_memory_pool_ref;
108111

109-
//! @brief Constructs a \c device_memory_pool with the optionally specified initial pool size and release
110-
//! threshold. If the pool size grows beyond the release threshold, unused memory held by the pool will be released at
111-
//! the next synchronization event.
112-
//! @throws cuda_error if the CUDA version does not support ``cudaMallocAsync``.
113-
//! @param __device_id The device id of the device the stream pool is constructed on.
114-
//! @param __pool_properties Optional, additional properties of the pool to be created.
112+
//! @brief Constructs a \c device_memory_pool with the optionally specified
113+
//! initial pool size and release threshold. If the pool size grows beyond the
114+
//! release threshold, unused memory held by the pool will be released at the
115+
//! next synchronization event.
116+
//! @throws cuda_error if the CUDA version does not support
117+
//! ``cudaMallocAsync``.
118+
//! @param __device_id The device id of the device the stream pool is
119+
//! constructed on.
120+
//! @param __properties Optional, additional properties of the pool to be
121+
//! created.
115122
_CCCL_HOST_API device_memory_pool(::cuda::device_ref __device_id, memory_pool_properties __properties = {})
116123
: device_memory_pool_ref(__create_cuda_mempool(
117124
__properties,

libcudacxx/include/cuda/__memory_resource/legacy_pinned_memory_resource.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,11 @@
2626
# include <cuda_runtime_api.h>
2727
#endif // _CCCL_CUDA_COMPILER(CLANG)
2828

29-
#include <cuda/__memory_resource/memory_resource_base.h>
29+
#include <cuda/__device/device_ref.h>
3030
#include <cuda/__memory_resource/properties.h>
31+
#include <cuda/__memory_resource/resource.h>
3132
#include <cuda/__runtime/api_wrapper.h>
33+
#include <cuda/__runtime/ensure_current_context.h>
3234
#include <cuda/std/__concepts/concept_macros.h>
3335
#include <cuda/std/__exception/throw_error.h>
3436

libcudacxx/include/cuda/__memory_resource/managed_memory_pool.h

Lines changed: 29 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -23,47 +23,45 @@
2323

2424
#if _CCCL_CTK_AT_LEAST(13, 0)
2525

26-
# include <cuda/__memory_resource/memory_resource_base.h>
26+
# include <cuda/__memory_resource/memory_pool_base.h>
2727
# include <cuda/__memory_resource/properties.h>
2828
# include <cuda/std/__concepts/concept_macros.h>
2929
# include <cuda/std/__exception/throw_error.h>
3030

3131
# include <cuda/std/__cccl/prologue.h>
3232

3333
//! @file
34-
//! The \c managed_memory_resource class provides a memory resource that allocates managed memory.
34+
//! The \c managed_memory_pool class provides a memory resource that
35+
//! allocates managed memory.
3536
_CCCL_BEGIN_NAMESPACE_CUDA
3637

37-
[[nodiscard]] static ::cudaMemPool_t __get_default_managed_pool()
38-
{
39-
return ::cuda::__driver::__getDefaultMemPool(
40-
::CUmemLocation{::CU_MEM_LOCATION_TYPE_NONE, 0}, ::CU_MEM_ALLOCATION_TYPE_MANAGED);
41-
}
42-
4338
//! @rst
4439
//! .. _cudax-memory-resource-async:
4540
//!
4641
//! Stream ordered memory resource
4742
//! ------------------------------
4843
//!
49-
//! ``managed_memory_pool_ref`` allocates managed memory using `cudaMallocFromPoolAsync / cudaFreeAsync
50-
//! <https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY__POOLS.html>`__ for allocation/deallocation. A
51-
//! ``managed_memory_pool_ref`` is a thin wrapper around a \c cudaMemPool_t with the allocation type set to \c
52-
//! cudaMemAllocationTypeManaged.
44+
//! ``managed_memory_pool_ref`` allocates managed memory using
45+
//! `cudaMallocFromPoolAsync / cudaFreeAsync
46+
//! <https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY__POOLS.html>`__
47+
//! for allocation/deallocation. A
48+
//! ``managed_memory_pool_ref`` is a thin wrapper around a \c cudaMemPool_t with
49+
//! the allocation type set to \c cudaMemAllocationTypeManaged.
5350
//!
5451
//! .. warning::
5552
//!
56-
//! ``managed_memory_pool_ref`` does not own the pool and it is the responsibility of the user to ensure that the
57-
//! lifetime of the pool exceeds the lifetime of the ``managed_memory_pool_ref``.
53+
//! ``managed_memory_pool_ref`` does not own the pool and it is the
54+
//! responsibility of the user to ensure that the lifetime of the pool
55+
//! exceeds the lifetime of the ``managed_memory_pool_ref``.
5856
//!
5957
//! @endrst
60-
class managed_memory_pool_ref : public __memory_resource_base
58+
class managed_memory_pool_ref : public __memory_pool_base
6159
{
6260
public:
6361
//! @brief Constructs the managed_memory_pool_ref from a \c cudaMemPool_t.
6462
//! @param __pool The \c cudaMemPool_t used to allocate memory.
6563
_CCCL_HOST_API explicit managed_memory_pool_ref(::cudaMemPool_t __pool) noexcept
66-
: __memory_resource_base(__pool)
64+
: __memory_pool_base(__pool)
6765
{}
6866

6967
//! @brief Enables the \c device_accessible property
@@ -82,7 +80,9 @@ class managed_memory_pool_ref : public __memory_resource_base
8280
//! @returns The default managed memory pool.
8381
[[nodiscard]] inline managed_memory_pool_ref managed_default_memory_pool()
8482
{
85-
return managed_memory_pool_ref{::cuda::__get_default_managed_pool()};
83+
static ::cudaMemPool_t __pool = ::cuda::__get_default_memory_pool(
84+
::CUmemLocation{::CU_MEM_LOCATION_TYPE_NONE, 0}, ::CU_MEM_ALLOCATION_TYPE_MANAGED);
85+
return managed_memory_pool_ref(__pool);
8686
}
8787

8888
//! @rst
@@ -91,9 +91,11 @@ class managed_memory_pool_ref : public __memory_resource_base
9191
//! Stream ordered memory resource
9292
//! ------------------------------
9393
//!
94-
//! ``managed_memory_pool`` allocates managed memory using `cudaMallocFromPoolAsync / cudaFreeAsync
95-
//! <https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY__POOLS.html>`__ for allocation/deallocation. A
96-
//! When constructed it creates an underlying \c cudaMemPool_t with the allocation type set to \c
94+
//! ``managed_memory_pool`` allocates managed memory using
95+
//! `cudaMallocFromPoolAsync / cudaFreeAsync
96+
//! <https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY__POOLS.html>`__
97+
//! for allocation/deallocation. When constructed it creates an underlying \c
98+
//! cudaMemPool_t with the allocation type set to \c
9799
//! cudaMemAllocationTypeManaged and owns it.
98100
//!
99101
//! @endrst
@@ -102,15 +104,18 @@ struct managed_memory_pool : managed_memory_pool_ref
102104
using reference_type = managed_memory_pool_ref;
103105

104106
//! @brief Constructs a \c managed_memory_pool with optional properties.
105-
//! Properties include the initial pool size and the release threshold. If the pool size grows beyond the release
106-
//! threshold, unused memory held by the pool will be released at the next synchronization event.
107-
//! @param __properties Optional, additional properties of the pool to be created.
107+
//! Properties include the initial pool size and the release threshold. If the
108+
//! pool size grows beyond the release threshold, unused memory held by the
109+
//! pool will be released at the next synchronization event.
110+
//! @param __properties Optional, additional properties of the pool to be
111+
//! created.
108112
_CCCL_HOST_API managed_memory_pool(memory_pool_properties __properties = {})
109113
: managed_memory_pool_ref(__create_cuda_mempool(
110114
__properties, ::CUmemLocation{::CU_MEM_LOCATION_TYPE_NONE, 0}, ::CU_MEM_ALLOCATION_TYPE_MANAGED))
111115
{}
112116

113-
// TODO add a constructor that accepts memory location one a type for it is added
117+
// TODO add a constructor that accepts memory location once a type for it is
118+
// added
114119

115120
~managed_memory_pool() noexcept
116121
{

0 commit comments

Comments
 (0)