Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cudax/test/launch/launch_smoke.cu
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ struct dynamic_smem_single
template <typename Config>
__device__ void operator()(Config config)
{
decltype(auto) dynamic_smem = cuda::dynamic_shared_memory_view(config);
decltype(auto) dynamic_smem = cuda::dynamic_shared_memory(config);
static_assert(::cuda::std::is_same_v<SmemType&, decltype(dynamic_smem)>);
CUDAX_REQUIRE(::cuda::device::is_object_from(dynamic_smem, ::cuda::device::address_space::shared));
kernel_run_proof = true;
Expand All @@ -101,7 +101,7 @@ struct dynamic_smem_span
template <typename Config>
__device__ void operator()(Config config, int size)
{
auto dynamic_smem = cuda::dynamic_shared_memory_view(config);
auto dynamic_smem = cuda::dynamic_shared_memory(config);
static_assert(decltype(dynamic_smem)::extent == Extent);
static_assert(::cuda::std::is_same_v<SmemType&, decltype(dynamic_smem[1])>);
CUDAX_REQUIRE(dynamic_smem.size() == size);
Expand Down
135 changes: 91 additions & 44 deletions libcudacxx/include/cuda/__launch/configuration.h
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ inline constexpr ::cuda::std::size_t __max_portable_dyn_smem_size = 48 * 1024;
* This type can be constructed with dynamic_shared_memory helper function.
*
* When launch configuration contains this option, that configuration can be
* then passed to dynamic_shared_memory_view to get the view_type over the
* then passed to dynamic_shared_memory to get the view_type over the
* dynamic shared memory. It is also possible to obtain that memory through
* the original extern __shared__ variable[] declaration.
*
Expand All @@ -213,14 +213,14 @@ inline constexpr ::cuda::std::size_t __max_portable_dyn_smem_size = 48 * 1024;
* template <typename Configuration>
* __global__ void kernel(Configuration conf)
* {
* auto dynamic_shared = cuda::dynamic_shared_memory_view(conf);
* auto dynamic_shared = cuda::dynamic_shared_memory(conf);
* dynamic_shared[0] = 1;
* }
*
* void kernel_launch(cuda::stream_ref stream) {
* auto dims = cuda::make_hierarchy(cuda::block<128>(), cuda::grid(4));
* auto conf = cuda::make_configuration(dims,
* dynamic_shared_memory<int[128]>());
* cuda::dynamic_shared_memory<int[128]>());
*
* cuda::launch(stream, conf, kernel);
* }
Expand All @@ -239,7 +239,7 @@ inline constexpr ::cuda::std::size_t __max_portable_dyn_smem_size = 48 * 1024;
* per block
*/
template <class _Tp>
class _CCCL_DECLSPEC_EMPTY_BASES dynamic_shared_memory
class _CCCL_DECLSPEC_EMPTY_BASES dynamic_shared_memory_option
: __dyn_smem_option_base<_Tp>
, public __detail::launch_option
{
Expand All @@ -258,48 +258,11 @@ class _CCCL_DECLSPEC_EMPTY_BASES dynamic_shared_memory
using typename __base_type::value_type; //!< Value type of the dynamic
//!< shared memory elements.
using typename __base_type::view_type; //!< The view type returned by the
//!< cuda::dynamic_shared_memory_view(config).
//!< cuda::dynamic_shared_memory(config).

static constexpr bool is_relevant_on_device = true;
static constexpr __detail::launch_option_kind kind = __detail::launch_option_kind::dynamic_shared_memory;

_CCCL_HIDE_FROM_ABI constexpr dynamic_shared_memory() noexcept = default;

_CCCL_HOST_API constexpr dynamic_shared_memory(non_portable_t) noexcept
: __non_portable_{true}
{}

_CCCL_TEMPLATE(class _Tp2 = _Tp)
_CCCL_REQUIRES((!::cuda::std::is_unbounded_array_v<_Tp2>) )
_CCCL_HOST_API constexpr dynamic_shared_memory() noexcept
{
static_assert(sizeof(_Tp2) <= __max_portable_dyn_smem_size, "portable dynamic shared memory limit exceeded");
}

_CCCL_TEMPLATE(class _Tp2 = _Tp)
_CCCL_REQUIRES((!::cuda::std::is_unbounded_array_v<_Tp2>) )
_CCCL_HOST_API constexpr dynamic_shared_memory(non_portable_t) noexcept
: __non_portable_{true}
{}

_CCCL_TEMPLATE(class _Tp2 = _Tp)
_CCCL_REQUIRES(::cuda::std::is_unbounded_array_v<_Tp2>)
_CCCL_HOST_API constexpr dynamic_shared_memory(::cuda::std::size_t __n)
: __base_type{__n}
{
if (__n * sizeof(value_type) > __max_portable_dyn_smem_size)
{
::cuda::std::__throw_invalid_argument("portable dynamic shared memory limit exceeded");
}
}

_CCCL_TEMPLATE(class _Tp2 = _Tp)
_CCCL_REQUIRES(::cuda::std::is_unbounded_array_v<_Tp2>)
_CCCL_HOST_API constexpr dynamic_shared_memory(::cuda::std::size_t __n, non_portable_t) noexcept
: __base_type{__n}
, __non_portable_{true}
{}

//! @brief Gets the size of the dynamic shared memory in bytes.
[[nodiscard]] _CCCL_API constexpr ::cuda::std::size_t size_bytes() const noexcept
{
Expand Down Expand Up @@ -328,11 +291,32 @@ class _CCCL_DECLSPEC_EMPTY_BASES dynamic_shared_memory
return view_type{__ptr, __base_type::__n_};
}
}

// Helper function to access private constructors
static constexpr dynamic_shared_memory_option __create(bool __non_portable = false) noexcept
{
return dynamic_shared_memory_option{__non_portable};
}

static constexpr dynamic_shared_memory_option __create(::cuda::std::size_t __n, bool __non_portable = false) noexcept
{
return dynamic_shared_memory_option{__n, __non_portable};
}

private:
_CCCL_HOST_API constexpr dynamic_shared_memory_option(bool __non_portable = false) noexcept
: __non_portable_{__non_portable}
{}

_CCCL_HOST_API constexpr dynamic_shared_memory_option(::cuda::std::size_t __n, bool __non_portable = false) noexcept
: __base_type{__n}
, __non_portable_{__non_portable}
{}
};

template <class _Tp>
[[nodiscard]] ::cudaError_t __apply_launch_option(
const dynamic_shared_memory<_Tp>& __opt, ::CUlaunchConfig& __config, ::CUfunction __kernel) noexcept
const dynamic_shared_memory_option<_Tp>& __opt, ::CUlaunchConfig& __config, ::CUfunction __kernel) noexcept
{
::cudaError_t __status = ::cudaSuccess;

Expand Down Expand Up @@ -389,6 +373,69 @@ template <class _Tp>
return ::cudaSuccess;
}

/**
* @brief Function that creates dynamic_shared_memory_option for non-unbounded array types
*
* @tparam _Tp Type intended to be stored in dynamic shared memory (must not be an unbounded array)
* @return dynamic_shared_memory_option<_Tp> instance
*/
_CCCL_TEMPLATE(class _Tp)
_CCCL_REQUIRES((!::cuda::std::is_unbounded_array_v<_Tp>) )
[[nodiscard]] _CCCL_HOST_API constexpr dynamic_shared_memory_option<_Tp> dynamic_shared_memory() noexcept
{
static_assert(sizeof(_Tp) <= __max_portable_dyn_smem_size, "portable dynamic shared memory limit exceeded");
return dynamic_shared_memory_option<_Tp>::__create(false);
}

/**
* @brief Function that creates dynamic_shared_memory_option for non-unbounded array types with non-portable flag
*
* @tparam _Tp Type intended to be stored in dynamic shared memory (must not be an unbounded array)
* @param __non_portable Flag indicating non-portable size
* @return dynamic_shared_memory_option<_Tp> instance
*/
_CCCL_TEMPLATE(class _Tp)
_CCCL_REQUIRES((!::cuda::std::is_unbounded_array_v<_Tp>) )
[[nodiscard]] _CCCL_HOST_API constexpr dynamic_shared_memory_option<_Tp> dynamic_shared_memory(non_portable_t) noexcept
{
return dynamic_shared_memory_option<_Tp>::__create(true);
}

/**
* @brief Function that creates dynamic_shared_memory_option for unbounded array types
*
* @tparam _Tp Unbounded array type
* @param __n Number of elements in the dynamic shared memory
* @return dynamic_shared_memory_option<_Tp> instance
*/
_CCCL_TEMPLATE(class _Tp)
_CCCL_REQUIRES(::cuda::std::is_unbounded_array_v<_Tp>)
[[nodiscard]] _CCCL_HOST_API constexpr dynamic_shared_memory_option<_Tp> dynamic_shared_memory(::cuda::std::size_t __n)
{
using value_type = typename dynamic_shared_memory_option<_Tp>::value_type;
if (__n * sizeof(value_type) > __max_portable_dyn_smem_size)
{
::cuda::std::__throw_invalid_argument("portable dynamic shared memory limit exceeded");
}
return dynamic_shared_memory_option<_Tp>::__create(__n, false);
}

/**
* @brief Function that creates dynamic_shared_memory_option for unbounded array types with non-portable flag
*
* @tparam _Tp Unbounded array type
* @param __n Number of elements in the dynamic shared memory
* @param __non_portable Flag indicating non-portable size
* @return dynamic_shared_memory_option<_Tp> instance
*/
_CCCL_TEMPLATE(class _Tp)
_CCCL_REQUIRES(::cuda::std::is_unbounded_array_v<_Tp>)
[[nodiscard]] _CCCL_HOST_API constexpr dynamic_shared_memory_option<_Tp>
dynamic_shared_memory(::cuda::std::size_t __n, non_portable_t) noexcept
{
return dynamic_shared_memory_option<_Tp>::__create(__n, true);
}

/**
* @brief Launch option specifying launch priority
*
Expand Down Expand Up @@ -733,7 +780,7 @@ template <typename Dimensions, typename... Options>
# if _CCCL_CUDA_COMPILATION()

template <class _Dims, class... _Opts>
_CCCL_DEVICE_API decltype(auto) dynamic_shared_memory_view(const kernel_config<_Dims, _Opts...>& __config) noexcept
_CCCL_DEVICE_API decltype(auto) dynamic_shared_memory(const kernel_config<_Dims, _Opts...>& __config) noexcept
{
auto& __opt = __detail::find_option_in_tuple<__detail::launch_option_kind::dynamic_shared_memory>(__config.options);
using _Opt = ::cuda::std::remove_reference_t<decltype(__opt)>;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ struct TestKernel
template <class Config>
__device__ void operator()(const Config& config)
{
static_assert(cuda::std::is_same_v<View, decltype(cuda::dynamic_shared_memory_view(config))>);
static_assert(noexcept(cuda::dynamic_shared_memory_view(config)));
static_assert(cuda::std::is_same_v<View, decltype(cuda::dynamic_shared_memory(config))>);
static_assert(noexcept(cuda::dynamic_shared_memory(config)));

write_smem(cuda::dynamic_shared_memory_view(config));
write_smem(cuda::dynamic_shared_memory(config));
}

__device__ void write_smem(T& view)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ struct dynamic_smem_single
template <typename Config>
__device__ void operator()(Config config)
{
decltype(auto) dynamic_smem = cuda::dynamic_shared_memory_view(config);
decltype(auto) dynamic_smem = cuda::dynamic_shared_memory(config);
static_assert(::cuda::std::is_same_v<SmemType&, decltype(dynamic_smem)>);
CCCLRT_REQUIRE_DEVICE(::cuda::device::is_object_from(dynamic_smem, ::cuda::device::address_space::shared));
kernel_run_proof = true;
Expand All @@ -95,7 +95,7 @@ struct dynamic_smem_span
template <typename Config>
__device__ void operator()(Config config, int size)
{
auto dynamic_smem = cuda::dynamic_shared_memory_view(config);
auto dynamic_smem = cuda::dynamic_shared_memory(config);
static_assert(decltype(dynamic_smem)::extent == Extent);
static_assert(::cuda::std::is_same_v<SmemType&, decltype(dynamic_smem[1])>);
CCCLRT_REQUIRE_DEVICE(dynamic_smem.size() == size);
Expand Down
Loading