Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
fef0d0a
operator_properties
fbusato Jan 15, 2026
48c0f28
rewrote with traits
fbusato Jan 15, 2026
55b2c8f
fix bit operations
fbusato Jan 15, 2026
2a3cec4
add documentation and negative tests
fbusato Jan 16, 2026
02e5920
fix compiler warnings
fbusato Jan 16, 2026
451a335
use constexpr variable
fbusato Jan 16, 2026
9084770
target CCCL 3.3.0
fbusato Jan 16, 2026
871172c
add custom data type test
fbusato Jan 20, 2026
b38d109
fix compiler warnings
fbusato Jan 20, 2026
c2e08fa
Merge branch 'main' into operator-properties
fbusato Jan 20, 2026
e32bc85
use constexpr function
fbusato Jan 20, 2026
b35234a
Merge branch 'operator-properties' of github.com:fbusato/cccl into op…
fbusato Jan 20, 2026
866cf15
fix cuda-clang warning
fbusato Jan 21, 2026
b1935b0
fix MSVC warning
fbusato Jan 21, 2026
0f4694e
Update docs/libcudacxx/extended_api/functional/operator_properties.rst
fbusato Jan 21, 2026
4680422
Update libcudacxx/include/cuda/__functional/operator_properties.h
fbusato Jan 21, 2026
efe4eb4
use constexpr variables only
fbusato Jan 21, 2026
d25b269
formatting
fbusato Jan 21, 2026
0e031e4
replace CUB is_identity_v
fbusato Jan 23, 2026
6114f14
refactor with constexpr variables (where possible)
fbusato Jan 28, 2026
b860905
handle const volatile
fbusato Jan 28, 2026
b6f5af8
a few fixes
fbusato Jan 28, 2026
057bb0c
handle noexcept
fbusato Jan 28, 2026
4c0986f
fix compiler warnings
fbusato Jan 29, 2026
91f50b4
fix msvc warnings
fbusato Feb 3, 2026
560bfda
fix GCC7 error
fbusato Feb 3, 2026
d587bca
Merge branch 'main' into operator-properties
fbusato Feb 3, 2026
5876a89
Merge branch 'main' into operator-properties
fbusato Feb 4, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 40 additions & 4 deletions c2h/include/c2h/operator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,61 @@
// SPDX-License-Identifier: BSD-3-Clause
#pragma once

#include <cub/thread/thread_operators.cuh>

#include <cuda/functional>
#include <cuda/std/functional>
#include <cuda/std/limits>
#include <cuda/type_traits>

#include <c2h/custom_type.h>
#include <c2h/extended_types.h>
#include <c2h/test_util_vec.h>

/***********************************************************************************************************************
* CUB operator to identity
**********************************************************************************************************************/

template <typename Operator, typename T>
inline constexpr T identity_v = cub::detail::identity_v<Operator, T>;
template <typename Operator, typename T, typename = void>
inline constexpr T identity_v = cuda::identity_element<Operator, T>();

template <typename T>
inline const T identity_v<cuda::std::plus<>, T> = T{}; // e.g. short2, float2, complex<__half> etc.

/***********************************************************************************************************************
* half_t specializations
**********************************************************************************************************************/

template <>
inline const half_t identity_v<cuda::std::plus<>, half_t> = half_t{0.0f};

template <>
inline const half_t identity_v<cuda::std::multiplies<>, half_t> = half_t{1.0f};

template <>
inline const half_t identity_v<cuda::minimum<>, half_t> = cuda::std::numeric_limits<half_t>::max();

template <>
inline const half_t identity_v<cuda::maximum<>, half_t> = cuda::std::numeric_limits<half_t>::lowest();

/***********************************************************************************************************************
* bfloat16_t specializations
**********************************************************************************************************************/

template <>
inline const bfloat16_t identity_v<cuda::std::plus<>, bfloat16_t> = bfloat16_t{0.0f};

template <>
inline const bfloat16_t identity_v<cuda::std::multiplies<>, bfloat16_t> = bfloat16_t{1.0f};

template <>
inline const bfloat16_t identity_v<cuda::minimum<>, bfloat16_t> = cuda::std::numeric_limits<bfloat16_t>::max();

template <>
inline const bfloat16_t identity_v<cuda::maximum<>, bfloat16_t> = cuda::std::numeric_limits<bfloat16_t>::lowest();

/***********************************************************************************************************************
* short2, ushort2, float2 specializations
**********************************************************************************************************************/

template <>
inline constexpr short2 identity_v<cuda::maximum<>, short2> =
short2{cuda::std::numeric_limits<int16_t>::lowest(), cuda::std::numeric_limits<int16_t>::lowest()};
Expand Down
78 changes: 0 additions & 78 deletions cub/cub/thread/thread_operators.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -579,84 +579,6 @@ template <typename Operator>
return op;
}
}

//----------------------------------------------------------------------------------------------------------------------
// Identity

template <typename Op, typename T = void>
inline constexpr T identity_v;

template <typename T>
inline constexpr T identity_v<::cuda::minimum<>, T> = ::cuda::std::numeric_limits<T>::max();

template <typename T>
inline constexpr T identity_v<::cuda::minimum<T>, T> = ::cuda::std::numeric_limits<T>::max();

template <typename T>
inline constexpr T identity_v<::cuda::minimum<T>, void> = ::cuda::std::numeric_limits<T>::max();

template <typename T>
inline constexpr T identity_v<::cuda::maximum<>, T> = ::cuda::std::numeric_limits<T>::lowest();

template <typename T>
inline constexpr T identity_v<::cuda::maximum<T>, T> = ::cuda::std::numeric_limits<T>::lowest();

template <typename T>
inline constexpr T identity_v<::cuda::maximum<T>, void> = ::cuda::std::numeric_limits<T>::lowest();

template <typename T>
inline constexpr T identity_v<::cuda::std::plus<T>, T> = T{};

template <typename T>
inline constexpr T identity_v<::cuda::std::plus<>, T> = T{};

template <typename T>
inline constexpr T identity_v<::cuda::std::plus<T>, void> = T{};

template <typename T>
inline constexpr T identity_v<::cuda::std::bit_and<>, T> = static_cast<T>(~T{});

template <typename T>
inline constexpr T identity_v<::cuda::std::bit_and<T>, T> = static_cast<T>(~T{});

template <typename T>
inline constexpr T identity_v<::cuda::std::bit_and<T>, void> = static_cast<T>(~T{});

template <typename T>
inline constexpr T identity_v<::cuda::std::bit_or<>, T> = T{};

template <typename T>
inline constexpr T identity_v<::cuda::std::bit_or<T>, T> = T{};

template <typename T>
inline constexpr T identity_v<::cuda::std::bit_or<T>, void> = T{};

template <typename T>
inline constexpr T identity_v<::cuda::std::bit_xor<>, T> = T{};

template <typename T>
inline constexpr T identity_v<::cuda::std::bit_xor<T>, T> = T{};

template <typename T>
inline constexpr T identity_v<::cuda::std::bit_xor<T>, void> = T{};

template <typename T>
inline constexpr T identity_v<::cuda::std::logical_and<>, T> = true;

template <typename T>
inline constexpr T identity_v<::cuda::std::logical_and<T>, T> = true;

template <typename T>
inline constexpr T identity_v<::cuda::std::logical_and<T>, void> = true;

template <typename T>
inline constexpr T identity_v<::cuda::std::logical_or<>, T> = false;

template <typename T>
inline constexpr T identity_v<::cuda::std::logical_or<T>, T> = false;

template <typename T>
inline constexpr T identity_v<::cuda::std::logical_or<T>, void> = false;
} // namespace detail

#endif // !_CCCL_DOXYGEN_INVOKED
Expand Down
7 changes: 4 additions & 3 deletions cub/test/catch2_test_thread_scan_exclusive_partial.cu
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <c2h/catch2_test_helper.h>
#include <c2h/extended_types.h>
#include <c2h/generators.h>
#include <c2h/operator.cuh>

constexpr int max_size = 16;
constexpr int num_seeds = 3;
Expand Down Expand Up @@ -158,7 +159,7 @@ C2H_TEST("ThreadScanExclusive Integral Type Tests",
using dist_param = dist_interval<value_t, op_t, num_items, accum_t, output_t>;
using filler_dist_param = dist_interval<accum_t, op_t, num_items, accum_t, output_t>;
constexpr auto scan_op = op_t{};
constexpr auto operator_identity = cub_operator_to_identity<accum_t, op_t>::value();
constexpr auto operator_identity = cuda::identity_element<op_t, accum_t>();
const int valid_items = GENERATE_COPY(
take(1, random(2, cuda::std::max(2, num_items - 1))),
take(1, random(num_items + 2, cuda::std::numeric_limits<int>::max())),
Expand Down Expand Up @@ -215,7 +216,7 @@ C2H_TEST("ThreadScanExclusive Floating-Point Type Tests",
using dist_param = dist_interval<value_t, op_t, num_items, accum_t, output_t>;
using filler_dist_param = dist_interval<accum_t, op_t, num_items, accum_t, output_t>;
constexpr auto scan_op = op_t{};
const auto operator_identity = cub_operator_to_identity<accum_t, op_t>::value();
const auto operator_identity = cuda::identity_element<op_t, accum_t>();
const int valid_items = GENERATE_COPY(
take(1, random(2, cuda::std::max(2, num_items - 1))),
take(1, random(num_items + 2, cuda::std::numeric_limits<int>::max())),
Expand Down Expand Up @@ -274,7 +275,7 @@ C2H_TEST("ThreadScanExclusive Narrow PrecisionType Tests",
using dist_param = dist_interval<value_t, op_t, num_items, accum_t, output_t>;
using filler_dist_param = dist_interval<accum_t, op_t, num_items, accum_t, output_t>;
constexpr auto scan_op = unwrap_op(std::true_type{}, op_t{});
const auto operator_identity = cub_operator_to_identity<accum_t, op_t>::value();
const auto operator_identity = identity_v<op_t, accum_t>;
const int valid_items = GENERATE_COPY(
take(1, random(2, cuda::std::max(2, num_items - 1))),
take(1, random(num_items + 2, cuda::std::numeric_limits<int>::max())),
Expand Down
7 changes: 4 additions & 3 deletions cub/test/catch2_test_thread_scan_inclusive_partial.cu
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <c2h/catch2_test_helper.h>
#include <c2h/extended_types.h>
#include <c2h/generators.h>
#include <c2h/operator.cuh>

constexpr int max_size = 16;
constexpr int num_seeds = 3;
Expand Down Expand Up @@ -158,7 +159,7 @@ C2H_TEST("ThreadScanInclusive Integral Type Tests",
using dist_param = dist_interval<value_t, op_t, num_items, accum_t, output_t>;
using filler_dist_param = dist_interval<accum_t, op_t, num_items, accum_t, output_t>;
constexpr auto scan_op = op_t{};
constexpr auto operator_identity = cub_operator_to_identity<accum_t, op_t>::value();
constexpr auto operator_identity = cuda::identity_element<op_t, accum_t>();
const int valid_items = GENERATE_COPY(
take(1, random(2, cuda::std::max(2, num_items - 1))),
take(1, random(num_items + 2, cuda::std::numeric_limits<int>::max())),
Expand Down Expand Up @@ -218,7 +219,7 @@ C2H_TEST("ThreadScanInclusive Floating-Point Type Tests",
using dist_param = dist_interval<value_t, op_t, num_items, accum_t, output_t>;
using filler_dist_param = dist_interval<accum_t, op_t, num_items, accum_t, output_t>;
constexpr auto scan_op = op_t{};
const auto operator_identity = cub_operator_to_identity<accum_t, op_t>::value();
const auto operator_identity = cuda::identity_element<op_t, accum_t>();
const int valid_items = GENERATE_COPY(
take(1, random(2, cuda::std::max(2, num_items - 1))),
take(1, random(num_items + 2, cuda::std::numeric_limits<int>::max())),
Expand Down Expand Up @@ -280,7 +281,7 @@ C2H_TEST("ThreadScanInclusive Narrow PrecisionType Tests",
using dist_param = dist_interval<value_t, op_t, num_items, accum_t, output_t>;
using filler_dist_param = dist_interval<accum_t, op_t, num_items, accum_t, output_t>;
constexpr auto scan_op = unwrap_op(std::true_type{}, op_t{});
const auto operator_identity = cub_operator_to_identity<accum_t, op_t>::value();
const auto operator_identity = identity_v<op_t, accum_t>;
const int valid_items = GENERATE_COPY(
take(1, random(2, cuda::std::max(2, num_items - 1))),
take(1, random(num_items + 2, cuda::std::numeric_limits<int>::max())),
Expand Down
91 changes: 12 additions & 79 deletions cub/test/thread_reduce/catch2_test_thread_reduce.cu
Original file line number Diff line number Diff line change
Expand Up @@ -136,76 +136,6 @@ struct cub_operator_to_std<T, cuda::maximum<>>
template <typename T, typename Operator>
using cub_operator_to_std_t = typename cub_operator_to_std<T, Operator>::type;

/***********************************************************************************************************************
* CUB operator to identity
**********************************************************************************************************************/

template <typename T, typename Operator, typename = void>
struct cub_operator_to_identity;

template <typename T>
struct cub_operator_to_identity<T, cuda::std::plus<>>
{
static constexpr T value()
{
return T{};
}
};

template <typename T>
struct cub_operator_to_identity<T, cuda::std::multiplies<>>
{
static constexpr T value()
{
return T{1};
}
};

template <typename T>
struct cub_operator_to_identity<T, cuda::std::bit_and<>>
{
static constexpr T value()
{
return static_cast<T>(~T{0});
}
};

template <typename T>
struct cub_operator_to_identity<T, cuda::std::bit_or<>>
{
static constexpr T value()
{
return T{0};
}
};

template <typename T>
struct cub_operator_to_identity<T, cuda::std::bit_xor<>>
{
static constexpr T value()
{
return T{0};
}
};

template <typename T>
struct cub_operator_to_identity<T, cuda::minimum<>>
{
static constexpr T value()
{
return ::std::numeric_limits<T>::max();
}
};

template <typename T>
struct cub_operator_to_identity<T, cuda::maximum<>>
{
static constexpr T value()
{
return ::std::numeric_limits<T>::min();
}
};

/***********************************************************************************************************************
* Type list definition
**********************************************************************************************************************/
Expand Down Expand Up @@ -341,9 +271,10 @@ constexpr int num_seeds = 10;
C2H_TEST("ThreadReduce Integral Type Tests", "[reduce][thread]", integral_type_list, cub_operator_integral_list)
{
using value_t = c2h::get<0, TestType>;
constexpr auto reduce_op = c2h::get<1, TestType>{};
constexpr auto std_reduce_op = cub_operator_to_std_t<value_t, c2h::get<1, TestType>>{};
constexpr auto operator_identity = cub_operator_to_identity<value_t, c2h::get<1, TestType>>::value();
using op_t = c2h::get<1, TestType>;
constexpr auto reduce_op = op_t{};
constexpr auto std_reduce_op = cub_operator_to_std_t<value_t, op_t>{};
constexpr auto operator_identity = cuda::identity_element<op_t, value_t>();
CAPTURE(c2h::type_name<value_t>(), max_size, c2h::type_name<decltype(reduce_op)>());
c2h::device_vector<value_t> d_in(max_size);
c2h::device_vector<value_t> d_out(1);
Expand All @@ -360,9 +291,10 @@ C2H_TEST("ThreadReduce Integral Type Tests", "[reduce][thread]", integral_type_l
C2H_TEST("ThreadReduce Floating-Point Type Tests", "[reduce][thread]", fp_type_list, cub_operator_fp_list)
{
using value_t = c2h::get<0, TestType>;
constexpr auto reduce_op = c2h::get<1, TestType>{};
constexpr auto std_reduce_op = cub_operator_to_std_t<value_t, c2h::get<1, TestType>>{};
const auto operator_identity = cub_operator_to_identity<value_t, c2h::get<1, TestType>>::value();
using op_t = c2h::get<1, TestType>;
constexpr auto reduce_op = op_t{};
constexpr auto std_reduce_op = cub_operator_to_std_t<value_t, op_t>{};
const auto operator_identity = cuda::identity_element<op_t, value_t>();
CAPTURE(c2h::type_name<value_t>(), max_size, c2h::type_name<decltype(reduce_op)>());
c2h::device_vector<value_t> d_in(max_size);
c2h::device_vector<value_t> d_out(1);
Expand All @@ -384,9 +316,10 @@ C2H_TEST("ThreadReduce Narrow PrecisionType Tests",
cub_operator_fp_list)
{
using value_t = c2h::get<0, TestType>;
constexpr auto reduce_op = c2h::get<1, TestType>{};
constexpr auto std_reduce_op = cub_operator_to_std_t<float, c2h::get<1, TestType>>{};
const auto operator_identity = cub_operator_to_identity<float, c2h::get<1, TestType>>::value();
using op_t = c2h::get<1, TestType>;
constexpr auto reduce_op = op_t{};
constexpr auto std_reduce_op = cub_operator_to_std_t<float, op_t>{};
const auto operator_identity = cuda::identity_element<op_t, float>();
c2h::device_vector<value_t> d_in(max_size);
c2h::device_vector<value_t> d_out(1);
c2h::gen(C2H_SEED(num_seeds), d_in, value_t{1.0f}, value_t{2.0f});
Expand Down
Loading