-
Notifications
You must be signed in to change notification settings - Fork 353
Implement PCG64 as extension #6292
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
438fc34
56d9811
f587dd2
baacf43
0f82787
c67254e
965e201
d635d9d
75adb20
f0c34ad
03c2e23
54275c6
b89e216
f189363
009ada6
a628a81
46b7eae
78e1f4a
a289174
5d9cdae
bca96fb
57c5b9a
7f60a3b
d2b51d8
36c6a1e
a4cfa4f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,274 @@ | ||||||
| //===----------------------------------------------------------------------===// | ||||||
| // | ||||||
| // Part of libcu++, the C++ Standard Library for your entire system, | ||||||
| // under the Apache License v2.0 with LLVM Exceptions. | ||||||
| // See https://llvm.org/LICENSE.txt for license information. | ||||||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||
| // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. | ||||||
| // | ||||||
| //===----------------------------------------------------------------------===// | ||||||
|
|
||||||
| #ifndef _CUDA_STD___RANDOM_PCG_ENGINE_H | ||||||
| #define _CUDA_STD___RANDOM_PCG_ENGINE_H | ||||||
|
|
||||||
| #include <cuda/std/detail/__config> | ||||||
|
|
||||||
| #if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) | ||||||
| # pragma GCC system_header | ||||||
| #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) | ||||||
| # pragma clang system_header | ||||||
| #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) | ||||||
| # pragma system_header | ||||||
| #endif // no system header | ||||||
|
|
||||||
| #include <cuda/std/__bit/rotate.h> | ||||||
| #include <cuda/std/__limits/numeric_limits.h> | ||||||
| #include <cuda/std/__random/is_seed_sequence.h> | ||||||
| #include <cuda/std/__type_traits/enable_if.h> | ||||||
| #include <cuda/std/__type_traits/integral_constant.h> | ||||||
| #include <cuda/std/__utility/pair.h> | ||||||
| #include <cuda/std/array> | ||||||
| #include <cuda/std/cstdint> | ||||||
|
|
||||||
| #include <cuda/std/__cccl/prologue.h> | ||||||
|
|
||||||
| _CCCL_BEGIN_NAMESPACE_CUDA | ||||||
|
|
||||||
| #if _CCCL_HAS_INT128() | ||||||
|
|
||||||
| //! @brief A 64-bit permuted congruential generator (PCG) random number engine. | ||||||
| //! | ||||||
| //! This is a high-quality, fast random number generator based on the PCG family | ||||||
| //! of algorithms. It uses a 128-bit internal state and produces 64-bit output | ||||||
| //! values using a permutation function applied to a linear congruential generator. | ||||||
| //! | ||||||
| //! Most users should use the predefined `pcg64` type alias instead of this class directly. | ||||||
| //! | ||||||
| //! @tparam _AHi The high 64 bits of the multiplier constant for the LCG. | ||||||
| //! @tparam _ALo The low 64 bits of the multiplier constant for the LCG. | ||||||
| //! @tparam _CHi The high 64 bits of the increment constant for the LCG. | ||||||
| //! @tparam _CLo The low 64 bits of the increment constant for the LCG. | ||||||
| //! | ||||||
| //! @note This class requires compiler support for 128-bit integers. | ||||||
| //! | ||||||
| //! @see https://www.pcg-random.org/ for details on the PCG family of generators. | ||||||
| template <::cuda::std::uint64_t _AHi, ::cuda::std::uint64_t _ALo, ::cuda::std::uint64_t _CHi, ::cuda::std::uint64_t _CLo> | ||||||
| class pcg64_engine | ||||||
| { | ||||||
| public: | ||||||
| using result_type = ::cuda::std::uint64_t; | ||||||
|
|
||||||
| private: | ||||||
| using __bitcount_t = ::cuda::std::uint8_t; | ||||||
|
|
||||||
| static constexpr __uint128_t __multiplier = (static_cast<__uint128_t>(_AHi) << 64) | _ALo; | ||||||
| static constexpr __uint128_t __increment = (static_cast<__uint128_t>(_CHi) << 64) | _CLo; | ||||||
|
|
||||||
| //! @brief Permutes a 128-bit state into a 64-bit output value. | ||||||
| //! @param __internal The 128-bit state to permute. | ||||||
| //! @return The xor-folded low 64 bits, rotated right by the amount held in the top 6 state bits. | ||||||
| [[nodiscard]] _CCCL_API static constexpr result_type __output_transform(__uint128_t __internal) noexcept | ||||||
| { | ||||||
| // Bits 122..127 of the state select the rotation amount (0..63). | ||||||
| const auto __rotation = static_cast<__bitcount_t>(__internal >> 122); | ||||||
| // Fold the high 64 bits into the low 64 bits; the cast keeps only the low half. | ||||||
| const auto __folded = static_cast<result_type>(__internal ^ (__internal >> 64)); | ||||||
| return ::cuda::std::rotr(__folded, __rotation); | ||||||
| } | ||||||
|
|
||||||
| //! @brief Composes the LCG step `x -> x * __multiplier + __increment` with itself `__delta` times. | ||||||
| //! @param __delta Number of steps to compose. | ||||||
| //! @return A pair `{m, p}` such that `__delta` steps map a state `x` to `x * m + p` (modulo 2^128). | ||||||
| [[nodiscard]] _CCCL_API static constexpr ::cuda::std::pair<__uint128_t, __uint128_t> | ||||||
| __power_mod(__uint128_t __delta) noexcept | ||||||
| { | ||||||
| // Accumulated transform, starting from the identity map x -> 1 * x + 0. | ||||||
| __uint128_t __result_mult = 1; | ||||||
| __uint128_t __result_plus = 0; | ||||||
| // Transform for the current power-of-two number of steps. | ||||||
| __uint128_t __step_mult = __multiplier; | ||||||
| __uint128_t __step_plus = __increment; | ||||||
| for (; __delta != 0; __delta >>= 1) | ||||||
| { | ||||||
| if ((__delta & 1) != 0) | ||||||
| { | ||||||
| // This bit is set: apply the current power-of-two transform to the accumulated one. | ||||||
| __result_plus = __result_plus * __step_mult + __step_plus; | ||||||
| __result_mult *= __step_mult; | ||||||
| } | ||||||
| // Square the current transform; the additive part must be updated before the multiplier. | ||||||
| __step_plus *= __step_mult + 1; | ||||||
| __step_mult *= __step_mult; | ||||||
| } | ||||||
| return ::cuda::std::pair{__result_mult, __result_plus}; | ||||||
| } | ||||||
| // 128-bit LCG state; value-initialized here and overwritten by every seed() overload. | ||||||
| __uint128_t __x_{}; | ||||||
|
|
||||||
| public: | ||||||
| //! @brief Seed used by the default constructor and by `seed()` when no argument is given. | ||||||
| static constexpr result_type default_seed = 0xcafef00dd15ea5e5ULL; | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why? Is this a rule?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Definitely not a rule, but it is suggested by some C++ secure coding guidelines, e.g. AUTOSAR C++14 Rule A2-13-5: https://www.autosar.org/fileadmin/standards/R18-03_R1.4.0/AP/AUTOSAR_RS_CPP14Guidelines.pdf. |
||||||
|
|
||||||
| //! @brief Lower bound of the engine's output range. | ||||||
| //! @return Zero, for every pcg64_engine instantiation. | ||||||
| [[nodiscard]] _CCCL_API static constexpr result_type min() noexcept | ||||||
| { | ||||||
| return result_type{0}; | ||||||
| } | ||||||
| //! @brief Upper bound of the engine's output range. | ||||||
| //! @return The largest value representable by `result_type` (all 64 bits set). | ||||||
| [[nodiscard]] _CCCL_API static constexpr result_type max() noexcept | ||||||
| { | ||||||
| // result_type is an unsigned 64-bit integer, so inverting zero yields its maximum. | ||||||
| return static_cast<result_type>(~result_type{0}); | ||||||
| } | ||||||
|
|
||||||
| // constructors and seeding functions | ||||||
| //! @brief Creates an engine seeded with `default_seed`. | ||||||
| _CCCL_API constexpr pcg64_engine() noexcept | ||||||
| { | ||||||
| // seed() without an argument falls back to default_seed. | ||||||
| seed(); | ||||||
| } | ||||||
| //! @brief Creates an engine whose initial state is derived from an integer seed. | ||||||
| //! @param __seed Value forwarded to `seed(result_type)`. | ||||||
| _CCCL_API constexpr explicit pcg64_engine(result_type __seed) noexcept | ||||||
| { | ||||||
| this->seed(__seed); | ||||||
| } | ||||||
|
|
||||||
| //! @brief Creates an engine whose initial state is drawn from a seed sequence. | ||||||
| //! @tparam _Sseq Type accepted by `__is_seed_sequence` for this engine. | ||||||
| //! @param __seq Seed sequence forwarded to `seed(_Sseq&)`. | ||||||
| _CCCL_TEMPLATE(class _Sseq) | ||||||
| _CCCL_REQUIRES(::cuda::std::__is_seed_sequence<_Sseq, pcg64_engine>) | ||||||
| _CCCL_API constexpr explicit pcg64_engine(_Sseq& __seq) | ||||||
| { | ||||||
| this->seed(__seq); | ||||||
| } | ||||||
| //! @brief Reseeds the engine from a 64-bit integer. | ||||||
| //! @param __seed Seed value; `default_seed` is used when omitted. | ||||||
| _CCCL_API constexpr void seed(result_type __seed = default_seed) noexcept | ||||||
| { | ||||||
| // Offset the seed by the increment (in 128-bit arithmetic), then apply one LCG step. | ||||||
| const __uint128_t __offset_seed = __seed + __increment; | ||||||
| __x_ = __offset_seed * __multiplier + __increment; | ||||||
| } | ||||||
|
|
||||||
| //! @brief Seed the engine from a SeedSequence-like object. | ||||||
| //! | ||||||
| //! Requests four 32-bit words from `__seq` and packs them, first word most significant, | ||||||
| //! into a 128-bit seed that is then mixed into the state the same way as an integer seed. | ||||||
| //! @tparam _Sseq A SeedSequence-like type providing entropy words. | ||||||
| //! @param __seq A SeedSequence-like object providing 128 bits of entropy. | ||||||
| _CCCL_TEMPLATE(class _Sseq) | ||||||
| _CCCL_REQUIRES(::cuda::std::__is_seed_sequence<_Sseq, pcg64_engine>) | ||||||
| _CCCL_API constexpr void seed(_Sseq& __seq) | ||||||
| { | ||||||
| // Locals use reserved (__-prefixed) names so that user-defined macros cannot collide with them. | ||||||
| ::cuda::std::array<::cuda::std::uint32_t, 4> __data = {}; | ||||||
| __seq.generate(__data.begin(), __data.end()); | ||||||
| __uint128_t __seed_val = __data[0]; | ||||||
| __seed_val = (__seed_val << 32) | __data[1]; | ||||||
| __seed_val = (__seed_val << 32) | __data[2]; | ||||||
| __seed_val = (__seed_val << 32) | __data[3]; | ||||||
| __x_ = (__seed_val + __increment) * __multiplier + __increment; | ||||||
| } | ||||||
|
|
||||||
| //! @brief Generate the next pseudo-random value. | ||||||
| //! | ||||||
| //! Advances the internal LCG state and applies the PCG output | ||||||
| //! permutation to produce a 64-bit result. | ||||||
| //! @return A 64-bit pseudo-random value. | ||||||
| _CCCL_API constexpr result_type operator()() noexcept | ||||||
| { | ||||||
| // Step the LCG first, so the returned value is derived from the updated state. | ||||||
| __x_ = __x_ * __multiplier + __increment; | ||||||
| return __output_transform(__x_); | ||||||
fbusato marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
| } | ||||||
|
|
||||||
| //! @brief Skips ahead in the sequence without producing the intermediate outputs. | ||||||
| //! @param __z How many outputs to skip. | ||||||
| _CCCL_API constexpr void discard(unsigned long long __z) noexcept | ||||||
| { | ||||||
| // __power_mod folds __z LCG steps into one multiply-add, which is applied once here. | ||||||
| const auto __jump = __power_mod(__z); | ||||||
| __x_ = __x_ * __jump.first + __jump.second; | ||||||
| } | ||||||
|
|
||||||
| //! @brief Compares two engines for equality. | ||||||
| //! @return True when the 128-bit internal states of `__x` and `__y` are the same. | ||||||
| [[nodiscard]] _CCCL_API constexpr friend bool operator==(const pcg64_engine& __x, const pcg64_engine& __y) noexcept | ||||||
| { | ||||||
| // The state is the only data member, so comparing it compares the whole engine. | ||||||
| const bool __same_state = (__x.__x_ == __y.__x_); | ||||||
| return __same_state; | ||||||
| } | ||||||
|
|
||||||
| # if _CCCL_STD_VER <= 2017 | ||||||
| //! @brief Inequality comparison for two engines. | ||||||
| //! @return True if the engines' internal states differ (the negation of `operator==`). | ||||||
| [[nodiscard]] _CCCL_API constexpr friend bool operator!=(const pcg64_engine& __x, const pcg64_engine& __y) noexcept | ||||||
RAMitchell marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
| { | ||||||
| return !(__x == __y); | ||||||
| } | ||||||
| # endif // _CCCL_STD_VER <= 2017 | ||||||
|
|
||||||
| # if !_CCCL_COMPILER(NVRTC) | ||||||
RAMitchell marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
|
|
||||||
| //! @brief Writes the engine state to a stream as two decimal 64-bit words. | ||||||
| //! | ||||||
| //! The low 64 bits are written first, then the high 64 bits; each word is followed by a space. | ||||||
| //! The stream's format flags and fill character are restored before returning. | ||||||
| //! @param __os Output stream to write to. | ||||||
| //! @param __e Engine whose state is written. | ||||||
| //! @return `__os`. | ||||||
| template <typename _CharT, typename _Traits> | ||||||
| _CCCL_API friend ::std::basic_ostream<_CharT, _Traits>& | ||||||
| operator<<(::std::basic_ostream<_CharT, _Traits>& __os, const pcg64_engine& __e) | ||||||
| { | ||||||
| using ostream_type = ::std::basic_ostream<_CharT, _Traits>; | ||||||
| using ios_base = typename ostream_type::ios_base; | ||||||
|
|
||||||||
| // Remember the caller's formatting state so it can be put back afterwards. | ||||||
| const typename ios_base::fmtflags __saved_flags = __os.flags(); | ||||||
| const _CharT __saved_fill = __os.fill(); | ||||||
| const _CharT __space = __os.widen(' '); | ||||||
|
|
||||||||
| __os.flags(ios_base::dec | ios_base::fixed | ios_base::left); | ||||||
| __os.fill(__space); | ||||||
| // Split the 128-bit state into two 64-bit words for output. | ||||||
| const auto __low_word = static_cast<::cuda::std::uint64_t>(__e.__x_); | ||||||
| const auto __high_word = static_cast<::cuda::std::uint64_t>(__e.__x_ >> 64); | ||||||
| __os << __low_word << __space << __high_word << __space; | ||||||
| // Put the caller's formatting state back. | ||||||
| __os.fill(__saved_fill); | ||||||
| __os.flags(__saved_flags); | ||||||
|
|
||||||||
| return __os; | ||||||
| } | ||||||
|
|
||||||
| //! @brief Reads an engine state previously written by `operator<<`. | ||||||
| //! | ||||||
| //! Expects two decimal 64-bit words: the low 64 bits of the state followed by the high 64 bits. | ||||||
| //! If either extraction fails, the stream is left in its failed state and `__e` is not modified. | ||||||
| //! The stream's format flags are restored before returning. | ||||||
| //! @param __is Input stream to read from. | ||||||
| //! @param __e Engine that receives the state on success. | ||||||
| //! @return `__is`. | ||||||
| template <typename _CharT, typename _Traits> | ||||||
| _CCCL_API friend ::std::basic_istream<_CharT, _Traits>& | ||||||
| operator>>(::std::basic_istream<_CharT, _Traits>& __is, pcg64_engine& __e) | ||||||
| { | ||||||
| using istream_type = ::std::basic_istream<_CharT, _Traits>; | ||||||
| using ios_base = typename istream_type::ios_base; | ||||||
|
|
||||||||
| // save old flags | ||||||
| const typename ios_base::fmtflags __flags = __is.flags(); | ||||||
|
|
||||||||
| __is.flags(ios_base::dec | ios_base::skipws); | ||||||
|
|
||||||||
| // Zero-initialize: after a failed first read the second extraction leaves its target untouched. | ||||||
| ::cuda::std::uint64_t __low = 0; | ||||||
| ::cuda::std::uint64_t __hi = 0; | ||||||
| __is >> __low; | ||||||
| __is >> __hi; | ||||||
| // Only commit the new state when both words were read; on bad input the engine stays unchanged. | ||||||
| if (!__is.fail()) | ||||||
| { | ||||||
| __e.__x_ = (static_cast<__uint128_t>(__hi) << 64) | __low; | ||||||
| } | ||||||
| // restore flags | ||||||
| __is.flags(__flags); | ||||||
|
|
||||||||
| return __is; | ||||||
| } | ||||||
| # endif // !_CCCL_COMPILER(NVRTC) | ||||||
| }; | ||||||
|
|
||||||
| //! @class pcg64 | ||||||
| //! @brief A 128-bit state PCG engine producing 64-bit output values. | ||||||
| //! | ||||||
| //! This class implements the PCG XSL RR 128/64 generator described in: | ||||||
| //! O'Neill, Melissa E. "PCG: A family of simple fast space-efficient statistically good algorithms for random number | ||||||
| //! generation." ACM Transactions on Mathematical Software 204 (2014): 1-46. The engine keeps a 128-bit internal state | ||||||
| //! and returns 64-bit pseudo-random values. PCG64 is a fast general-purpose PRNG that passes common statistical tests, | ||||||
| //! has a long period (2^128), and can discard values in O(log n) time. | ||||||
| //! | ||||||
| //! PCG64 produces the 10000th value 11135645891219275043 when seeded with the default seed. | ||||||
| //! | ||||||
| //! Usage example: | ||||||
| //! @code | ||||||
| //! #include <cuda/random> | ||||||
| //! | ||||||
| //! cuda::pcg64 eng; // default seed | ||||||
| //! uint64_t v = eng(); // draw value | ||||||
| //! eng.seed(42); // reseed | ||||||
| //! eng.discard(10); // skip 10 outputs | ||||||
| //! @endcode | ||||||
| //! | ||||||
| //! @note This class requires compiler support for 128-bit integers. | ||||||
| // Template arguments are, in order: multiplier high/low 64 bits, then increment high/low 64 bits. | ||||||
| using pcg64 = | ||||||
| pcg64_engine<2549297995355413924ull, 4865540595714422341ull, 6364136223846793005ull, 1442695040888963407ull>; | ||||||
|
|
||||||
| #endif // _CCCL_HAS_INT128() | ||||||
| _CCCL_END_NAMESPACE_CUDA | ||||||
|
|
||||||
| #include <cuda/std/__cccl/epilogue.h> | ||||||
|
|
||||||
| #endif // _CUDA_STD___RANDOM_PCG_ENGINE_H | ||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,56 @@ | ||
| //===----------------------------------------------------------------------===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| // <random> | ||
|
|
||
| #include <cuda/__random/pcg_engine.h> | ||
|
|
||
| #include "random_utilities/test_engine.h" | ||
|
|
||
| #if _CCCL_HAS_INT128() | ||
|
|
||
| // Checks the first output after seeding and discarding against known-good values. | ||
| // Always returns true; mismatches are reported through assert. | ||
| __host__ __device__ constexpr bool test_against_reference() | ||
| { | ||
| // reference values obtained from other library implementations | ||
| constexpr int seeds[] = {10823018, 0, 23}; | ||
| constexpr int discards[] = {0, 5, 100}; | ||
| // One entry per (seed, discard) pair, with the discard index varying fastest. | ||
| constexpr cuda::std::uint64_t reference_values[] = { | ||
| 11492238902574317825ull, | ||
| 8322011739913317518ull, | ||
| 16162292887622315191ull, | ||
| 74029666500212977ull, | ||
| 7381380909356947872ull, | ||
| 13353295228484708474ull, | ||
| 11051782693829522167ull, | ||
| 8996870419832475944ull, | ||
| 14156256770140333413ull}; | ||
|
|
||||
| constexpr int num_seeds = sizeof(seeds) / sizeof(seeds[0]); | ||
| constexpr int num_discards = sizeof(discards) / sizeof(discards[0]); | ||
| for (int seed_idx = 0; seed_idx < num_seeds; ++seed_idx) | ||
| { | ||
| for (int discard_idx = 0; discard_idx < num_discards; ++discard_idx) | ||
| { | ||
| int seed_value = seeds[seed_idx]; | ||
| int discard_count = discards[discard_idx]; | ||
| cuda::pcg64 rng(seed_value); | ||
| rng.discard(discard_count); | ||
| assert(rng() == reference_values[seed_idx * num_discards + discard_idx]); | ||
| } | ||
| } | ||
| return true; | ||
| } | ||
| #endif // _CCCL_HAS_INT128() | ||
|
|
||
| // Test entry point; the engine tests only run when 128-bit integers are available. | ||
| int main(int, char**) | ||
| { | ||
| #if _CCCL_HAS_INT128() | ||
| // Presumably the expected 10000th output for a default-seeded engine (it matches the value in the pcg64 docs). | ||
| // The `ull` suffix matches the other 64-bit literals here and does not depend on the width of `unsigned long`. | ||
| test_engine<cuda::pcg64, 11135645891219275043ull>(); | ||
| test_against_reference(); | ||
| #endif // _CCCL_HAS_INT128() | ||
| return 0; | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
would be better to keep private members at the end of the class
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually no, the general advice is to move them to the front, because they are crucial to understanding what is in the class
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I normally belong to the "end of class" church, but we can do whatever here :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The point being that a class implementation usually starts with constructors and other SMFs (special member functions), so I need to know what the actual data members are. In that case, and in others, I have to jump to the back of a potentially long definition to know what I am working with
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Interesting, I always thought about it from the user's perspective. Users are interested in the interface, not in implementation details. Also, the implementation rarely changes.
Anyway, I'm fine with both approaches.