Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
438fc34
Basic implementation
RAMitchell Oct 17, 2025
56d9811
First attempt
RAMitchell Oct 20, 2025
f587dd2
Remove redundant file
RAMitchell Oct 20, 2025
baacf43
Passes tests
RAMitchell Oct 20, 2025
0f82787
Style
RAMitchell Oct 20, 2025
c67254e
Add some docs
RAMitchell Oct 20, 2025
965e201
Remove file
RAMitchell Oct 20, 2025
d635d9d
Review comments
RAMitchell Oct 21, 2025
75adb20
Efficient discard
RAMitchell Oct 21, 2025
f0c34ad
Guard against int128
RAMitchell Oct 21, 2025
03c2e23
Fix endif
RAMitchell Oct 24, 2025
54275c6
Merge branch 'main' of github.com:NVIDIA/cccl into pcg
RAMitchell Oct 28, 2025
b89e216
Pass tests
RAMitchell Oct 28, 2025
f189363
Test against reference values
RAMitchell Oct 28, 2025
009ada6
Review comments
RAMitchell Nov 7, 2025
a628a81
Create alias pcg64
RAMitchell Nov 7, 2025
46b7eae
Merge branch 'main' of github.com:NVIDIA/cccl into pcg
RAMitchell Nov 10, 2025
78e1f4a
Fix tests
RAMitchell Nov 10, 2025
a289174
Try again guarding msvc test
RAMitchell Nov 10, 2025
5d9cdae
Merge branch 'main' of github.com:NVIDIA/cccl into pcg
RAMitchell Nov 10, 2025
bca96fb
Review comments
RAMitchell Nov 12, 2025
57c5b9a
Merge branch 'main' of github.com:NVIDIA/cccl into pcg
RAMitchell Nov 13, 2025
7f60a3b
Merge branch 'main' of github.com:NVIDIA/cccl into pcg
RAMitchell Nov 14, 2025
d2b51d8
Review comments
RAMitchell Nov 17, 2025
36c6a1e
Move test files
RAMitchell Nov 17, 2025
a4cfa4f
Review comments
RAMitchell Nov 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
274 changes: 274 additions & 0 deletions libcudacxx/include/cuda/__random/pcg_engine.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,274 @@
//===----------------------------------------------------------------------===//
//
// Part of libcu++, the C++ Standard Library for your entire system,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#ifndef _CUDA_STD___RANDOM_PCG_ENGINE_H
#define _CUDA_STD___RANDOM_PCG_ENGINE_H

#include <cuda/std/detail/__config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#include <cuda/std/__bit/rotate.h>
#include <cuda/std/__limits/numeric_limits.h>
#include <cuda/std/__random/is_seed_sequence.h>
#include <cuda/std/__type_traits/enable_if.h>
#include <cuda/std/__type_traits/integral_constant.h>
#include <cuda/std/__utility/pair.h>
#include <cuda/std/array>
#include <cuda/std/cstdint>

#include <cuda/std/__cccl/prologue.h>

_CCCL_BEGIN_NAMESPACE_CUDA

#if _CCCL_HAS_INT128()

//! @brief A 64-bit permuted congruential generator (PCG) random number engine.
//!
//! This is a high-quality, fast random number generator based on the PCG family
//! of algorithms. It uses a 128-bit internal state and produces 64-bit output
//! values using a permutation function applied to a linear congruential generator.
//!
//! Most users should use the predefined `pcg64` type alias instead of this class directly.
//!
//! @tparam _AHi The high 64 bits of the multiplier constant for the LCG.
//! @tparam _ALo The low 64 bits of the multiplier constant for the LCG.
//! @tparam _CHi The high 64 bits of the increment constant for the LCG.
//! @tparam _CLo The low 64 bits of the increment constant for the LCG.
//!
//! @note This class requires compiler support for 128-bit integers.
//!
//! @see https://www.pcg-random.org/ for details on the PCG family of generators.
template <::cuda::std::uint64_t _AHi, ::cuda::std::uint64_t _ALo, ::cuda::std::uint64_t _CHi, ::cuda::std::uint64_t _CLo>
class pcg64_engine
{
public:
using result_type = ::cuda::std::uint64_t;

private:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

would be better to keep private members at the end of the class

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually no — the general advice is to move them to the front, because they are crucial to understanding what is in the class.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I normally belong to the "end of class" church, but we can do whatever here :)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The point being that usually a class implementation starts with constructors and other SMF, so I need to know what are the actual data members. In that case and others I have to jump around to the back of the potentially long definition to know what I am working with

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting, I always thought about it from the user's perspective. Users are interested in the interface, not in implementation details. Also, the implementation rarely changes.
Anyway, I'm fine with both approaches.

// Narrow integer type used for bit counts (the rotation amount in __output_transform).
using __bitcount_t = ::cuda::std::uint8_t;

// 128-bit LCG multiplier: _AHi supplies bits 64..127 and _ALo supplies bits 0..63.
static constexpr __uint128_t __multiplier = (static_cast<__uint128_t>(_AHi) << 64) | _ALo;
// 128-bit LCG increment: _CHi supplies bits 64..127 and _CLo supplies bits 0..63.
static constexpr __uint128_t __increment = (static_cast<__uint128_t>(_CHi) << 64) | _CLo;

//! @brief Applies the output permutation to a 128-bit state value.
//!
//! The top 6 bits of the state select a rotation amount. The high and low 64-bit halves of the state are XORed
//! together and the resulting 64-bit word is rotated right by that amount.
//! @param __internal The 128-bit state to permute.
//! @return The permuted 64-bit output word.
[[nodiscard]] _CCCL_API static constexpr result_type __output_transform(__uint128_t __internal) noexcept
{
  // Bits 122..127 of the state form a rotation count in [0, 63].
  const __bitcount_t __shift = static_cast<__bitcount_t>(__internal >> 122);
  // Truncating (state ^ (state >> 64)) to 64 bits yields (low half) ^ (high half).
  const result_type __folded = result_type(__internal ^ (__internal >> 64));
  return ::cuda::std::rotr(__folded, __shift);
}

//! @brief Computes the affine map equivalent to `__delta` consecutive LCG steps.
//!
//! A single step maps `x` to `x * __multiplier + __increment` (arithmetic wraps modulo 2^128). The map for
//! `__delta` steps is built by repeated squaring, so the loop runs once per significant bit of `__delta`.
//! @param __delta Number of LCG steps to compose.
//! @return A pair `{mult, plus}` such that `x * mult + plus` is the state reached from `x` after `__delta` steps.
[[nodiscard]] _CCCL_API static constexpr ::cuda::std::pair<__uint128_t, __uint128_t>
__power_mod(__uint128_t __delta) noexcept
{
  // Accumulated map; starts as the identity x -> 1 * x + 0.
  __uint128_t __total_mult = 1;
  __uint128_t __total_plus = 0;
  // Map for 2^k steps, where k is the index of the bit of __delta currently being examined.
  __uint128_t __step_mult = __multiplier;
  __uint128_t __step_plus = __increment;
  for (; __delta != 0; __delta >>= 1)
  {
    if ((__delta & 1) != 0)
    {
      // Apply the 2^k-step map after the map accumulated so far.
      __total_plus = __total_plus * __step_mult + __step_plus;
      __total_mult *= __step_mult;
    }
    // Compose the 2^k-step map with itself to obtain the 2^(k+1)-step map.
    __step_plus *= (__step_mult + 1);
    __step_mult *= __step_mult;
  }
  return ::cuda::std::pair<__uint128_t, __uint128_t>{__total_mult, __total_plus};
}
// Current 128-bit LCG state. Value-initialized to zero here; every constructor then overwrites it via seed().
__uint128_t __x_{};

public:
//! @brief Seed used by the default constructor and by `seed()` when called without an argument.
static constexpr result_type default_seed = 0xcafef00dd15ea5e5ULL;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
static constexpr result_type default_seed = 0xcafef00dd15ea5e5ULL;
static constexpr result_type default_seed = 0xCAFEF00DD15EA5E5ull;

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why? Is this a rule?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

definitely not a rule, but suggested by some c++ secure coding guidelines, e.g. Autosar C++14 Rule A2-13-5 https://www.autosar.org/fileadmin/standards/R18-03_R1.4.0/AP/AUTOSAR_RS_CPP14Guidelines.pdf. ull lowercase to have a clear distinction with uppercase digits


//! @brief Lower bound of the values returned by `operator()`.
//! @return Zero.
[[nodiscard]] _CCCL_API static constexpr result_type min() noexcept
{
  return result_type{0};
}
//! @brief Upper bound of the values returned by `operator()`.
//! @return `numeric_limits<result_type>::max()`, i.e. every bit of the 64-bit result set.
[[nodiscard]] _CCCL_API static constexpr result_type max() noexcept
{
  using __limits = ::cuda::std::numeric_limits<result_type>;
  return __limits::max();
}

// constructors and seeding functions
//! @brief Default-constructs the engine using `default_seed`.
_CCCL_API constexpr pcg64_engine() noexcept
: pcg64_engine(default_seed)
{}
//! @brief Creates an engine whose state is derived from an integer seed.
//! @param __seed The 64-bit seed value; forwarded to `seed(result_type)`.
_CCCL_API constexpr explicit pcg64_engine(result_type __seed) noexcept
{
  this->seed(__seed);
}

//! @brief Creates an engine whose state is derived from a seed sequence.
//! @tparam _Sseq A type accepted by `__is_seed_sequence` for this engine.
//! @param __seq The seed sequence; forwarded to `seed(_Sseq&)`.
_CCCL_TEMPLATE(class _Sseq)
_CCCL_REQUIRES(::cuda::std::__is_seed_sequence<_Sseq, pcg64_engine>)
_CCCL_API constexpr explicit pcg64_engine(_Sseq& __seq)
{
  this->seed(__seq);
}
//! @brief Reseeds the engine from a 64-bit integer.
//!
//! The new state is `(__seed + increment) * multiplier + increment` in 128-bit wrapping arithmetic, i.e. the state
//! `__seed + increment` advanced by one LCG step.
//! @param __seed The seed value; defaults to `default_seed`.
_CCCL_API constexpr void seed(result_type __seed = default_seed) noexcept
{
  // Widen the seed to 128 bits before offsetting it by the increment.
  __x_ = static_cast<__uint128_t>(__seed) + __increment;
  // One LCG step over the offset seed.
  __x_ = __x_ * __multiplier + __increment;
}

//! @brief Reseeds the engine from a SeedSequence-like object.
//!
//! Four 32-bit words are requested from `__seq` and packed into a 128-bit value, with the first generated word in
//! the most significant position. That value is then turned into the engine state the same way the integer seed
//! is: `(value + increment) * multiplier + increment` in 128-bit wrapping arithmetic.
//! @tparam _Sseq A type accepted by `__is_seed_sequence` for this engine.
//! @param __seq The seed sequence; its `generate` member is called once with a range of four 32-bit words.
_CCCL_TEMPLATE(class _Sseq)
_CCCL_REQUIRES(::cuda::std::__is_seed_sequence<_Sseq, pcg64_engine>)
_CCCL_API constexpr void seed(_Sseq& __seq)
{
  // Locals use reserved (double-underscore) names so that user-defined macros cannot clash with them.
  ::cuda::std::array<::cuda::std::uint32_t, 4> __words = {};
  __seq.generate(__words.begin(), __words.end());

  // Pack the words big-endian: __words[0] ends up in bits 96..127, __words[3] in bits 0..31.
  __uint128_t __seed_val = 0;
  for (const ::cuda::std::uint32_t __word : __words)
  {
    __seed_val = (__seed_val << 32) | __word;
  }
  __x_ = (__seed_val + __increment) * __multiplier + __increment;
}

//! @brief Produces the next pseudo-random value.
//!
//! The state is first advanced by one LCG step; the output permutation is then applied to the new state.
//! @return A 64-bit pseudo-random value.
_CCCL_API constexpr result_type operator()() noexcept
{
  const __uint128_t __next = __x_ * __multiplier + __increment;
  __x_                     = __next;
  return __output_transform(__next);
}

//! @brief Moves the engine forward as if `operator()` had been called `__z` times.
//!
//! The combined multiplier and increment for `__z` steps are obtained from `__power_mod` and applied to the state
//! in a single update, so no intermediate outputs are generated.
//! @param __z Number of values to skip.
_CCCL_API constexpr void discard(unsigned long long __z) noexcept
{
  const ::cuda::std::pair<__uint128_t, __uint128_t> __jump = __power_mod(__z);
  __x_ = __x_ * __jump.first + __jump.second;
}

//! @brief Compares two engines for equality.
//! @return `true` when both engines hold the same 128-bit state, `false` otherwise.
[[nodiscard]] _CCCL_API constexpr friend bool operator==(const pcg64_engine& __x, const pcg64_engine& __y) noexcept
{
  const bool __same_state = (__x.__x_ == __y.__x_);
  return __same_state;
}

# if _CCCL_STD_VER <= 2017
//! @brief Compares two engines for inequality.
//! @return `true` when the engines hold different 128-bit states, `false` otherwise.
[[nodiscard]] _CCCL_API constexpr friend bool operator!=(const pcg64_engine& __x, const pcg64_engine& __y) noexcept
{
  return __x.__x_ != __y.__x_;
}
# endif // _CCCL_STD_VER <= 2017

# if !_CCCL_COMPILER(NVRTC)

//! @brief Writes the engine state to an output stream as text.
//!
//! The state is written as two decimal 64-bit integers, low half first and high half second, each followed by a
//! single space. The stream's format flags and fill character are restored before returning.
//! @param __os The stream to write to.
//! @param __e The engine whose state is written.
//! @return `__os`.
template <typename _CharT, typename _Traits>
_CCCL_API friend ::std::basic_ostream<_CharT, _Traits>&
operator<<(::std::basic_ostream<_CharT, _Traits>& __os, const pcg64_engine& __e)
{
  using __ostream_t  = ::std::basic_ostream<_CharT, _Traits>;
  using __ios_base_t = typename __ostream_t::ios_base;

  // Remember the caller's formatting so it can be restored afterwards.
  const typename __ios_base_t::fmtflags __saved_flags = __os.flags();
  const _CharT __saved_fill                           = __os.fill();
  const _CharT __space                                = __os.widen(' ');

  __os.flags(__ios_base_t::dec | __ios_base_t::fixed | __ios_base_t::left);
  __os.fill(__space);

  // The 128-bit state is emitted as two 64-bit halves, low half first.
  const ::cuda::std::uint64_t __low_half  = static_cast<::cuda::std::uint64_t>(__e.__x_);
  const ::cuda::std::uint64_t __high_half = static_cast<::cuda::std::uint64_t>(__e.__x_ >> 64);
  __os << __low_half << __space << __high_half << __space;

  __os.flags(__saved_flags);
  __os.fill(__saved_fill);
  return __os;
}

//! @brief Reads an engine state previously written with `operator<<`.
//!
//! Two decimal 64-bit integers are extracted, low half first and high half second, with leading whitespace
//! skipped. The engine state is replaced only when both extractions succeed. If the stream is in a failed state
//! afterwards, `__e` is left unchanged, as the random number engine requirements demand for bad input. The
//! stream's format flags are restored before returning.
//! @param __is The stream to read from.
//! @param __e The engine that receives the state.
//! @return `__is`.
template <typename _CharT, typename _Traits>
_CCCL_API friend ::std::basic_istream<_CharT, _Traits>&
operator>>(::std::basic_istream<_CharT, _Traits>& __is, pcg64_engine& __e)
{
  using __istream_t  = ::std::basic_istream<_CharT, _Traits>;
  using __ios_base_t = typename __istream_t::ios_base;

  // Remember the caller's formatting so it can be restored afterwards.
  const typename __ios_base_t::fmtflags __saved_flags = __is.flags();
  __is.flags(__ios_base_t::dec | __ios_base_t::skipws);

  // Initialized so that nothing indeterminate is ever read, even if the stream was already failed on entry
  // (in which case the extraction operators leave their targets untouched).
  ::cuda::std::uint64_t __low = 0;
  ::cuda::std::uint64_t __hi  = 0;
  __is >> __low;
  __is >> __hi;

  // Commit the new state only after a fully successful read; a failed extraction has already set failbit.
  if (!__is.fail())
  {
    __e.__x_ = (static_cast<__uint128_t>(__hi) << 64) | __low;
  }

  __is.flags(__saved_flags);
  return __is;
}
# endif // !_CCCL_COMPILER(NVRTC)
};

//! @class pcg64
//! @brief A 128-bit state PCG engine producing 64-bit output values.
//!
//! This alias instantiates `pcg64_engine` with a fixed 128-bit multiplier and increment, each given as a
//! (high 64 bits, low 64 bits) pair of template arguments. It implements the PCG XSL RR 128/64 generator described in:
//! O'Neill, Melissa E. "PCG: A family of simple fast space-efficient statistically good algorithms for random number
//! generation." ACM Transactions on Mathematical Software 204 (2014): 1-46. The engine keeps a 128-bit internal state
//! and returns 64-bit pseudo-random values. PCG64 is a fast general purpose PRNG that passes common statistical tests,
//! has a long period (2^128), and can discard values in O(log n) time.
//!
//! PCG64 produces the 10000th value 11135645891219275043 when seeded with the default seed.
//!
//! Usage example:
//! @code
//! #include <cuda/random>
//!
//! cuda::pcg64 eng; // default seed
//! cuda::std::uint64_t v = eng(); // draw value
//! eng.seed(42); // reseed
//! eng.discard(10); // skip 10 outputs
//! @endcode
//!
//! @note This alias requires compiler support for 128-bit integers.
using pcg64 =
pcg64_engine<2549297995355413924ull, 4865540595714422341ull, 6364136223846793005ull, 1442695040888963407ull>;

#endif // _CCCL_HAS_INT128()
_CCCL_END_NAMESPACE_CUDA

#include <cuda/std/__cccl/epilogue.h>

#endif // _CUDA_STD___RANDOM_PCG_ENGINE_H
56 changes: 56 additions & 0 deletions libcudacxx/test/libcudacxx/cuda/random/pcg64.pass.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

// <random>

#include <cuda/__random/pcg_engine.h>

#include "random_utilities/test_engine.h"

#if _CCCL_HAS_INT128()

// Checks cuda::pcg64 against known-good outputs for every (seed, discard) combination.
// For each pair the engine is constructed from the seed, advanced by the discard count, and its next output is
// compared with the matching entry of the reference table. Always returns true; mismatches trip the assert.
__host__ __device__ constexpr bool test_against_reference()
{
  // reference values obtained from other library implementations
  constexpr int seeds[]    = {10823018, 0, 23};
  constexpr int discards[] = {0, 5, 100};
  // Stored seed-major: entry [s * num_discards + d] belongs to seeds[s] combined with discards[d].
  constexpr cuda::std::uint64_t reference_values[] = {
    11492238902574317825ull,
    8322011739913317518ull,
    16162292887622315191ull,
    74029666500212977ull,
    7381380909356947872ull,
    13353295228484708474ull,
    11051782693829522167ull,
    8996870419832475944ull,
    14156256770140333413ull};

  constexpr int num_seeds    = static_cast<int>(sizeof(seeds) / sizeof(seeds[0]));
  constexpr int num_discards = static_cast<int>(sizeof(discards) / sizeof(discards[0]));

  for (int s = 0; s < num_seeds; ++s)
  {
    for (int d = 0; d < num_discards; ++d)
    {
      int seed    = seeds[s];
      int discard = discards[d];
      cuda::pcg64 rng(seed);
      rng.discard(discard);
      assert(rng() == reference_values[s * num_discards + d]);
    }
  }
  return true;
}
#endif // _CCCL_HAS_INT128()

// Test entry point. The engine checks only run when the compiler provides 128-bit integers, because
// cuda::pcg64 is only declared in that configuration.
int main(int, char**)
{
#if _CCCL_HAS_INT128()
  // NOTE(review): the second template argument is presumably the expected 10000th output for the default seed
  // (it matches the value documented for pcg64) — confirm against random_utilities/test_engine.h.
  // The literal uses the `ull` suffix, like every other 64-bit literal in this test, so its type is
  // `unsigned long long` on every platform, including those where `unsigned long` is 32 bits wide.
  test_engine<cuda::pcg64, 11135645891219275043ull>();
  test_against_reference();
#endif // _CCCL_HAS_INT128()
  return 0;
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

#include <cuda/std/__random_>

#include "test_engine.h"
#include "random_utilities/test_engine.h"

template <typename Engine>
__host__ __device__ TEST_CONSTEXPR_CXX20 bool test_set_counter()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ __host__ __device__ TEST_CONSTEXPR_CXX20 bool test_min_max()
}

#if !_CCCL_COMPILER(NVRTC)
# include <sstream>
template <typename Engine>
void test_save_restore()
{
Expand Down
Loading