Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cub/cub/agent/agent_radix_sort_onesweep.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ enum RadixSortStoreAlgorithm
RADIX_SORT_STORE_ALIGNED
};

#if !_CCCL_COMPILER(NVRTC)
#if !_CCCL_COMPILER(NVRTC) && !defined(_CCCL_DOXYGEN_INVOKED)
inline ::std::ostream& operator<<(::std::ostream& os, RadixSortStoreAlgorithm algo)
Comment on lines +61 to 62
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Question: I don't understand why Doxygen cannot handle operator<<. I think it should be publicly documented; it would be sad to see it go. What's the problem here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know either. From the error message, my guess is that Doxygen tries to put the `<<` into the section header, and that causes problems.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Super annoying! I hope we can bring the docs back at some point.

{
switch (algo)
Expand All @@ -71,7 +71,7 @@ inline ::std::ostream& operator<<(::std::ostream& os, RadixSortStoreAlgorithm al
return os << "<unknown RadixSortStoreAlgorithm: " << static_cast<int>(algo) << ">";
}
}
#endif // !_CCCL_COMPILER(NVRTC)
#endif // !_CCCL_COMPILER(NVRTC) && !_CCCL_DOXYGEN_INVOKED

template <int NominalBlockThreads4B,
int NominalItemsPerThread4B,
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/block/block_load.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -696,7 +696,7 @@ enum BlockLoadAlgorithm
BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED,
};

#if !_CCCL_COMPILER(NVRTC)
#if !_CCCL_COMPILER(NVRTC) && !defined(_CCCL_DOXYGEN_INVOKED)
inline ::std::ostream& operator<<(::std::ostream& os, BlockLoadAlgorithm algo)
{
switch (algo)
Expand All @@ -717,7 +717,7 @@ inline ::std::ostream& operator<<(::std::ostream& os, BlockLoadAlgorithm algo)
return os << "<unknown BlockLoadAlgorithm: " << static_cast<int>(algo) << ">";
}
}
#endif // !_CCCL_COMPILER(NVRTC)
#endif // !_CCCL_COMPILER(NVRTC) && !_CCCL_DOXYGEN_INVOKED

//! @rst
//! The BlockLoad class provides :ref:`collective <collective-primitives>` data movement methods for loading a linear
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/block/block_radix_rank.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ enum RadixRankAlgorithm
RADIX_RANK_MATCH_EARLY_COUNTS_ATOMIC_OR
};

#if !_CCCL_COMPILER(NVRTC)
#if !_CCCL_COMPILER(NVRTC) && !defined(_CCCL_DOXYGEN_INVOKED)
inline ::std::ostream& operator<<(::std::ostream& os, RadixRankAlgorithm algo)
{
switch (algo)
Expand All @@ -93,7 +93,7 @@ inline ::std::ostream& operator<<(::std::ostream& os, RadixRankAlgorithm algo)
return os << "<unknown RadixRankAlgorithm: " << static_cast<int>(algo) << ">";
}
}
#endif // !_CCCL_COMPILER(NVRTC)
#endif // !_CCCL_COMPILER(NVRTC) && !_CCCL_DOXYGEN_INVOKED

/** Empty callback implementation */
template <int BINS_PER_THREAD>
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/block/block_reduce.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ enum BlockReduceAlgorithm
BLOCK_REDUCE_WARP_REDUCTIONS_NONDETERMINISTIC,
};

#if !_CCCL_COMPILER(NVRTC)
#if !_CCCL_COMPILER(NVRTC) && !defined(_CCCL_DOXYGEN_INVOKED)
inline ::std::ostream& operator<<(::std::ostream& os, const BlockReduceAlgorithm& alg)
{
switch (alg)
Expand All @@ -169,7 +169,7 @@ inline ::std::ostream& operator<<(::std::ostream& os, const BlockReduceAlgorithm
return os << "<unknown BlockReduceAlgorithm: " << static_cast<int>(alg) << ">";
}
}
#endif // !_CCCL_COMPILER(NVRTC)
#endif // !_CCCL_COMPILER(NVRTC) && !_CCCL_DOXYGEN_INVOKED

//! @rst
//! The BlockReduce class provides :ref:`collective <collective-primitives>` methods for computing a
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/block/block_scan.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ enum BlockScanAlgorithm
BLOCK_SCAN_WARP_SCANS,
};

#if !_CCCL_COMPILER(NVRTC)
#if !_CCCL_COMPILER(NVRTC) && !defined(_CCCL_DOXYGEN_INVOKED)
inline ::std::ostream& operator<<(::std::ostream& os, BlockScanAlgorithm algo)
{
switch (algo)
Expand All @@ -118,7 +118,7 @@ inline ::std::ostream& operator<<(::std::ostream& os, BlockScanAlgorithm algo)
return os << "<unknown BlockScanAlgorithm: " << static_cast<int>(algo) << ">";
}
}
#endif // !_CCCL_COMPILER(NVRTC)
#endif // !_CCCL_COMPILER(NVRTC) && !_CCCL_DOXYGEN_INVOKED

//! @rst
//! The BlockScan class provides :ref:`collective <collective-primitives>` methods for computing a parallel prefix
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/block/block_store.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -530,7 +530,7 @@ enum BlockStoreAlgorithm
BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED,
};

#if !_CCCL_COMPILER(NVRTC)
#if !_CCCL_COMPILER(NVRTC) && !defined(_CCCL_DOXYGEN_INVOKED)
inline ::std::ostream& operator<<(::std::ostream& os, BlockStoreAlgorithm algo)
{
switch (algo)
Expand All @@ -551,7 +551,7 @@ inline ::std::ostream& operator<<(::std::ostream& os, BlockStoreAlgorithm algo)
return os << "<unknown BlockStoreAlgorithm: " << static_cast<int>(algo) << ">";
}
}
#endif // !_CCCL_COMPILER(NVRTC)
#endif // !_CCCL_COMPILER(NVRTC) && !_CCCL_DOXYGEN_INVOKED

//! @rst
//! The BlockStore class provides :ref:`collective <collective-primitives>` data movement
Expand Down
14 changes: 7 additions & 7 deletions cub/cub/device/dispatch/dispatch_common.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -15,32 +15,32 @@

CUB_NAMESPACE_BEGIN

// Options for specifying memory aliasing
//! @brief Options for specifying memory aliasing
enum class MayAlias
{
Yes,
No
};

// Options for specifying sorting order.
//! @brief Options for specifying sorting order.
enum class SortOrder
{
Ascending,
Descending
};

// Options for specifying the behavior of the stream compaction algorithm.
//! @brief Options for specifying the behavior of the stream compaction algorithm.
enum class SelectImpl
{
// Stream compaction, discarding rejected items. It's required that memory of input and output are disjoint.
//! Stream compaction, discarding rejected items. It's required that memory of input and output are disjoint.
Select,
// Stream compaction, discarding rejected items. Memory of the input may be identical to the memory of the output.
//! Stream compaction, discarding rejected items. Memory of the input may be identical to the memory of the output.
SelectPotentiallyInPlace,
// Partition, keeping rejected items. It's required that memory of input and output are disjoint.
//! Partition, keeping rejected items. It's required that memory of input and output are disjoint.
Partition
};

// Options for forcing inclusive prefix-scan even when initial value has been provided
//! @brief Options for forcing inclusive prefix-scan even when initial value has been provided
enum class ForceInclusive
{
Yes,
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/thread/thread_load.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ enum CacheLoadModifier
LOAD_VOLATILE, ///< Volatile (any memory space)
};

#if !_CCCL_COMPILER(NVRTC)
#if !_CCCL_COMPILER(NVRTC) && !defined(_CCCL_DOXYGEN_INVOKED)
inline ::std::ostream& operator<<(::std::ostream& os, CacheLoadModifier modifier)
{
switch (modifier)
Expand All @@ -73,7 +73,7 @@ inline ::std::ostream& operator<<(::std::ostream& os, CacheLoadModifier modifier
return os << "<unknown CacheLoadModifier: " << static_cast<int>(modifier) << ">";
}
}
#endif // !_CCCL_COMPILER(NVRTC)
#endif // !_CCCL_COMPILER(NVRTC) && !_CCCL_DOXYGEN_INVOKED

/**
* @name Thread I/O (cache modified)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ public:
}

//! @rst
//! Returns a \c const reference to the :ref:`any_resource <cudax-memory-resource-any-resource>`
//! Returns a \c const reference to the :ref:`any_resource <libcudacxx-memory-resource-any-resource>`
//! that holds the memory resource used to allocate the buffer
//! @endrst
[[nodiscard]] _CCCL_HIDE_FROM_ABI const __resource& memory_resource() const noexcept
Expand Down
6 changes: 3 additions & 3 deletions docs/cudax/container.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@ annotations are checked by the type system.
:widths: 25 45 30
:header-rows: 0

* - :ref:`<cuda/experimental/container.cuh> <cudax-containers-uninitialized-buffer>`
* - :ref:`<cuda/experimental/container.cuh> <libcudacxx-containers-uninitialized-buffer>`
- Facilities providing uninitialized *heterogeneous* potentially stream ordered storage satisfying a set of properties
- cudax 2.7.0 / CCCL 2.7.0
* - :ref:`<cuda/experimental/container.cuh> <cudax-containers-heterogeneous-iterator>`
* - :ref:`<cuda/experimental/container.cuh> <libcudacxx-containers-heterogeneous-iterator>`
- Iterator providing type safe iteration and memory access through a set of properties
- cudax 2.7.0 / CCCL 2.7.0
* - :ref:`<cuda/experimental/container.cuh> <cudax-containers-async-vector>`
* - :ref:`<cuda/experimental/container.cuh> <libcudacxx-containers-async-vector>`
- High level containers, that provide *heterogeneous* storage satisfying a set of properties
- cudax 2.7.0 / CCCL 2.7.0
6 changes: 3 additions & 3 deletions docs/cudax/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ CUDA Experimental
However, any feature within this library has important use cases and we encourage users to experiment with them.

Specifically, ``cudax`` provides:
- :ref:`uninitialized storage <cudax-containers-uninitialized-buffer>`
- :ref:`an owning type erased memory resource <cudax-memory-resource-any-async-resource>`
- :ref:`stream-ordered memory resources <cudax-memory-resource-async>`
- :ref:`uninitialized storage <libcudacxx-containers-uninitialized-buffer>`
- :ref:`an owning type erased memory resource <libcudacxx-memory-resource-any-async-resource>`
- :ref:`stream-ordered memory resources <libcudacxx-memory-resource-async>`
- :ref:`graph functionality <cudax-graph>`
- dimensions description functionality
- :ref:`an implementation of the STF (Sequential Task Flow) programming model <stf>`
Expand Down
10 changes: 5 additions & 5 deletions docs/cudax/memory_resource.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,16 @@ Memory Resources

The ``<cuda/experimental/memory_resource.cuh>`` header provides:

- :ref:`any_synchronous_resource <cudax-memory-resource-any-resource>` and
:ref:`any_resource <cudax-memory-resource-any-async-resource>` type erased memory resources similar to
- :ref:`any_synchronous_resource <libcudacxx-memory-resource-any-resource>` and
:ref:`any_resource <libcudacxx-memory-resource-any-async-resource>` type erased memory resources similar to
``std::any``. In contrast to :ref:`resource_ref <libcudacxx-extended-api-memory-resources-resource-ref>` they
own the contained resource.
- :ref:`device_memory_resource <cudax-memory-resource-async>` A standard C++ interface for *heterogeneous*,
- :ref:`device_memory_resource <libcudacxx-memory-resource-async>` A standard C++ interface for *heterogeneous*,
*stream-ordered* memory allocation tailored to the needs of CUDA C++ developers. This design builds off of the
success of the `RAPIDS Memory Manager (RMM) <https://github.com/rapidsai/rmm>`__ project and evolves the design
based on lessons learned.
- :ref:`shared_resource <cudax-memory-resource-shared-resource>` a type erased reference counted memory resource.
In contrast to :ref:`any_resource <cudax-memory-resource-any-resource>` it additionally provides shared ownership
- :ref:`shared_resource <libcudacxx-memory-resource-shared-resource>` a type erased reference counted memory resource.
In contrast to :ref:`any_resource <libcudacxx-memory-resource-any-resource>` it additionally provides shared ownership
semantics.

``<cuda/experimental/memory_resource.cuh>`` is not intended to replace RMM, but instead moves the definition of the
Expand Down
9 changes: 5 additions & 4 deletions docs/libcudacxx/Doxyfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,14 @@ GENERATE_XML = YES
XML_OUTPUT = xml
XML_PROGRAMLISTING = YES

INPUT = ../../libcudacxx/include/cuda/__iterator \
../../libcudacxx/include/cuda/__stream \
INPUT = ../../libcudacxx/include/cuda/__algorithm \
../../libcudacxx/include/cuda/__container \
../../libcudacxx/include/cuda/__device \
../../libcudacxx/include/cuda/__event \
../../libcudacxx/include/cuda/__algorithm \
../../libcudacxx/include/cuda/__memory_resource \
../../libcudacxx/include/cuda/__iterator \
../../libcudacxx/include/cuda/__memory_pool \
../../libcudacxx/include/cuda/__memory_resource \
../../libcudacxx/include/cuda/__stream \
../../libcudacxx/include/nv

RECURSIVE = YES
Expand Down
1 change: 1 addition & 0 deletions docs/libcudacxx/extended_api/math.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ Math
math/uabs
math/fast_mod_div
math/mul_hi
math/sincos

.. list-table::
:widths: 25 45 30 30
Expand Down
2 changes: 1 addition & 1 deletion docs/libcudacxx/extended_api/memory_resource.rst
Original file line number Diff line number Diff line change
Expand Up @@ -44,5 +44,5 @@ With the current design it ranges from cumbersome to impossible to verify whethe
that are e.g. accessible on device, or whether it can utilize other allocation mechanisms.

To better support asynchronous CUDA `stream-ordered allocations <https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#stream-ordered-memory-allocator>`__
libcu++ provides :ref:`cuda::stream_ref <libcudacxx-extended-api-streams-stream-ref>` as a wrapper around
libcu++ provides :cpp:class:`cuda::stream_ref <cuda::stream_ref>` as a wrapper around
``cudaStream_t``. The definition of ``cuda::stream_ref`` can be found in the ``<cuda/stream>`` header.
2 changes: 1 addition & 1 deletion docs/libcudacxx/ptx/pragmas.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ PTX Pragmas
.. toctree::
:maxdepth: 1

pragma/enable_smem_spilling
pragmas/enable_smem_spilling

.. list-table:: `.pragma Strings <https://docs.nvidia.com/cuda/parallel-thread-execution/#descriptions-pragma-strings>`__
:widths: 50 50
Expand Down
1 change: 1 addition & 0 deletions docs/libcudacxx/runtime/event.rst
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ Availability: CCCL 3.1.0 / CUDA 13.1
}

.. _cccl-runtime-event-timed-event:

``cuda::timed_event``
-----------------------------------------------------

Expand Down
7 changes: 3 additions & 4 deletions libcudacxx/include/cuda/__container/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ template <class _Env>
inline constexpr bool __buffer_compatible_env = ::cuda::std::is_same_v<_Env, ::cuda::std::execution::env<>>;

//! @rst
//! .. _cudax-containers-async-vector:
//! .. _libcudacxx-containers-async-vector:
//!
//! buffer
//! -------------
Expand Down Expand Up @@ -520,9 +520,8 @@ class buffer
//! @}

//! @rst
//! Returns a \c const reference to the :ref:`any_resource
//! <cuda-memory-resource-any-resource>` that holds the memory resource used
//! to allocate the buffer
//! Returns a \c const reference to the :ref:`any_resource <libcudacxx-memory-resource-any-resource>` that holds the
//! memory resource used to allocate the buffer
//! @endrst
[[nodiscard]] _CCCL_HIDE_FROM_ABI const __resource_t& memory_resource() const noexcept
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ enum class _IsConstIter
__yes,
};
//! @rst
//! .. _cudax-containers-heterogeneous-iterator:
//! .. _libcudacxx-containers-heterogeneous-iterator:
//!
//! Type safe iterator over heterogeneous memory
//! ---------------------------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
_CCCL_BEGIN_NAMESPACE_CUDA

//! @rst
//! .. _cudax-containers-uninitialized-async-buffer:
//! .. _libcudacxx-containers-uninitialized-async-buffer:
//!
//! Uninitialized stream-ordered type-safe memory storage
//! ------------------------------------------------------
Expand Down
22 changes: 12 additions & 10 deletions libcudacxx/include/cuda/__iterator/permutation_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,7 @@ class permutation_iterator
return __tmp;
}

#ifndef _CCCL_DOXYGEN_INVOKED // Doxygen has issues with constexpr friend operators
//! @brief Advances a @c permutation_iterator by a given number of elements
//! @param __iter The original @c permutation_iterator
//! @param __n The number of elements to advance
Expand All @@ -292,16 +293,6 @@ class permutation_iterator
return permutation_iterator{__iter.__iter_, __iter.__index_ + __n};
}

//! @brief Advances the @c permutation_iterator by a given number of elements
//! @param __n The number of elements to advance
//! @return Equivalent to ``index + __n``
_CCCL_EXEC_CHECK_DISABLE
_CCCL_API constexpr permutation_iterator& operator+=(difference_type __n) noexcept(noexcept(__index_ += __n))
{
__index_ += __n;
return *this;
}

//! @brief Decrements a @c permutation_iterator by a given number of elements
//! @param __iter The original @c permutation_iterator
//! @param __n The number of elements to decrement
Expand All @@ -314,6 +305,17 @@ class permutation_iterator
{
return permutation_iterator{__iter.__iter_, __iter.__index_ - __n};
}
#endif // !_CCCL_DOXYGEN_INVOKED

//! @brief Advances the @c permutation_iterator in place by a given number of elements
//! @param __n The number of elements to advance
//! @return A reference to this iterator after its stored index has been advanced by @c __n
//! @note Only the stored index is modified; this operator is non-throwing exactly when ``__index_ += __n`` is
_CCCL_EXEC_CHECK_DISABLE
_CCCL_API constexpr permutation_iterator& operator+=(difference_type __n) noexcept(noexcept(__index_ += __n))
{
__index_ += __n;
return *this;
}

//! @brief Decrements the @c permutation_iterator by a given number of elements
//! @param __n The number of elements to decrement
Expand Down
Loading