1515#include < rmm/mr/per_device_resource.hpp>
1616#include < rmm/mr/pool_memory_resource.hpp>
1717
18+ #include < cuda/stream_ref>
1819#include < cuda_runtime_api.h>
1920#include < thrust/device_vector.h>
2021#include < thrust/memory.h>
@@ -31,7 +32,7 @@ void BM_UvectorSizeConstruction(benchmark::State& state)
3132
3233 for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores)
3334 rmm::device_uvector<std::int32_t > vec (static_cast <std::size_t >(state.range (0 )),
34- rmm::cuda_stream_view{ });
35+ cuda::stream_ref{cudaStream_t{ nullptr } });
3536 cudaDeviceSynchronize ();
3637 }
3738
@@ -78,7 +79,7 @@ using rmm_vector = rmm::device_vector<int32_t>;
7879using rmm_uvector = rmm::device_uvector<int32_t >;
7980
8081template <typename Vector>
81- Vector make_vector (std::size_t num_elements, rmm::cuda_stream_view stream, bool zero_init = false )
82+ Vector make_vector (std::size_t num_elements, cuda::stream_ref stream, bool zero_init = false )
8283{
8384 static_assert (std::is_same_v<Vector, thrust_vector> or std::is_same_v<Vector, rmm_vector> or
8485 std::is_same_v<Vector, rmm_uvector>,
@@ -90,7 +91,7 @@ Vector make_vector(std::size_t num_elements, rmm::cuda_stream_view stream, bool
9091 } else if constexpr (std::is_same_v<Vector, rmm_uvector>) {
9192 auto vec = Vector (num_elements, stream);
9293 if (zero_init) {
93- cudaMemsetAsync (vec.data (), 0 , num_elements * sizeof (std::int32_t ), stream.value ());
94+ cudaMemsetAsync (vec.data (), 0 , num_elements * sizeof (std::int32_t ), stream.get ());
9495 }
9596 return vec;
9697 }
@@ -111,14 +112,14 @@ void vector_workflow(std::size_t num_elements,
111112{
112113 auto input = make_vector<Vector>(num_elements, input_stream, true );
113114 input_stream.synchronize ();
114- for (rmm::cuda_stream_view stream : streams) {
115+ for (cuda::stream_ref stream : streams) {
115116 auto output = make_vector<Vector>(num_elements, stream);
116- kernel<<<num_blocks, block_size, 0 , stream.value ()>>> (
117+ kernel<<<num_blocks, block_size, 0 , stream.get ()>>> (
117118 vector_data (input), vector_data (output), num_elements);
118119 }
119120
120- for (rmm::cuda_stream_view stream : streams) {
121- stream.synchronize ( );
121+ for (cuda::stream_ref stream : streams) {
122+ RMM_CUDA_TRY ( cudaStreamSynchronize ( stream.get ()) );
122123 }
123124}
124125
0 commit comments