// Patch summary (free-text preamble; everything below the first "diff --git" is unchanged).
//
// Purpose: introduce a CUDA-specific, data-less queue property
// `property::queue::cuda::use_default_stream` and forward it to queue creation.
//
// Files touched and what each hunk visibly does:
//   * sycl/include/CL/sycl/detail/property_helper.hpp
//       - adds enumerator `UseDefaultStream = 8` to `DataLessPropKind`;
//       - raises `LastKnownDataLessPropKind` from 7 to 8 to match;
//       - `DataLessPropKindSize` stays at 32.
//   * sycl/include/CL/sycl/properties/queue_properties.hpp
//       - adds `namespace cuda { class use_default_stream ... }` inside `namespace queue`,
//         derived from `detail::DataLessProperty` like the neighbouring properties.
//   * sycl/source/detail/queue_impl.hpp
//       - adds one `#include` (target not visible here);
//       - ORs `__SYCL_PI_CUDA_USE_DEFAULT_STREAM` into `CreationFlags` when
//         `MPropList.has_property...()` reports the property, next to the existing
//         `PI_QUEUE_PROFILING_ENABLE` check.
//   * sycl/unittests/pi/cuda/test_queue.cpp
//       - includes "TestGetPlatforms.hpp" plus one more header (target not visible);
//       - replaces `using namespace cl::sycl;` with `using namespace sycl;`;
//       - changes the `CudaTestQueue` base from `::testing::Test` to `::testing::TestWithParam`;
//       - adds `TEST_P(CudaTestQueue, SYCLQueueDefaultStream)`: takes the first device of the
//         parameter's `get_devices()`, builds a queue with `use_default_stream`, fetches the
//         native `CUstream`, reads its flags and asserts they equal `CU_STREAM_DEFAULT`.
//
// NOTE(review): this copy of the patch looks damaged. Line breaks are collapsed, and text
//   between angle brackets seems to be missing (bare `#include`, `has_property()`,
//   `DataLessProperty {}`, `std::vector CudaDevices`, `TestWithParam`, `get_native(Queue)`).
//   It will not apply as-is; restore it from the original commit before use.
// NOTE(review): in the new test the result of `cuStreamGetFlags` is not checked and `flags`
//   is uninitialised, so a failed call would make `ASSERT_EQ` compare an indeterminate
//   value. Consider asserting on the call result and initialising `flags`.
// NOTE(review): the new test indexes `CudaDevices[0]` with no emptiness check. Presumably
//   the parameter always yields at least one device; confirm, or guard with an assertion.
// NOTE(review): the fixture now derives from `TestWithParam` while the context still shows
//   `TEST_F(CudaTestQueue, PICreateQueueInterop)`, and no `INSTANTIATE_TEST_SUITE_P` (or
//   equivalent) appears in this patch. Confirm the suite is instantiated elsewhere and that
//   mixing `TEST_F` and `TEST_P` on one fixture is intended.
diff --git a/sycl/include/CL/sycl/detail/property_helper.hpp b/sycl/include/CL/sycl/detail/property_helper.hpp index 1b5a79097dfee..d87e03c53167d 100644 --- a/sycl/include/CL/sycl/detail/property_helper.hpp +++ b/sycl/include/CL/sycl/detail/property_helper.hpp @@ -32,8 +32,9 @@ enum DataLessPropKind { BufferUsePinnedHostMemory = 5, UsePrimaryContext = 6, InitializeToIdentity = 7, + UseDefaultStream = 8, // Indicates the last known dataless property. - LastKnownDataLessPropKind = 7, + LastKnownDataLessPropKind = 8, // Exceeding 32 may cause ABI breaking change on some of OSes. DataLessPropKindSize = 32 }; diff --git a/sycl/include/CL/sycl/properties/queue_properties.hpp b/sycl/include/CL/sycl/properties/queue_properties.hpp index 6d596fcf6a67c..31cdb26e6fb2d 100644 --- a/sycl/include/CL/sycl/properties/queue_properties.hpp +++ b/sycl/include/CL/sycl/properties/queue_properties.hpp @@ -17,6 +17,10 @@ namespace queue { class in_order : public detail::DataLessProperty {}; class enable_profiling : public detail::DataLessProperty {}; +namespace cuda { +class use_default_stream + : public detail::DataLessProperty {}; +} // namespace cuda } // namespace queue } // namespace property } // namespace sycl diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index aea55006a6fba..2843403857b6e 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -9,6 +9,7 @@ #pragma once #include +#include #include #include #include @@ -248,6 +249,9 @@ class queue_impl { if (MPropList.has_property()) { CreationFlags |= PI_QUEUE_PROFILING_ENABLE; } + if (MPropList.has_property()) { + CreationFlags |= __SYCL_PI_CUDA_USE_DEFAULT_STREAM; + } RT::PiQueue Queue{}; RT::PiContext Context = MContext->getHandleRef(); RT::PiDevice Device = MDevice->getHandleRef(); diff --git a/sycl/unittests/pi/cuda/test_queue.cpp b/sycl/unittests/pi/cuda/test_queue.cpp index 39ee2731df03a..2bb5ffbe2347f 100644 --- a/sycl/unittests/pi/cuda/test_queue.cpp 
+++ b/sycl/unittests/pi/cuda/test_queue.cpp @@ -10,16 +10,18 @@ #include +#include "TestGetPlatforms.hpp" #include "TestGetPlugin.hpp" #include +#include #include #include #include #include -using namespace cl::sycl; +using namespace sycl; -struct CudaTestQueue : public ::testing::Test { +struct CudaTestQueue : public ::testing::TestWithParam { protected: detail::plugin plugin = pi::initializeAndGet(backend::cuda); @@ -149,3 +151,15 @@ TEST_F(CudaTestQueue, PICreateQueueInterop) { ASSERT_EQ((plugin.call_nocheck(queue)), PI_SUCCESS); } + +TEST_P(CudaTestQueue, SYCLQueueDefaultStream) { + std::vector CudaDevices = GetParam().get_devices(); + auto deviceA_ = CudaDevices[0]; + queue Queue(deviceA_, async_handler{}, + {property::queue::cuda::use_default_stream{}}); + + CUstream CudaStream = get_native(Queue); + unsigned int flags; + cuStreamGetFlags(CudaStream, &flags); + ASSERT_EQ(flags, CU_STREAM_DEFAULT); +}