Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,7 @@
# Temporaries
*~
*#
*/build
*/build
]
# vim
*.swp
36 changes: 10 additions & 26 deletions examples/sgemm_interop/sycl_sgemm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@
#include <iostream>
#include <vector>

#include <CL/sycl.hpp>
#include <CL/sycl/backend/cuda.hpp>
#include <sycl/sycl.hpp>

#include <cublas_v2.h>
#include <cuda.h>
Expand All @@ -47,25 +46,6 @@ void inline checkCudaErrorMsg(cudaError status, const char *msg) {
}
}

// Checks the result of a CUDA driver API call (CUresult); on failure,
// reports the failing call and terminates the process.
//   status: return code from a cu* driver API function.
//   msg:    human-readable name of the failing call (supplied by CHECK_ERROR).
inline void checkCudaErrorMsg(CUresult status, const char *msg) {
  if (status != CUDA_SUCCESS) {
    // Diagnostics go to stderr so they are not lost (or reordered) when
    // stdout is redirected; the raw status code is printed as an integer.
    std::cerr << "ERROR CUDA: " << msg << " - " << status << std::endl;
    exit(EXIT_FAILURE);
  }
}

// Device selector that steers SYCL queue construction toward a CUDA device:
// devices on the CUDA backend score 1, every other device scores -1 (the
// runtime picks the highest-scoring device).
// NOTE(review): the sycl::device_selector base class is deprecated in
// SYCL 2020 in favor of callable selectors — confirm against the target
// SYCL implementation.
class CUDASelector : public sycl::device_selector {
public:
// Scoring callback invoked by the runtime once per candidate device.
int operator()(const sycl::device &device) const override {
if(device.get_platform().get_backend() == sycl::backend::ext_oneapi_cuda){
// Side effect: prints once per CUDA device the runtime enumerates.
std::cout << " CUDA device found " << std::endl;
return 1;
} else{
// Negative score rejects the device outright.
return -1;
}
}
};

int main() {
using namespace sycl;

Expand All @@ -88,7 +68,9 @@ int main() {
// B is a matrix filled with 1s
std::fill(std::begin(h_B), std::end(h_B), 1.0f);

sycl::queue q{CUDASelector()};
sycl::queue q{[](auto &d) {
return (d.get_platform().get_backend() == sycl::backend::ext_oneapi_cuda);
}};

cublasHandle_t handle;
CHECK_ERROR(cublasCreate(&handle));
Expand All @@ -104,12 +86,14 @@ int main() {
auto d_C = b_C.get_access<sycl::access::mode::write>(h);

h.host_task([=](sycl::interop_handle ih) {
cuCtxSetCurrent(ih.get_native_context<backend::ext_oneapi_cuda>());
auto cuStream = ih.get_native_queue<backend::ext_oneapi_cuda>();
cublasSetStream(handle, cuStream);
auto cuA = reinterpret_cast<float *>(ih.get_native_mem<backend::ext_oneapi_cuda>(d_A));
auto cuB = reinterpret_cast<float *>(ih.get_native_mem<backend::ext_oneapi_cuda>(d_B));
auto cuC = reinterpret_cast<float *>(ih.get_native_mem<backend::ext_oneapi_cuda>(d_C));
auto cuA = reinterpret_cast<float *>(
ih.get_native_mem<backend::ext_oneapi_cuda>(d_A));
auto cuB = reinterpret_cast<float *>(
ih.get_native_mem<backend::ext_oneapi_cuda>(d_B));
auto cuC = reinterpret_cast<float *>(
ih.get_native_mem<backend::ext_oneapi_cuda>(d_C));

CHECK_ERROR(cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, WIDTH, HEIGHT,
WIDTH, &ALPHA, cuA, WIDTH, cuB, WIDTH, &BETA,
Expand Down
27 changes: 3 additions & 24 deletions examples/sgemm_interop/sycl_sgemm_usm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,10 @@
#include <iostream>
#include <vector>

#include <CL/sycl.hpp>
#include <CL/sycl/backend/cuda.hpp>
#include <sycl/sycl.hpp>

#include <cublas_v2.h>
#include <cuda.h>
#include <cublas_v2.h>

#define CHECK_ERROR(FUNC) checkCudaErrorMsg(FUNC, " " #FUNC)

Expand All @@ -47,24 +46,6 @@ void inline checkCudaErrorMsg(cudaError status, const char *msg) {
}
}

// Validates a CUDA driver API result code; prints the failing call's name
// (msg, provided by the CHECK_ERROR macro) plus the raw status value and
// aborts when the call did not succeed.
void inline checkCudaErrorMsg(CUresult status, const char *msg) {
  // Fast path: nothing to do on success.
  if (status == CUDA_SUCCESS) {
    return;
  }
  std::cout << "ERROR CUDA: " << msg << " - " << status << std::endl;
  exit(EXIT_FAILURE);
}

// Selector that makes SYCL queue construction prefer a CUDA-backend device:
// CUDA devices score 1, all others score -1, and the runtime chooses the
// highest-scoring device it enumerates.
class CUDASelector : public sycl::device_selector {
public:
  // Scoring callback the runtime invokes for each candidate device.
  int operator()(const sycl::device &device) const override {
    const bool on_cuda_backend =
        device.get_platform().get_backend() == sycl::backend::ext_oneapi_cuda;
    if (!on_cuda_backend) {
      return -1; // negative score rejects the device
    }
    std::cout << " CUDA device found " << std::endl;
    return 1;
  }
};

int main() {
using namespace sycl;
Expand All @@ -88,7 +69,7 @@ int main() {
// B is a matrix filled with 1s
std::fill(std::begin(h_B), std::end(h_B), 1.0f);

sycl::queue q{CUDASelector()};
sycl::queue q{[](auto& d) { return (d.get_platform().get_backend() == sycl::backend::ext_oneapi_cuda); }};

// Allocate memory on the device
float* d_A = sycl::malloc_device<float>(WIDTH*HEIGHT,q);
Expand All @@ -107,9 +88,7 @@ int main() {
q.submit([&](handler &h) {

h.host_task([=](sycl::interop_handle ih) {

// Set the correct cuda context & stream
cuCtxSetCurrent(ih.get_native_context<backend::ext_oneapi_cuda>());
auto cuStream = ih.get_native_queue<backend::ext_oneapi_cuda>();
cublasSetStream(handle, cuStream);

Expand Down
3 changes: 0 additions & 3 deletions examples/vector_addition/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,6 @@ to highlight how to build an application with SYCL for CUDA using DPC++ support,
for which an example CMakefile is provided. For detailed documentation on how to
migrate from CUDA to SYCL, see [SYCL For CUDA Developers](https://developer.codeplay.com/products/computecpp/ce/guides/sycl-for-cuda-developers).

Note that currently the CUDA backend does not support the [USM](https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc) extension, so we use
`sycl::buffer` and `sycl::accessor` instead.

Pre-requisites
---------------

Expand Down
12 changes: 7 additions & 5 deletions examples/vector_addition/vector_addition.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,12 @@ int main(int argc, char *argv[]) {

// Initialize input data
{
const auto dwrite_t = sycl::access::mode::discard_write;
const auto dwrite_t = sycl::write_only;


sycl::host_accessor h_a{bufA, sycl::write_only};
sycl::host_accessor h_b{bufB, sycl::write_only};

auto h_a = bufA.get_access<dwrite_t>();
auto h_b = bufB.get_access<dwrite_t>();
for (int i = 0; i < N; i++) {
h_a[i] = sin(i) * sin(i);
h_b[i] = cos(i) * cos(i);
Expand Down Expand Up @@ -70,8 +72,8 @@ int main(int argc, char *argv[]) {
myQueue.submit(cg);

{
const auto read_t = sycl::access::mode::read;
auto h_c = bufC.get_access<read_t>();
sycl::host_accessor h_c{bufC, sycl::read_only};

double sum = 0.0f;
for (int i = 0; i < N; i++) {
sum += h_c[i];
Expand Down