forked from intel/llvm
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathasm_16_matrix_mult.cpp
More file actions
44 lines (37 loc) · 1.18 KB
/
asm_16_matrix_mult.cpp
File metadata and controls
44 lines (37 loc) · 1.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
// UNSUPPORTED: cuda
// REQUIRES: gpu,linux
// RUN: %clangxx -fsycl %s -DINLINE_ASM -o %t.out
// RUN: %t.out
// RUN: %clangxx -fsycl %s -o %t.ref.out
// RUN: %t.ref.out
#include "include/asmhelper.h"
#include <CL/sycl.hpp>
#include <iostream>
#include <vector>
// Default template argument for KernelFunctor below (a SYCL cl_int).
using dataType = cl::sycl::cl_int;
// Command-group functor for this inline-asm test.
//
// When invoked with a SYCL handler it submits a 1-D kernel that stores the
// value 7 into every element of the output buffer. The value is produced
// either by an inline asm `mov` (when built with -DINLINE_ASM, device pass)
// or by a plain C++ assignment (reference build); main() checks for 7.
//
// The buffer storage and its accessors (getOutputBuffer, getOutputBufferSize,
// getOutputBufferData) come from the WithOutputBuffer<T> base, which is
// declared outside this file (presumably in include/asmhelper.h -- confirm).
template <typename T = dataType>
struct KernelFunctor : WithOutputBuffer<T> {
// Forwards problem_size to the WithOutputBuffer<T> base.
// NOTE(review): presumably this is the number of output elements -- confirm
// against the helper header.
KernelFunctor(size_t problem_size) : WithOutputBuffer<T>(problem_size) {}
// Command-group body: requests write access to the output buffer and
// enqueues the kernel on `cgh`.
void operator()(cl::sycl::handler &cgh) {
// `this->` / `.template` are required because the base class depends on T.
auto C = this->getOutputBuffer().template get_access<cl::sycl::access::mode::write>(cgh);
// One work-item per getOutputBufferSize() element; the kernel is named after
// this functor type and carries a required sub-group size of 16.
// NOTE(review): the 16 presumably has to agree with the `(M1,16)` in the asm
// string below -- confirm.
cgh.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()}, [=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(16)]] {
// NOTE(review): `volatile` looks deliberate here (likely to keep the
// compiler from folding the value away) -- confirm before changing.
volatile int output = 0;
// The asm path is compiled only when INLINE_ASM is defined (first RUN line)
// and __SYCL_DEVICE_ONLY__ is defined; otherwise the #else branch is used.
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
// `mov` of the immediate 0x7 into operand %0, which is bound to `output`
// through the "=rw" output constraint.
// NOTE(review): presumably Intel GPU vISA syntax -- confirm.
asm volatile("mov (M1,16) %0(0,0)<1> 0x7:d"
: "=rw"(output));
#else
// Reference path: same result produced without inline asm.
output = 7;
#endif
// Each work-item writes its result to its own slot of the output buffer.
C[wiID] = output;
});
}
};
int main() {
KernelFunctor<> f(DEFAULT_PROBLEM_SIZE);
if (!launchInlineASMTest(f))
return 0;
if (verify_all_the_same(f.getOutputBufferData(), 7))
return 0;
return 1;
}