forked from intel/llvm-test-suite
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvadd_2d.cpp
More file actions
101 lines (82 loc) · 3.2 KB
/
vadd_2d.cpp
File metadata and controls
101 lines (82 loc) · 3.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
//==---------------- vadd_2d.cpp - DPC++ ESIMD on-device test -------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// TODO enable on Windows
// REQUIRES: linux && gpu
// UNSUPPORTED: cuda
// RUN: %clangxx-esimd -fsycl %s -o %t.out
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
#include "esimd_test_utils.hpp"
#include <CL/sycl.hpp>
#include <CL/sycl/INTEL/esimd.hpp>
#include <iostream>
using namespace cl::sycl;
// Element-wise vector addition C = A + B executed as an ESIMD kernel, with the
// three vectors exposed to the device as 2D images and accessed through
// media block loads/stores.
//
// Returns 0 when every element of C equals A[i] + B[i]; returns 1 when a SYCL
// exception is caught or when the host-side verification finds a mismatch.
int main() {
  constexpr unsigned Size = 256; // total number of int elements per vector
  constexpr unsigned VL = 8;     // number of elements handled by one work-item

  int A[Size];
  int B[Size];
  int C[Size] = {};
  for (unsigned i = 0; i < Size; ++i) {
    A[i] = B[i] = i;
  }

  try {
    // One RGBA pixel with 32-bit channels holds four ints, so a single row of
    // Size / 4 pixels covers the whole Size-element array.
    cl::sycl::image<2> imgA(A, image_channel_order::rgba,
                            image_channel_type::unsigned_int32,
                            range<2>{Size / 4, 1});
    cl::sycl::image<2> imgB(B, image_channel_order::rgba,
                            image_channel_type::unsigned_int32,
                            range<2>{Size / 4, 1});
    cl::sycl::image<2> imgC(C, image_channel_order::rgba,
                            image_channel_type::unsigned_int32,
                            range<2>{Size / 4, 1});

    // Exactly one work-item per VL-element chunk. Launching more than
    // Size / VL work-items would make the extra ones compute byte offsets past
    // the end of the image row and issue out-of-bounds block loads/stores.
    cl::sycl::range<1> GlobalRange{Size / VL};

    queue q(esimd_test::ESIMDSelector{}, esimd_test::createExceptionHandler());

    auto dev = q.get_device();
    std::cout << "Running on " << dev.get_info<info::device::name>() << "\n";

    auto e = q.submit([&](cl::sycl::handler &cgh) {
      auto accA = imgA.get_access<uint4, cl::sycl::access::mode::read>(cgh);
      auto accB = imgB.get_access<uint4, cl::sycl::access::mode::read>(cgh);
      auto accC = imgC.get_access<uint4, cl::sycl::access::mode::write>(cgh);

      cgh.parallel_for<class Test>(
          GlobalRange, [=](id<1> i) SYCL_ESIMD_KERNEL {
            using namespace sycl::INTEL::gpu;

            // Horizontal position of this work-item's chunk, expressed in
            // bytes: work-item index * VL elements * sizeof(int).
            constexpr int ESIZE = sizeof(int);
            int x = i * ESIZE * VL;
            int y = 0; // the images have a single row

            // Each vector is viewed as a 1 x VL block so it can be used with
            // the 2D media block load/store operations.
            simd<int, VL> va;
            auto va_ref = va.format<int, 1, VL>();
            va_ref = media_block_load<int, 1, VL>(accA, x, y);

            simd<int, VL> vb;
            auto vb_ref = vb.format<int, 1, VL>();
            vb_ref = media_block_load<int, 1, VL>(accB, x, y);

            simd<int, VL> vc;
            auto vc_ref = vc.format<int, 1, VL>();
            vc_ref = va_ref + vb_ref;
            media_block_store<int, 1, VL>(accC, x, y, vc_ref);
          });
    });
    e.wait();
    // The images are destroyed at the end of this scope, before C is verified
    // on the host below.
  } catch (cl::sycl::exception const &e) {
    std::cout << "SYCL exception caught: " << e.what() << '\n';
    // Always report failure with a fixed non-zero status. The raw OpenCL error
    // code is not a reliable exit status: it may be 0, and the process exit
    // status keeps only its low bits, so a failure could be seen as success.
    return 1;
  }

  // Host-side verification of the device results.
  for (unsigned i = 0; i < Size; ++i) {
    if (A[i] + B[i] != C[i]) {
      std::cout << "failed at index " << i << ", " << C[i] << " != " << A[i]
                << " + " << B[i] << "\n";
      return 1;
    }
  }

  std::cout << "Passed\n";
  return 0;
}