-
Notifications
You must be signed in to change notification settings - Fork 8
Expand file tree
/
Copy pathadd.cu
More file actions
47 lines (40 loc) · 976 Bytes
/
Copy pathadd.cu
File metadata and controls
47 lines (40 loc) · 976 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#include <iostream>
/**
 * Element-wise vector addition kernel: writes c[i] = a[i] + b[i] for every
 * index i in [0, n).
 *
 * Each thread starts at its flat global index and then advances by the total
 * number of launched threads, so every element is covered for any 1D launch
 * configuration -- including grids that provide fewer than n threads. When
 * the grid provides at least n threads, each thread handles at most one
 * element.
 *
 * Launch layout: only the .x dimension of the grid and block is used.
 * No shared memory is used.
 *
 * @param n  number of elements to process; n <= 0 performs no work
 * @param a  first input array (at least n floats), dereferenced on the device
 * @param b  second input array (at least n floats), dereferenced on the device
 * @param c  output array (at least n floats), written on the device
 */
__global__ void add(int n , float* a, float* b, float* c)
{
    // Index math is done in 64 bits so neither the block offset nor the
    // stride increment can wrap around before the comparison against n.
    const long long stride = static_cast<long long>(gridDim.x) * blockDim.x;
    const long long first  = static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;

    for (long long i = first; i < n; i += stride)
    {
        c[i] = a[i] + b[i];
    }
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char* what)
{
    if (status == cudaSuccess)
    {
        return true;
    }
    std::cerr << "CUDA error during " << what << ": "
              << cudaGetErrorString(status) << std::endl;
    return false;
}

/**
 * Demo driver: fills two host vectors (a[i] = i, b[i] = 2*i), adds them on
 * the GPU with the `add` kernel, and prints the first few (a, b, c) triples.
 *
 * Returns 0 on success and 1 if any CUDA runtime call or the kernel launch
 * fails. Host and device buffers are released on every path.
 */
int main()
{
    const int N = 4096;
    const int BLOCK_SIZE = 256;
    const size_t bytes = N * sizeof(float);
    // Integer ceil-division: enough blocks to cover N even when it is not a
    // multiple of BLOCK_SIZE, without a round trip through floating point.
    const int numBlocks = (N + BLOCK_SIZE - 1) / BLOCK_SIZE;

    // Host-side inputs and output.
    float* a = new float[N];
    float* b = new float[N];
    float* c = new float[N];
    for (int i = 0; i < N; i++)
    {
        a[i] = static_cast<float>(i);
        b[i] = static_cast<float>(2 * i);
    }

    // Device pointers start as null so the cleanup below is valid even when
    // an allocation fails part-way (cudaFree on a null pointer is a no-op).
    float* a_d = nullptr;
    float* b_d = nullptr;
    float* c_d = nullptr;

    // Short-circuit chain: the first failing step is reported and every
    // later step is skipped.
    bool ok = cudaOk(cudaMalloc((void**) &a_d, bytes), "cudaMalloc(a_d)")
           && cudaOk(cudaMalloc((void**) &b_d, bytes), "cudaMalloc(b_d)")
           && cudaOk(cudaMalloc((void**) &c_d, bytes), "cudaMalloc(c_d)")
           && cudaOk(cudaMemcpy(a_d, a, bytes, cudaMemcpyHostToDevice), "copy a to device")
           && cudaOk(cudaMemcpy(b_d, b, bytes, cudaMemcpyHostToDevice), "copy b to device");

    if (ok)
    {
        add<<<numBlocks, BLOCK_SIZE>>>(N, a_d, b_d, c_d);
        // A launch does not return a status: configuration errors are read
        // back with cudaGetLastError(), and errors raised while the kernel
        // runs surface at the blocking device-to-host copy that follows.
        ok = cudaOk(cudaGetLastError(), "add kernel launch")
          && cudaOk(cudaMemcpy(c, c_d, bytes, cudaMemcpyDeviceToHost), "copy c to host");
    }

    if (ok)
    {
        // Print a short preview of the result, never reading past N.
        const int preview = (N < 10) ? N : 10;
        for (int i = 0; i < preview; i++)
        {
            std::cout<<a[i]<<" "<<b[i]<<" "<<c[i]<<std::endl;
        }
    }

    cudaFree(a_d);
    cudaFree(b_d);
    cudaFree(c_d);
    delete[] a;
    delete[] b;
    delete[] c;

    return ok ? 0 : 1;
}