forked from TeachingUndergradsCHC/modules
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathaddVectors.cpp
More file actions
78 lines (66 loc) · 2.15 KB
/
addVectors.cpp
File metadata and controls
78 lines (66 loc) · 2.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
/*
* Sample program that uses SYCL to perform element-wise add of two
* vectors. Each element is the responsibility of a separate thread.
*
* Based on the example CUDA program and on
* https://github.com/illuhad/hipSYCL/blob/develop/doc/examples.md
*
* compile with:
* syclcc -o addVectors addVectors.cpp
* run with:
* ./addVectors
*/
#include <chrono>
#include <iostream>
#include <stdexcept>
#include <vector>
#include <CL/sycl.hpp>
//element type stored in the input and output vectors
using data_type = int;
//problem size (vector length); a typed constant rather than a macro so
//that it obeys normal scope and type rules
constexpr int N = 10;
// Element-wise addition of two vectors using SYCL.
//
// This function runs on the host. It wraps the host vectors in SYCL
// buffers, submits one command group to q, and that command group's
// parallel_for is what executes on the device selected by q, with one
// work-item per vector element.
//
// q:       queue the command group is submitted to
// a, b:    input vectors; they must have the same length
// returns: vector c of the same length with c[i] = a[i] + b[i]
//          (an empty vector if the inputs are empty)
// throws:  std::invalid_argument if a and b differ in length
std::vector<data_type> kernel(cl::sycl::queue& q,
const std::vector<data_type>& a,
const std::vector<data_type>& b) {
// the launch size is taken from a, so a shorter b would be indexed
// past its end inside the kernel
if (a.size() != b.size()) {
throw std::invalid_argument("kernel: input vectors must have the same length");
}
std::vector<data_type> c(a.size());
// nothing to add: skip creating zero-sized buffers and an empty launch
if (a.empty()) {
return c;
}
cl::sycl::range<1> work_items{a.size()};
{
// buffers give the SYCL runtime access to the host data; the inner
// scope ends their lifetime before c is returned (per the SYCL
// specification, destroying a buffer waits for outstanding work and
// copies the result back to the host memory it was created from)
cl::sycl::buffer<data_type> buff_a(a.data(), a.size());
cl::sycl::buffer<data_type> buff_b(b.data(), b.size());
cl::sycl::buffer<data_type> buff_c(c.data(), c.size());
// sets c[i] = a[i] + b[i]
// each work-item is responsible for one value of i
q.submit([&](cl::sycl::handler &cgh){
// accessors declare how the kernel uses each buffer:
// a and b are only read, c is only written
auto access_a = buff_a.get_access<cl::sycl::access::mode::read>(cgh);
auto access_b = buff_b.get_access<cl::sycl::access::mode::read>(cgh);
auto access_c = buff_c.get_access<cl::sycl::access::mode::write>(cgh);
cgh.parallel_for<class vector_add>(work_items,
[=] (cl::sycl::id<1> tid) {
access_c[tid] = access_a[tid] + access_b[tid];
});
});
}
return c;
}
int main() {
std::vector<data_type> a(N); //input arrays
std::vector<data_type> b(N);
std::vector<data_type> res(N); //output array
//setup command queue
cl::sycl::queue q;
//set up contents of a and b
for(int i=0; i < N; i++) {
a[i] = i;
b[i] = i;
}
//start timer
auto start = std::chrono::high_resolution_clock::now();
//call the kernel
res = kernel(q, a, b);
//stop timer and print time
auto stop = std::chrono::high_resolution_clock::now();
std::chrono::duration<double, std::milli> diff = stop - start;
std::cout << "time: " << diff.count() << " ms" << std::endl;
//verify results
for(int i=0; i < N; i++)
std::cout << res[i] << std::endl;
}