summaryrefslogtreecommitdiff
path: root/vector_sum/main.cu
diff options
context:
space:
mode:
Diffstat (limited to 'vector_sum/main.cu')
-rw-r--r--vector_sum/main.cu62
1 files changed, 62 insertions, 0 deletions
diff --git a/vector_sum/main.cu b/vector_sum/main.cu
new file mode 100644
index 0000000..ba2e964
--- /dev/null
+++ b/vector_sum/main.cu
@@ -0,0 +1,62 @@
1#include <cstdio>
2
// Element-wise vector addition kernel: out[i] = a[i] + b[i] for 0 <= i < N.
//
// Launch layout: 1D grid of 1D blocks. Each thread handles at most one
// element, at the flat global index blockIdx.x * blockDim.x + threadIdx.x,
// so the launch must supply at least N threads in total to cover every
// element. Threads whose index falls at or beyond N do nothing, which makes
// over-provisioned launches safe. No shared memory is used.
//
// Parameters:
//   N   - number of elements to process.
//   a   - first input array; dereferenced on the device for indices [0, N).
//   b   - second input array; dereferenced on the device for indices [0, N).
//   out - output array; written on the device for indices [0, N).
__global__ void add(int N, int* a, int* b, int* out) {
  // Flat global index; reduces to blockIdx.x when launched with one thread
  // per block.
  const int id = blockIdx.x * blockDim.x + threadIdx.x;

  // Guard the grid tail: the thread count rarely matches N exactly.
  if (id < N) {
    out[id] = a[id] + b[id];
  }
}
7
// Adds two integer vectors on the GPU and prints the element-wise sums.
//
// Both inputs hold the values 0..N-1, so the printed result is 0, 2, 4, ...
// Every CUDA runtime call and the kernel launch are checked; on the first
// failure the failing step and the CUDA error string are written to stderr,
// device memory is released, and the process exits with status 1.
// Returns 0 on success.
int main() {
  constexpr int N = 100;
  constexpr int kNumArrays = 3;  // two inputs + one output
  constexpr size_t kBytes = N * sizeof(int);

  // All locals are declared before the first `goto` so that no jump to
  // `cleanup` crosses an initialization.
  bool success = false;
  cudaError_t status = cudaSuccess;
  const char* failed_step = "";
  int host_array[N] = {0};
  int* dev_arrays[kNumArrays] = {nullptr};

  // Allocate device arrays.
  for (int i = 0; i < kNumArrays; ++i) {
    status = cudaMalloc(&dev_arrays[i], kBytes);
    if (status != cudaSuccess) {
      failed_step = "cudaMalloc";
      goto cleanup;
    }
  }

  // Fill the host array with values 0..N-1.
  for (int i = 0; i < N; ++i) {
    host_array[i] = i;
  }

  // Copy the host array to each of the first two device arrays.
  for (int i = 0; i < 2; ++i) {
    status = cudaMemcpy(dev_arrays[i], host_array, kBytes,
                        cudaMemcpyHostToDevice);
    if (status != cudaSuccess) {
      failed_step = "cudaMemcpy (host to device)";
      goto cleanup;
    }
  }

  // Add the first two arrays.
  // N blocks, 1 thread per block.
  add<<<N, 1>>>(N, dev_arrays[0], dev_arrays[1], dev_arrays[2]);

  // A kernel launch returns no status itself; launch/configuration errors
  // must be fetched explicitly.
  status = cudaGetLastError();
  if (status != cudaSuccess) {
    failed_step = "kernel launch";
    goto cleanup;
  }

  // Copy the result from the third array to the host. Errors raised while
  // the kernel was executing are reported by this call as well.
  status = cudaMemcpy(host_array, dev_arrays[2], kBytes,
                      cudaMemcpyDeviceToHost);
  if (status != cudaSuccess) {
    failed_step = "cudaMemcpy (device to host)";
    goto cleanup;
  }

  // Print the result.
  for (int i = 0; i < N; ++i) {
    printf("%d ", host_array[i]);
  }
  printf("\n");

  success = true;

cleanup:
  if (!success) {
    fprintf(stderr, "CUDA error during %s: %s\n", failed_step,
            cudaGetErrorString(status));
  }

  // Release whatever was allocated; entries never allocated are still null.
  for (int i = 0; i < kNumArrays; ++i) {
    if (dev_arrays[i] != nullptr) {
      const cudaError_t free_status = cudaFree(dev_arrays[i]);
      if (free_status != cudaSuccess) {
        // Report but do not change the exit status decided above.
        fprintf(stderr, "warning: cudaFree failed: %s\n",
                cudaGetErrorString(free_status));
      }
    }
  }
  return success ? 0 : 1;
}