Initial commit.

author: 3gg <3gg@shellblade.net> 2024-11-27 13:41:09 -0800
committer: 3gg <3gg@shellblade.net> 2024-11-27 13:41:09 -0800
commit: c8be8496c8a15d0ede8338939a7512109b8e5e46 (patch)
tree: 1e60112652e9f3c3a20e6bf4cc0b8bef0ebc81fd
7 files changed, 268 insertions, 0 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..c3ae680
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,6 @@
+# Top-level build script: configures every CUDA sample in this repository.
+cmake_minimum_required(VERSION 3.28)
+# The top-level CMakeLists.txt must contain a direct call to project();
+# without one CMake warns and falls back to an implicit project.
+project(cuda_samples LANGUAGES CUDA CXX)
+add_subdirectory(hello)
+add_subdirectory(julia)
+# ptracer/ is not added by this commit. add_subdirectory() on a missing
+# directory is a hard configure error, so only descend into it once it exists.
+if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/ptracer/CMakeLists.txt")
+  add_subdirectory(ptracer)
+endif()
+add_subdirectory(vector_sum)
diff --git a/hello/CMakeLists.txt b/hello/CMakeLists.txt
new file mode 100644
index 0000000..e4b4acc
--- /dev/null
+++ b/hello/CMakeLists.txt
@@ -0,0 +1,11 @@
+# Build script for the cuda_hello sample: one executable built from hello.cu.
+cmake_minimum_required(VERSION 3.28)
+project(cuda_hello LANGUAGES CUDA CXX)
+add_executable(cuda_hello hello.cu)
+# Enable -Wall and -Wextra, but keep -Wpedantic off: nvcc emits non-standard
+# (gcc-specific) host code, which -Wpedantic would warn about.
+# https://stackoverflow.com/questions/31000996/warning-when-compiling-cu-with-wpedantic-style-of-line-directive-is-a-gcc-ex
+target_compile_options(cuda_hello PRIVATE -Wall -Wextra -Wno-pedantic)
diff --git a/hello/hello.cu b/hello/hello.cu
new file mode 100644
index 0000000..691b18c
--- /dev/null
+++ b/hello/hello.cu
@@ -0,0 +1,59 @@
+#include <cstdio>
+// Prints the number of CUDA devices reported by the runtime, followed by the
+// name of every device. Query failures are reported on stdout together with
+// the runtime's description of the error instead of being hidden.
+void logDevices() {
+  int               count        = 0;
+  const cudaError_t count_status = cudaGetDeviceCount(&count);
+  if (count_status != cudaSuccess) {
+    // The query can fail for reasons other than an absent device, so show
+    // the actual error text next to the generic message.
+    printf("No CUDA devices found: %s\n", cudaGetErrorString(count_status));
+    return;
+  }
+  printf("CUDA devices found: %d\n", count);
+  for (int i = 0; i < count; ++i) {
+    cudaDeviceProp    properties;
+    const cudaError_t props_status = cudaGetDeviceProperties(&properties, i);
+    if (props_status != cudaSuccess) {
+      // Keep the listing complete: say why this entry has no name.
+      printf(
+          "Device [%d]: <properties unavailable: %s>\n", i,
+          cudaGetErrorString(props_status));
+      continue;
+    }
+    printf("Device [%d]: %s\n", i, properties.name);
+  }
+}
+// Fills array[0..N-1] with the values 0..N-1 (array[i] = i).
+//
+// Launch layout: any 1D grid of 1D blocks. The N elements are divided among
+// all launched threads with a grid-stride loop, so a <<<1, 1>>> launch still
+// writes every element (one after another) and a larger launch writes them in
+// parallel.
+//
+// array must point to device memory with room for at least N ints.
+__global__ void kernel(int* array, int N) {
+  const int first  = blockIdx.x * blockDim.x + threadIdx.x;
+  const int stride = gridDim.x * blockDim.x;
+  for (int i = first; i < N; i += stride) {
+    array[i] = i;
+  }
+}
+// Lists the CUDA devices, fills a device array with 0..N-1 using kernel(),
+// copies the array back to the host and prints it.
+// Returns 0 on success and 1 if any CUDA call fails (the reason is printed to
+// stderr).
+int main() {
+  logDevices();
+  constexpr int    N          = 100;
+  constexpr size_t size_bytes = N * sizeof(int);
+  // Every initialised local is declared before the first goto: C++ does not
+  // allow a jump to skip over an initialisation.
+  int*        host_array   = new int[N];
+  int*        device_array = nullptr;
+  cudaError_t status       = cudaSuccess;
+  bool        success      = false;
+  status = cudaMalloc(&device_array, size_bytes);
+  if (status != cudaSuccess) {
+    goto cleanup;
+  }
+  kernel<<<1, 1>>>(device_array, N);
+  // A launch has no return value; launch failures are only visible through
+  // cudaGetLastError(). Without this check a failed launch goes unnoticed and
+  // the never-written buffer is printed as if it were the result.
+  status = cudaGetLastError();
+  if (status != cudaSuccess) {
+    goto cleanup;
+  }
+  status = cudaMemcpy(
+      host_array, device_array, size_bytes, cudaMemcpyDeviceToHost);
+  if (status != cudaSuccess) {
+    goto cleanup;
+  }
+  for (int i = 0; i < N; ++i) {
+    printf("%d ", host_array[i]);
+  }
+  printf("\n");
+  success = true;
+cleanup:
+  if (status != cudaSuccess) {
+    fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(status));
+  }
+  delete[] host_array;
+  if (device_array != nullptr) {
+    cudaFree(device_array);
+  }
+  return success ? 0 : 1;
+}
diff --git a/julia/CMakeLists.txt b/julia/CMakeLists.txt
new file mode 100644
index 0000000..e5428fb
--- /dev/null
+++ b/julia/CMakeLists.txt
@@ -0,0 +1,11 @@
+# Build script for the cuda_julia sample: one executable built from julia.cu.
+cmake_minimum_required(VERSION 3.28)
+project(cuda_julia LANGUAGES CUDA CXX)
+add_executable(cuda_julia julia.cu)
+# Enable -Wall and -Wextra, but keep -Wpedantic off: nvcc emits non-standard
+# (gcc-specific) host code, which -Wpedantic would warn about.
+# https://stackoverflow.com/questions/31000996/warning-when-compiling-cu-with-wpedantic-style-of-line-directive-is-a-gcc-ex
+target_compile_options(cuda_julia PRIVATE -Wall -Wextra -Wno-pedantic)
diff --git a/julia/julia.cu b/julia/julia.cu
new file mode 100644
index 0000000..f3ecb80
--- /dev/null
+++ b/julia/julia.cu
@@ -0,0 +1,108 @@
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+// One RGB colour value with 8 bits per channel, stored in r, g, b order.
+struct Pixel {
+  uint8_t r;
+  uint8_t g;
+  uint8_t b;
+};
+// Complex number with a single-precision real part (r) and imaginary part (i).
+struct Complex {
+  float r;
+  float i;
+  // Squared magnitude r*r + i*i; no square root is taken.
+  __device__ float norm2() const {
+    return r * r + i * i;
+  }
+};
+// Device-side complex product of a and b.
+__device__ Complex operator*(Complex a, Complex b) {
+  const float real_part = (a.r * b.r) - (a.i * b.i);
+  const float imag_part = (a.i * b.r) + (a.r * b.i);
+  return Complex{real_part, imag_part};
+}
+// Device-side complex sum of a and b (component-wise addition).
+__device__ Complex operator+(Complex a, Complex b) {
+  const float real_part = a.r + b.r;
+  const float imag_part = a.i + b.i;
+  return Complex{real_part, imag_part};
+}
+// Escape-time test for pixel (x, y) of a width x height image.
+//
+// The pixel is mapped to a starting value a, then a = a * a + c is applied up
+// to 200 times with c = -0.8 + 0.156i. Returns 0 as soon as the squared
+// magnitude of a exceeds 1000, and 1 if that never happens.
+__device__ int julia(int width, int height, int x, int y) {
+  constexpr int   max_iterations = 200;
+  constexpr float scale          = 1.5;
+  // Integer half extents: pixel (half_width, half_height) maps to the origin.
+  const int half_width  = width / 2;
+  const int half_height = height / 2;
+  const float start_real = scale * (half_width - x) / half_width;
+  const float start_imag = scale * (half_height - y) / half_height;
+  const Complex c{-0.8, 0.156};
+  Complex       a{start_real, start_imag};
+  int iteration = 0;
+  while (iteration < max_iterations) {
+    a = a * a + c;
+    if (a.norm2() > 1000) {
+      return 0;  // Escaped.
+    }
+    ++iteration;
+  }
+  return 1;  // Stayed bounded for every iteration.
+}
+// Colours one pixel of the Julia set image per thread.
+//
+// Launch layout: a 2D grid of 1D or 2D blocks providing at least
+// width x height threads. Thread (x, y) writes image[y * width + x]; threads
+// that fall outside the image return without writing, so the grid does not
+// have to divide the image evenly. With one-thread blocks x == blockIdx.x and
+// y == blockIdx.y.
+//
+// image must point to device memory with room for width * height Pixels.
+__global__ void juliaMain(int width, int height, Pixel* image) {
+  const int x = blockIdx.x * blockDim.x + threadIdx.x;
+  const int y = blockIdx.y * blockDim.y + threadIdx.y;
+  if (x >= width || y >= height) {
+    return;
+  }
+  constexpr Pixel background{41, 95, 152};
+  constexpr Pixel juliaColour{228, 192, 135};
+  const Pixel pixel =
+      julia(width, height, x, y) == 1 ? juliaColour : background;
+  // Index in size_t so that y * width cannot overflow int on large images.
+  image[static_cast<size_t>(y) * width + x] = pixel;
+}
+// Writes image (width * height pixels, row-major) to path as a binary file
+// with a "P6" header and a maximum channel value of 255. Note that P6 is the
+// PPM (pixmap) magic number, despite the "pbm" in the function name.
+//
+// Returns true only if the header, every pixel and the final flush performed
+// by fclose() all succeed. Returns false for a null image or path, for
+// negative dimensions, or on any I/O failure.
+bool write_pbm(const Pixel* image, int width, int height, const char* path) {
+  if (image == nullptr || path == nullptr || width < 0 || height < 0) {
+    return false;
+  }
+  // Multiply in size_t: width * height can overflow int for large images.
+  const size_t num_pixels =
+      static_cast<size_t>(width) * static_cast<size_t>(height);
+  FILE* file = fopen(path, "wb");
+  if (!file) {
+    return false;
+  }
+  bool ok = fprintf(file, "P6\n%d %d\n255\n", width, height) > 0;
+  if (ok) {
+    ok = fwrite(image, sizeof(Pixel), num_pixels, file) == num_pixels;
+  }
+  // Always close the file; a write error on buffered data can first show up
+  // here, so the result of fclose() is part of the outcome.
+  if (fclose(file) != 0) {
+    ok = false;
+  }
+  return ok;
+}
+// Renders a Julia set on the GPU and writes it to "julia.pbm".
+//
+// Usage: cuda_julia [width] [height]   (defaults: 1920 1080)
+// Returns 0 on success and 1 on invalid arguments, on any CUDA failure, or if
+// the image cannot be written. The reason is printed to stderr.
+int main(int argc, const char** argv) {
+  const int width  = argc > 1 ? atoi(argv[1]) : 1920;
+  const int height = argc > 2 ? atoi(argv[2]) : 1080;
+  // atoi() gives 0 for non-numeric text and may give a negative number; both
+  // would lead to an invalid allocation size and launch configuration.
+  if (width <= 0 || height <= 0) {
+    fprintf(
+        stderr, "usage: %s [width] [height]  (positive integers)\n",
+        argc > 0 ? argv[0] : "cuda_julia");
+    return 1;
+  }
+  // Every initialised local is declared before the first goto: C++ does not
+  // allow a jump to skip over an initialisation.
+  bool         success = false;
+  cudaError_t  status  = cudaSuccess;
+  const dim3   dim(width, height);
+  // Sizes are computed in size_t; width * height * sizeof(Pixel) does not fit
+  // in an int for large images.
+  const size_t num_pixels =
+      static_cast<size_t>(width) * static_cast<size_t>(height);
+  const size_t image_size_bytes = num_pixels * sizeof(Pixel);
+  auto         image_host       = new Pixel[num_pixels];
+  Pixel*       image_dev        = nullptr;
+  status = cudaMalloc(&image_dev, image_size_bytes);
+  if (status != cudaSuccess) {
+    goto cleanup;
+  }
+  // One block per pixel, one thread per block.
+  juliaMain<<<dim, 1>>>(width, height, image_dev);
+  // A launch has no return value. A rejected launch (for example a grid that
+  // exceeds the device limits) is only visible through cudaGetLastError();
+  // unchecked, the never-written buffer would be saved as a valid image.
+  status = cudaGetLastError();
+  if (status != cudaSuccess) {
+    goto cleanup;
+  }
+  status = cudaMemcpy(
+      image_host, image_dev, image_size_bytes, cudaMemcpyDeviceToHost);
+  if (status != cudaSuccess) {
+    goto cleanup;
+  }
+  if (!write_pbm(image_host, width, height, "julia.pbm")) {
+    fprintf(stderr, "Failed to write julia.pbm\n");
+    goto cleanup;
+  }
+  success = true;
+cleanup:
+  if (status != cudaSuccess) {
+    fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(status));
+  }
+  delete[] image_host;
+  if (image_dev) {
+    cudaFree(image_dev);
+  }
+  return success ? 0 : 1;
+}
diff --git a/vector_sum/CMakeLists.txt b/vector_sum/CMakeLists.txt
new file mode 100644
index 0000000..1eea51b
--- /dev/null
+++ b/vector_sum/CMakeLists.txt
@@ -0,0 +1,11 @@
+# Build script for the vector_sum sample: one executable built from main.cu.
+cmake_minimum_required(VERSION 3.28)
+project(vector_sum LANGUAGES CUDA CXX)
+add_executable(vector_sum main.cu)
+# Enable -Wall and -Wextra, but keep -Wpedantic off: nvcc emits non-standard
+# (gcc-specific) host code, which -Wpedantic would warn about.
+# https://stackoverflow.com/questions/31000996/warning-when-compiling-cu-with-wpedantic-style-of-line-directive-is-a-gcc-ex
+target_compile_options(vector_sum PRIVATE -Wall -Wextra -Wno-pedantic)
diff --git a/vector_sum/main.cu b/vector_sum/main.cu
new file mode 100644
index 0000000..ba2e964
--- /dev/null
+++ b/vector_sum/main.cu
@@ -0,0 +1,62 @@
+#include <cstdio>
+// Element-wise sum: out[i] = a[i] + b[i] for 0 <= i < N.
+//
+// Launch layout: a 1D grid of 1D blocks providing at least N threads in
+// total. Each thread handles one element; threads with an index >= N do
+// nothing, so the launch does not have to match N exactly. With one-thread
+// blocks the element index equals blockIdx.x.
+//
+// a, b and out must point to device memory holding at least N ints each.
+__global__ void add(int N, int* a, int* b, int* out) {
+  const int id = blockIdx.x * blockDim.x + threadIdx.x;
+  if (id < N) {
+    out[id] = a[id] + b[id];
+  }
+}
+// Uploads the values 0..N-1 into two device arrays, adds them element-wise
+// with add() and prints the sums.
+// Returns 0 on success and 1 if any CUDA call fails (the reason is printed to
+// stderr).
+int main() {
+  constexpr int    N          = 100;
+  constexpr size_t size_bytes = N * sizeof(int);
+  // Every initialised local is declared before the first goto: C++ does not
+  // allow a jump to skip over an initialisation.
+  bool        success       = false;
+  cudaError_t status        = cudaSuccess;
+  int         host_array[N] = {0};
+  int*        dev_arrays[3] = {nullptr};
+  // Allocate device arrays.
+  for (int i = 0; i < 3; ++i) {
+    status = cudaMalloc(&dev_arrays[i], size_bytes);
+    if (status != cudaSuccess) {
+      goto cleanup;
+    }
+  }
+  // Fill the host array with values 0..N-1.
+  for (int i = 0; i < N; ++i) {
+    host_array[i] = i;
+  }
+  // Copy the host array to each of the first two device arrays.
+  for (int i = 0; i < 2; ++i) {
+    status = cudaMemcpy(
+        dev_arrays[i], host_array, size_bytes, cudaMemcpyHostToDevice);
+    if (status != cudaSuccess) {
+      goto cleanup;
+    }
+  }
+  // Add the first two arrays.
+  // N blocks, 1 thread per block.
+  add<<<N, 1>>>(N, dev_arrays[0], dev_arrays[1], dev_arrays[2]);
+  // A launch has no return value; launch failures are only visible through
+  // cudaGetLastError(). Without this check a failed launch goes unnoticed and
+  // the never-written output array is printed as if it were the result.
+  status = cudaGetLastError();
+  if (status != cudaSuccess) {
+    goto cleanup;
+  }
+  // Copy the result from the third array to the host.
+  status = cudaMemcpy(
+      host_array, dev_arrays[2], size_bytes, cudaMemcpyDeviceToHost);
+  if (status != cudaSuccess) {
+    goto cleanup;
+  }
+  // Print the result.
+  for (int i = 0; i < N; ++i) {
+    printf("%d ", host_array[i]);
+  }
+  printf("\n");
+  success = true;
+cleanup:
+  if (status != cudaSuccess) {
+    fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(status));
+  }
+  for (int i = 0; i < 3; ++i) {
+    if (dev_arrays[i] != nullptr) {
+      cudaFree(dev_arrays[i]);
+    }
+  }
+  return success ? 0 : 1;
+}
author	3gg <3gg@shellblade.net>	2024-11-27 13:41:09 -0800
committer	3gg <3gg@shellblade.net>	2024-11-27 13:41:09 -0800
commit	c8be8496c8a15d0ede8338939a7512109b8e5e46 (patch)
tree	1e60112652e9f3c3a20e6bf4cc0b8bef0ebc81fd

diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..c3ae680 --- /dev/null +++ b/CMakeLists.txt
@@ -0,0 +1,6 @@
	1	cmake_minimum_required(VERSION 3.28)  # Top-level build script; needs CMake 3.28 or newer.
	2
	3	add_subdirectory(hello)  # Each sample directory below is configured as its own sub-build.
	4	add_subdirectory(julia)
	5	add_subdirectory(ptracer)  # NOTE(review): no ptracer/ files are added by this commit - confirm the directory exists.
	6	add_subdirectory(vector_sum)


diff --git a/hello/CMakeLists.txt b/hello/CMakeLists.txt new file mode 100644 index 0000000..e4b4acc --- /dev/null +++ b/hello/CMakeLists.txt
@@ -0,0 +1,11 @@
	1	cmake_minimum_required(VERSION 3.28)  # Build script for the cuda_hello sample.
	2
	3	project(cuda_hello LANGUAGES CUDA CXX)  # Enables the CUDA and C++ languages.
	4
	5	add_executable(cuda_hello
	6	hello.cu)  # The executable is built from this single source file.
	7
	8	# -Wpedantic causes warnings due to nvcc emitting non-standard (gcc-specific)
	9	# host code.
	10	# https://stackoverflow.com/questions/31000996/warning-when-compiling-cu-with-wpedantic-style-of-line-directive-is-a-gcc-ex
	11	target_compile_options(cuda_hello PRIVATE -Wall -Wextra -Wno-pedantic)  # Warnings on, pedantic off (see above).


diff --git a/hello/hello.cu b/hello/hello.cu new file mode 100644 index 0000000..691b18c --- /dev/null +++ b/hello/hello.cu
@@ -0,0 +1,59 @@
	1	#include <cstdio>
	2
	3	void logDevices() {  // Prints the CUDA device count and the name of each device.
	4	int count;
	5	if (cudaGetDeviceCount(&count) != cudaSuccess) {  // Any failure of the query is reported as "no devices".
	6	printf("No CUDA devices found\n");
	7	return;
	8	}
	9
	10	printf("CUDA devices found: %d\n", count);
	11	for (int i = 0; i < count; ++i) {
	12	cudaDeviceProp properties;
	13	if (cudaGetDeviceProperties(&properties, i) == cudaSuccess) {  // A device whose properties cannot be read is skipped silently.
	14	printf("Device [%d]: %s\n", i, properties.name);
	15	}
	16	}
	17	}
	18
	19	__global__ void kernel(int* array, int N) {  // Writes array[i] = i for every i in [0, N).
	20	for (int i = 0; i < N; ++i) {  // No thread or block index is used: every launched thread runs the whole loop.
	21	array[i] = i;
	22	}
	23	}
	24
	25	int main() {  // Fills a device array with 0..N-1, copies it back and prints it; returns 0 on success, 1 on failure.
	26	logDevices();
	27
	28	constexpr int N = 100;
	29
	30	int* host_array = new int[N];
	31	int* device_array = nullptr;  // Stays nullptr if cudaMalloc fails, which cleanup relies on.
	32	bool success = false;
	33
	34	if (cudaMalloc(&device_array, N * sizeof(int)) != cudaSuccess) {
	35	goto cleanup;
	36	}
	37
	38	kernel<<<1, 1>>>(device_array, N);  // One block of one thread. NOTE(review): the launch result is not checked.
	39
	40	if (cudaMemcpy(
	41	host_array, device_array, N * sizeof(int), cudaMemcpyDeviceToHost) !=
	42	cudaSuccess) {
	43	goto cleanup;
	44	}
	45
	46	for (int i = 0; i < N; ++i) {
	47	printf("%d ", host_array[i]);
	48	}
	49	printf("\n");
	50
	51	success = true;
	52
	53	cleanup:  // Shared exit path: releases host and device memory on success and on failure.
	54	delete[] host_array;
	55	if (device_array != nullptr) {
	56	cudaFree(device_array);
	57	}
	58	return success ? 0 : 1;
	59	}


diff --git a/julia/CMakeLists.txt b/julia/CMakeLists.txt new file mode 100644 index 0000000..e5428fb --- /dev/null +++ b/julia/CMakeLists.txt
@@ -0,0 +1,11 @@
	1	cmake_minimum_required(VERSION 3.28)  # Build script for the cuda_julia sample.
	2
	3	project(cuda_julia LANGUAGES CUDA CXX)  # Enables the CUDA and C++ languages.
	4
	5	add_executable(cuda_julia
	6	julia.cu)  # The executable is built from this single source file.
	7
	8	# -Wpedantic causes warnings due to nvcc emitting non-standard (gcc-specific)
	9	# host code.
	10	# https://stackoverflow.com/questions/31000996/warning-when-compiling-cu-with-wpedantic-style-of-line-directive-is-a-gcc-ex
	11	target_compile_options(cuda_julia PRIVATE -Wall -Wextra -Wno-pedantic)  # Warnings on, pedantic off (see above).


diff --git a/julia/julia.cu b/julia/julia.cu new file mode 100644 index 0000000..f3ecb80 --- /dev/null +++ b/julia/julia.cu
@@ -0,0 +1,108 @@
	1	#include <cstdint>
	2	#include <cstdio>
	3	#include <cstdlib>
	4
	5	struct Pixel {  // One RGB colour value with 8 bits per channel.
	6	uint8_t r, g, b;
	7	};
	8
	9	struct Complex {  // Complex number: r is the real part, i the imaginary part.
	10	float r, i;
	11
	12	__device__ float norm2() const { return r * r + i * i; }  // Squared magnitude; no square root is taken.
	13	};
	14
	15	__device__ Complex operator*(Complex a, Complex b) {  // Device-side complex product of a and b.
	16	return Complex{(a.r * b.r) - (a.i * b.i), (a.i * b.r) + (a.r * b.i)};
	17	}
	18
	19	__device__ Complex operator+(Complex a, Complex b) {  // Device-side complex sum (component-wise).
	20	return Complex{a.r + b.r, a.i + b.i};
	21	}
	22
	23	__device__ int julia(int width, int height, int x, int y) {  // Escape-time test for pixel (x, y): returns 1 if the iteration stays bounded, else 0.
	24	constexpr float scale = 1.5;
	25	constexpr int N = 200;  // Maximum number of iterations.
	26
	27	const float jx = scale * (width / 2 - x) / (width / 2);  // width / 2 is an integer division; it is 0 when width < 2.
	28	const float jy = scale * (height / 2 - y) / (height / 2);
	29
	30	const Complex c{-0.8, 0.156};  // Constant added on every iteration.
	31	Complex a{jx, jy};  // Starting value derived from the pixel position.
	32
	33	for (int i = 0; i < N; ++i) {
	34	a = a * a + c;
	35	if (a.norm2() > 1000) {  // Squared magnitude above 1000 counts as escaped.
	36	return 0;
	37	}
	38	}
	39	return 1;
	40	}
	41
	42	__global__ void juliaMain(int width, int height, Pixel* image) {  // Colours the pixel addressed by this block's index.
	43	const int x = blockIdx.x;  // Only blockIdx is used, so the kernel expects one block per pixel.
	44	const int y = blockIdx.y;
	45
	46	constexpr Pixel background{41, 95, 152};
	47	constexpr Pixel juliaColour{228, 192, 135};
	48
	49	const Pixel pixel =
	50	julia(width, height, x, y) == 1 ? juliaColour : background;
	51
	52	image[y * width + x] = pixel;  // Row-major store. NOTE(review): x and y are not checked against width and height.
	53	}
	54
	55	bool write_pbm(const Pixel* image, int width, int height, const char* path) {  // Writes image to path; returns false if the file cannot be opened or the pixel write is short.
	56	const size_t num_pixels = width * height;  // NOTE(review): the product is computed in int before the conversion to size_t.
	57
	58	FILE* file = fopen(path, "wb");
	59	if (!file) {
	60	return false;
	61	}
	62
	63	fprintf(file, "P6\n%d %d\n255\n", width, height);  // Header: P6 magic, dimensions, maximum channel value 255. The result is not checked.
	64	if (fwrite(image, sizeof(Pixel), num_pixels, file) != num_pixels) {
	65	fclose(file);
	66	return false;
	67	}
	68
	69	fclose(file);
	70	return true;
	71	}
	72
	73	int main(int argc, const char** argv) {  // Renders the image on the GPU and writes it to "julia.pbm"; returns 0 on success, 1 on failure.
	74	const int width = argc > 1 ? atoi(argv[1]) : 1920;  // Optional first argument; defaults to 1920. The atoi result is not validated.
	75	const int height = argc > 2 ? atoi(argv[2]) : 1080;  // Optional second argument; defaults to 1080.
	76
	77	bool success = false;
	78
	79	const dim3 dim(width, height);  // Grid dimensions: one block per pixel.
	80	const int image_size_bytes = width * height * sizeof(Pixel);  // NOTE(review): the size is stored in an int.
	81	auto image_host = new Pixel[width * height];
	82	Pixel* image_dev = nullptr;  // Stays nullptr if cudaMalloc fails, which cleanup relies on.
	83
	84	if (cudaMalloc(&image_dev, image_size_bytes) != cudaSuccess) {
	85	goto cleanup;
	86	}
	87
	88	juliaMain<<<dim, 1>>>(width, height, image_dev);  // One thread per block. NOTE(review): the launch result is not checked.
	89
	90	if (cudaMemcpy(
	91	image_host, image_dev, image_size_bytes, cudaMemcpyDeviceToHost) !=
	92	cudaSuccess) {
	93	goto cleanup;
	94	}
	95
	96	if (!write_pbm(image_host, width, height, "julia.pbm")) {
	97	goto cleanup;
	98	}
	99
	100	success = true;
	101
	102	cleanup:  // Shared exit path: releases host and device memory on success and on failure.
	103	delete[] image_host;
	104	if (image_dev) {
	105	cudaFree(image_dev);
	106	}
	107	return success ? 0 : 1;
	108	}


diff --git a/vector_sum/CMakeLists.txt b/vector_sum/CMakeLists.txt new file mode 100644 index 0000000..1eea51b --- /dev/null +++ b/vector_sum/CMakeLists.txt
@@ -0,0 +1,11 @@
	1	cmake_minimum_required(VERSION 3.28)  # Build script for the vector_sum sample.
	2
	3	project(vector_sum LANGUAGES CUDA CXX)  # Enables the CUDA and C++ languages.
	4
	5	add_executable(vector_sum
	6	main.cu)  # The executable is built from this single source file.
	7
	8	# -Wpedantic causes warnings due to nvcc emitting non-standard (gcc-specific)
	9	# host code.
	10	# https://stackoverflow.com/questions/31000996/warning-when-compiling-cu-with-wpedantic-style-of-line-directive-is-a-gcc-ex
	11	target_compile_options(vector_sum PRIVATE -Wall -Wextra -Wno-pedantic)  # Warnings on, pedantic off (see above).


diff --git a/vector_sum/main.cu b/vector_sum/main.cu new file mode 100644 index 0000000..ba2e964 --- /dev/null +++ b/vector_sum/main.cu
@@ -0,0 +1,62 @@
	1	#include <cstdio>
	2
	3	__global__ void add(int N, int* a, int* b, int* out) {  // Each block writes out[blockIdx.x] = a[blockIdx.x] + b[blockIdx.x].
	4	const int id = blockIdx.x;  // Only the block index is used, so the kernel expects one block per element.
	5	out[id] = a[id] + b[id];  // NOTE(review): N is never read, so id is not checked against the array length.
	6	}
	7
	8	int main() {  // Adds two device arrays holding 0..N-1 and prints the sums; returns 0 on success, 1 on failure.
	9	constexpr int N = 100;
	10
	11	bool success = false;
	12	int host_array[N] = {0};
	13	int* dev_arrays[3] = {nullptr};  // [0] and [1] are the inputs, [2] receives the result.
	14
	15	// Allocate device arrays.
	16	for (int i = 0; i < 3; ++i) {
	17	if (cudaMalloc(&dev_arrays[i], N * sizeof(int)) != cudaSuccess) {
	18	goto cleanup;
	19	}
	20	}
	21
	22	// Fill the host array with values 0..N-1.
	23	for (int i = 0; i < N; ++i) {
	24	host_array[i] = i;
	25	}
	26
	27	// Copy the host array to each of the first two device arrays.
	28	for (int i = 0; i < 2; ++i) {
	29	if (cudaMemcpy(
	30	dev_arrays[i], host_array, N * sizeof(int),
	31	cudaMemcpyHostToDevice) != cudaSuccess) {
	32	goto cleanup;
	33	}
	34	}
	35
	36	// Add the first two arrays.
	37	// N blocks, 1 thread per block.
	38	add<<<N, 1>>>(N, dev_arrays[0], dev_arrays[1], dev_arrays[2]);  // NOTE(review): the launch result is not checked.
	39
	40	// Copy the result from the third array to the host.
	41	if (cudaMemcpy(
	42	host_array, dev_arrays[2], N * sizeof(int), cudaMemcpyDeviceToHost) !=
	43	cudaSuccess) {
	44	goto cleanup;
	45	}
	46
	47	// Print the result.
	48	for (int i = 0; i < N; ++i) {
	49	printf("%d ", host_array[i]);
	50	}
	51	printf("\n");
	52
	53	success = true;
	54
	55	cleanup:  // Shared exit path: frees whichever device arrays were allocated.
	56	for (int i = 0; i < 3; ++i) {
	57	if (dev_arrays[i] != nullptr) {
	58	cudaFree(dev_arrays[i]);
	59	}
	60	}
	61	return success ? 0 : 1;
	62	}