diff options
author | 3gg <3gg@shellblade.net> | 2023-12-16 10:21:16 -0800 |
---|---|---|
committer | 3gg <3gg@shellblade.net> | 2023-12-16 10:21:16 -0800 |
commit | 653e98e029a0d0f110b0ac599e50406060bb0f87 (patch) | |
tree | 6f909215218f6720266bde1b3f49aeddad8b1da3 | |
parent | 3df7b6fb0c65295eed4590e6f166d60e89b3c68e (diff) |
Decouple activations from linear layer.
-rw-r--r-- | src/bin/mnist/src/main.c | 195 | ||||
-rw-r--r-- | src/lib/include/neuralnet/matrix.h | 3 | ||||
-rw-r--r-- | src/lib/include/neuralnet/neuralnet.h | 51 | ||||
-rw-r--r-- | src/lib/src/activation.h | 4 | ||||
-rw-r--r-- | src/lib/src/matrix.c | 6 | ||||
-rw-r--r-- | src/lib/src/neuralnet.c | 218 | ||||
-rw-r--r-- | src/lib/src/neuralnet_impl.h | 35 | ||||
-rw-r--r-- | src/lib/src/train.c | 182 | ||||
-rw-r--r-- | src/lib/test/neuralnet_test.c | 103 | ||||
-rw-r--r-- | src/lib/test/train_linear_perceptron_non_origin_test.c | 46 | ||||
-rw-r--r-- | src/lib/test/train_linear_perceptron_test.c | 44 | ||||
-rw-r--r-- | src/lib/test/train_sigmoid_test.c | 46 | ||||
-rw-r--r-- | src/lib/test/train_xor_test.c | 55 |
13 files changed, 559 insertions, 429 deletions
diff --git a/src/bin/mnist/src/main.c b/src/bin/mnist/src/main.c index 9aa3ce5..53e0197 100644 --- a/src/bin/mnist/src/main.c +++ b/src/bin/mnist/src/main.c | |||
@@ -29,32 +29,35 @@ static const double LABEL_UPPER_BOUND = 0.99; | |||
29 | // Epsilon used to compare R values. | 29 | // Epsilon used to compare R values. |
30 | static const double EPS = 1e-10; | 30 | static const double EPS = 1e-10; |
31 | 31 | ||
32 | #define min(a,b) ((a) < (b) ? (a) : (b)) | 32 | #define min(a, b) ((a) < (b) ? (a) : (b)) |
33 | 33 | ||
34 | typedef struct ImageSet { | 34 | typedef struct ImageSet { |
35 | nnMatrix images; // Images flattened into row vectors of the matrix. | 35 | nnMatrix images; // Images flattened into row vectors of the matrix. |
36 | nnMatrix labels; // One-hot-encoded labels. | 36 | nnMatrix labels; // One-hot-encoded labels. |
37 | int count; // Number of images and labels. | 37 | int count; // Number of images and labels. |
38 | int rows; // Rows in an image. | 38 | int rows; // Rows in an image. |
39 | int cols; // Columns in an image. | 39 | int cols; // Columns in an image. |
40 | } ImageSet; | 40 | } ImageSet; |
41 | 41 | ||
42 | static void usage(const char* argv0) { | 42 | static void usage(const char* argv0) { |
43 | fprintf(stderr, "Usage: %s <path to mnist files directory> [num images]\n", argv0); | 43 | fprintf( |
44 | stderr, "Usage: %s <path to mnist files directory> [num images]\n", | ||
45 | argv0); | ||
44 | fprintf(stderr, "\n"); | 46 | fprintf(stderr, "\n"); |
45 | fprintf(stderr, " Use -1 for [num images] to use all the images in the data set\n"); | 47 | fprintf( |
48 | stderr, | ||
49 | " Use -1 for [num images] to use all the images in the data set\n"); | ||
46 | } | 50 | } |
47 | 51 | ||
48 | static bool R_eq(R a, R b) { | 52 | static bool R_eq(R a, R b) { return fabs(a - b) <= EPS; } |
49 | return fabs(a-b) <= EPS; | ||
50 | } | ||
51 | 53 | ||
52 | static void PrintImage(const nnMatrix* images, int rows, int cols, int image_index) { | 54 | static void PrintImage( |
55 | const nnMatrix* images, int rows, int cols, int image_index) { | ||
53 | assert(images); | 56 | assert(images); |
54 | assert((0 <= image_index) && (image_index < images->rows)); | 57 | assert((0 <= image_index) && (image_index < images->rows)); |
55 | 58 | ||
56 | // Top line. | 59 | // Top line. |
57 | for (int j = 0; j < cols/2; ++j) { | 60 | for (int j = 0; j < cols / 2; ++j) { |
58 | printf(" -"); | 61 | printf(" -"); |
59 | } | 62 | } |
60 | printf("\n"); | 63 | printf("\n"); |
@@ -68,8 +71,7 @@ static void PrintImage(const nnMatrix* images, int rows, int cols, int image_ind | |||
68 | printf("#"); | 71 | printf("#"); |
69 | } else if (*value > 0.5) { | 72 | } else if (*value > 0.5) { |
70 | printf("*"); | 73 | printf("*"); |
71 | } | 74 | } else if (*value > PIXEL_LOWER_BOUND) { |
72 | else if (*value > PIXEL_LOWER_BOUND) { | ||
73 | printf(":"); | 75 | printf(":"); |
74 | } else if (*value == 0.0) { | 76 | } else if (*value == 0.0) { |
75 | // Values should not be exactly 0, otherwise they cancel out weights | 77 | // Values should not be exactly 0, otherwise they cancel out weights |
@@ -84,7 +86,7 @@ static void PrintImage(const nnMatrix* images, int rows, int cols, int image_ind | |||
84 | } | 86 | } |
85 | 87 | ||
86 | // Bottom line. | 88 | // Bottom line. |
87 | for (int j = 0; j < cols/2; ++j) { | 89 | for (int j = 0; j < cols / 2; ++j) { |
88 | printf(" -"); | 90 | printf(" -"); |
89 | } | 91 | } |
90 | printf("\n"); | 92 | printf("\n"); |
@@ -96,7 +98,7 @@ static void PrintLabel(const nnMatrix* labels, int label_index) { | |||
96 | 98 | ||
97 | // Compute the label from the one-hot encoding. | 99 | // Compute the label from the one-hot encoding. |
98 | const R* value = nnMatrixRow(labels, label_index); | 100 | const R* value = nnMatrixRow(labels, label_index); |
99 | int label = -1; | 101 | int label = -1; |
100 | for (int i = 0; i < 10; ++i) { | 102 | for (int i = 0; i < 10; ++i) { |
101 | if (R_eq(*value++, LABEL_UPPER_BOUND)) { | 103 | if (R_eq(*value++, LABEL_UPPER_BOUND)) { |
102 | label = i; | 104 | label = i; |
@@ -113,13 +115,12 @@ static void PrintLabel(const nnMatrix* labels, int label_index) { | |||
113 | printf(")\n"); | 115 | printf(")\n"); |
114 | } | 116 | } |
115 | 117 | ||
116 | static R lerp(R a, R b, R t) { | 118 | static R lerp(R a, R b, R t) { return a + t * (b - a); } |
117 | return a + t*(b-a); | ||
118 | } | ||
119 | 119 | ||
120 | /// Rescales a pixel from [0,255] to [PIXEL_LOWER_BOUND, 1.0]. | 120 | /// Rescales a pixel from [0,255] to [PIXEL_LOWER_BOUND, 1.0]. |
121 | static R FormatPixel(uint8_t pixel) { | 121 | static R FormatPixel(uint8_t pixel) { |
122 | const R value = (R)(pixel) / 255.0 * (1.0 - PIXEL_LOWER_BOUND) + PIXEL_LOWER_BOUND; | 122 | const R value = |
123 | (R)(pixel) / 255.0 * (1.0 - PIXEL_LOWER_BOUND) + PIXEL_LOWER_BOUND; | ||
123 | assert(value >= PIXEL_LOWER_BOUND); | 124 | assert(value >= PIXEL_LOWER_BOUND); |
124 | assert(value <= 1.0); | 125 | assert(value <= 1.0); |
125 | return value; | 126 | return value; |
@@ -152,7 +153,8 @@ static void ImageToMatrix( | |||
152 | } | 153 | } |
153 | } | 154 | } |
154 | 155 | ||
155 | static bool ReadImages(gzFile images_file, int max_num_images, ImageSet* image_set) { | 156 | static bool ReadImages( |
157 | gzFile images_file, int max_num_images, ImageSet* image_set) { | ||
156 | assert(images_file != Z_NULL); | 158 | assert(images_file != Z_NULL); |
157 | assert(image_set); | 159 | assert(image_set); |
158 | 160 | ||
@@ -161,36 +163,41 @@ static bool ReadImages(gzFile images_file, int max_num_images, ImageSet* image_s | |||
161 | uint8_t* pixels = 0; | 163 | uint8_t* pixels = 0; |
162 | 164 | ||
163 | int32_t magic, total_images, rows, cols; | 165 | int32_t magic, total_images, rows, cols; |
164 | if ( (gzread(images_file, (char*)&magic, sizeof(int32_t)) != sizeof(int32_t)) || | 166 | if ((gzread(images_file, (char*)&magic, sizeof(int32_t)) != |
165 | (gzread(images_file, (char*)&total_images, sizeof(int32_t)) != sizeof(int32_t)) || | 167 | sizeof(int32_t)) || |
166 | (gzread(images_file, (char*)&rows, sizeof(int32_t)) != sizeof(int32_t)) || | 168 | (gzread(images_file, (char*)&total_images, sizeof(int32_t)) != |
167 | (gzread(images_file, (char*)&cols, sizeof(int32_t)) != sizeof(int32_t)) ) { | 169 | sizeof(int32_t)) || |
170 | (gzread(images_file, (char*)&rows, sizeof(int32_t)) != sizeof(int32_t)) || | ||
171 | (gzread(images_file, (char*)&cols, sizeof(int32_t)) != sizeof(int32_t))) { | ||
168 | fprintf(stderr, "Failed to read header\n"); | 172 | fprintf(stderr, "Failed to read header\n"); |
169 | goto cleanup; | 173 | goto cleanup; |
170 | } | 174 | } |
171 | 175 | ||
172 | magic = ReverseEndian32(magic); | 176 | magic = ReverseEndian32(magic); |
173 | total_images = ReverseEndian32(total_images); | 177 | total_images = ReverseEndian32(total_images); |
174 | rows = ReverseEndian32(rows); | 178 | rows = ReverseEndian32(rows); |
175 | cols = ReverseEndian32(cols); | 179 | cols = ReverseEndian32(cols); |
176 | 180 | ||
177 | if (magic != IMAGE_FILE_MAGIC) { | 181 | if (magic != IMAGE_FILE_MAGIC) { |
178 | fprintf(stderr, "Magic number mismatch. Got %x, expected: %x\n", | 182 | fprintf( |
179 | magic, IMAGE_FILE_MAGIC); | 183 | stderr, "Magic number mismatch. Got %x, expected: %x\n", magic, |
184 | IMAGE_FILE_MAGIC); | ||
180 | goto cleanup; | 185 | goto cleanup; |
181 | } | 186 | } |
182 | 187 | ||
183 | printf("Magic: %.8x\nTotal images: %d\nRows: %d\nCols: %d\n", | 188 | printf( |
184 | magic, total_images, rows, cols); | 189 | "Magic: %.8x\nTotal images: %d\nRows: %d\nCols: %d\n", magic, |
190 | total_images, rows, cols); | ||
185 | 191 | ||
186 | total_images = max_num_images >= 0 ? min(total_images, max_num_images) : total_images; | 192 | total_images = |
193 | max_num_images >= 0 ? min(total_images, max_num_images) : total_images; | ||
187 | 194 | ||
188 | // Images are flattened into single row vectors. | 195 | // Images are flattened into single row vectors. |
189 | const int num_pixels = rows * cols; | 196 | const int num_pixels = rows * cols; |
190 | image_set->images = nnMatrixMake(total_images, num_pixels); | 197 | image_set->images = nnMatrixMake(total_images, num_pixels); |
191 | image_set->count = total_images; | 198 | image_set->count = total_images; |
192 | image_set->rows = rows; | 199 | image_set->rows = rows; |
193 | image_set->cols = cols; | 200 | image_set->cols = cols; |
194 | 201 | ||
195 | pixels = calloc(1, num_pixels); | 202 | pixels = calloc(1, num_pixels); |
196 | if (!pixels) { | 203 | if (!pixels) { |
@@ -219,30 +226,31 @@ cleanup: | |||
219 | return success; | 226 | return success; |
220 | } | 227 | } |
221 | 228 | ||
222 | static void OneHotEncode(const uint8_t* labels_bytes, int num_labels, nnMatrix* labels) { | 229 | static void OneHotEncode( |
230 | const uint8_t* labels_bytes, int num_labels, nnMatrix* labels) { | ||
223 | assert(labels_bytes); | 231 | assert(labels_bytes); |
224 | assert(labels); | 232 | assert(labels); |
225 | assert(labels->rows == num_labels); | 233 | assert(labels->rows == num_labels); |
226 | assert(labels->cols == 10); | 234 | assert(labels->cols == 10); |
227 | 235 | ||
228 | static const R one_hot[10][10] = { | 236 | static const R one_hot[10][10] = { |
229 | { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, | 237 | {1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, |
230 | { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 }, | 238 | {0, 1, 0, 0, 0, 0, 0, 0, 0, 0}, |
231 | { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 }, | 239 | {0, 0, 1, 0, 0, 0, 0, 0, 0, 0}, |
232 | { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, | 240 | {0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, |
233 | { 0, 0, 0, 0, 1, 0, 0, 0, 0, 0 }, | 241 | {0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, |
234 | { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 }, | 242 | {0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, |
235 | { 0, 0, 0, 0, 0, 0, 1, 0, 0, 0 }, | 243 | {0, 0, 0, 0, 0, 0, 1, 0, 0, 0}, |
236 | { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, | 244 | {0, 0, 0, 0, 0, 0, 0, 1, 0, 0}, |
237 | { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0 }, | 245 | {0, 0, 0, 0, 0, 0, 0, 0, 1, 0}, |
238 | { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }, | 246 | {0, 0, 0, 0, 0, 0, 0, 0, 0, 1}, |
239 | }; | 247 | }; |
240 | 248 | ||
241 | R* value = labels->values; | 249 | R* value = labels->values; |
242 | 250 | ||
243 | for (int i = 0; i < num_labels; ++i) { | 251 | for (int i = 0; i < num_labels; ++i) { |
244 | const uint8_t label = labels_bytes[i]; | 252 | const uint8_t label = labels_bytes[i]; |
245 | const R* one_hot_value = one_hot[label]; | 253 | const R* one_hot_value = one_hot[label]; |
246 | 254 | ||
247 | for (int j = 0; j < 10; ++j) { | 255 | for (int j = 0; j < 10; ++j) { |
248 | *value++ = FormatLabel(*one_hot_value++); | 256 | *value++ = FormatLabel(*one_hot_value++); |
@@ -255,13 +263,13 @@ static int OneHotDecode(const nnMatrix* label_matrix) { | |||
255 | assert(label_matrix->cols == 10); | 263 | assert(label_matrix->cols == 10); |
256 | assert(label_matrix->rows == 1); | 264 | assert(label_matrix->rows == 1); |
257 | 265 | ||
258 | R max_value = 0; | 266 | R max_value = 0; |
259 | int pos_max = 0; | 267 | int pos_max = 0; |
260 | for (int i = 0; i < 10; ++i) { | 268 | for (int i = 0; i < 10; ++i) { |
261 | const R value = nnMatrixAt(label_matrix, 0, i); | 269 | const R value = nnMatrixAt(label_matrix, 0, i); |
262 | if (value > max_value) { | 270 | if (value > max_value) { |
263 | max_value = value; | 271 | max_value = value; |
264 | pos_max = i; | 272 | pos_max = i; |
265 | } | 273 | } |
266 | } | 274 | } |
267 | assert(pos_max >= 0); | 275 | assert(pos_max >= 0); |
@@ -269,7 +277,8 @@ static int OneHotDecode(const nnMatrix* label_matrix) { | |||
269 | return pos_max; | 277 | return pos_max; |
270 | } | 278 | } |
271 | 279 | ||
272 | static bool ReadLabels(gzFile labels_file, int max_num_labels, ImageSet* image_set) { | 280 | static bool ReadLabels( |
281 | gzFile labels_file, int max_num_labels, ImageSet* image_set) { | ||
273 | assert(labels_file != Z_NULL); | 282 | assert(labels_file != Z_NULL); |
274 | assert(image_set != 0); | 283 | assert(image_set != 0); |
275 | 284 | ||
@@ -278,24 +287,28 @@ static bool ReadLabels(gzFile labels_file, int max_num_labels, ImageSet* image_s | |||
278 | uint8_t* labels = 0; | 287 | uint8_t* labels = 0; |
279 | 288 | ||
280 | int32_t magic, total_labels; | 289 | int32_t magic, total_labels; |
281 | if ( (gzread(labels_file, (char*)&magic, sizeof(int32_t)) != sizeof(int32_t)) || | 290 | if ((gzread(labels_file, (char*)&magic, sizeof(int32_t)) != |
282 | (gzread(labels_file, (char*)&total_labels, sizeof(int32_t)) != sizeof(int32_t)) ) { | 291 | sizeof(int32_t)) || |
292 | (gzread(labels_file, (char*)&total_labels, sizeof(int32_t)) != | ||
293 | sizeof(int32_t))) { | ||
283 | fprintf(stderr, "Failed to read header\n"); | 294 | fprintf(stderr, "Failed to read header\n"); |
284 | goto cleanup; | 295 | goto cleanup; |
285 | } | 296 | } |
286 | 297 | ||
287 | magic = ReverseEndian32(magic); | 298 | magic = ReverseEndian32(magic); |
288 | total_labels = ReverseEndian32(total_labels); | 299 | total_labels = ReverseEndian32(total_labels); |
289 | 300 | ||
290 | if (magic != LABEL_FILE_MAGIC) { | 301 | if (magic != LABEL_FILE_MAGIC) { |
291 | fprintf(stderr, "Magic number mismatch. Got %x, expected: %x\n", | 302 | fprintf( |
292 | magic, LABEL_FILE_MAGIC); | 303 | stderr, "Magic number mismatch. Got %x, expected: %x\n", magic, |
304 | LABEL_FILE_MAGIC); | ||
293 | goto cleanup; | 305 | goto cleanup; |
294 | } | 306 | } |
295 | 307 | ||
296 | printf("Magic: %.8x\nTotal labels: %d\n", magic, total_labels); | 308 | printf("Magic: %.8x\nTotal labels: %d\n", magic, total_labels); |
297 | 309 | ||
298 | total_labels = max_num_labels >= 0 ? min(total_labels, max_num_labels) : total_labels; | 310 | total_labels = |
311 | max_num_labels >= 0 ? min(total_labels, max_num_labels) : total_labels; | ||
299 | 312 | ||
300 | assert(image_set->count == total_labels); | 313 | assert(image_set->count == total_labels); |
301 | 314 | ||
@@ -308,7 +321,8 @@ static bool ReadLabels(gzFile labels_file, int max_num_labels, ImageSet* image_s | |||
308 | goto cleanup; | 321 | goto cleanup; |
309 | } | 322 | } |
310 | 323 | ||
311 | if (gzread(labels_file, labels, total_labels * sizeof(uint8_t)) != total_labels) { | 324 | if (gzread(labels_file, labels, total_labels * sizeof(uint8_t)) != |
325 | total_labels) { | ||
312 | fprintf(stderr, "Failed to read labels\n"); | 326 | fprintf(stderr, "Failed to read labels\n"); |
313 | goto cleanup; | 327 | goto cleanup; |
314 | } | 328 | } |
@@ -335,17 +349,17 @@ int main(int argc, const char** argv) { | |||
335 | 349 | ||
336 | bool success = false; | 350 | bool success = false; |
337 | 351 | ||
338 | gzFile train_images_file = Z_NULL; | 352 | gzFile train_images_file = Z_NULL; |
339 | gzFile train_labels_file = Z_NULL; | 353 | gzFile train_labels_file = Z_NULL; |
340 | gzFile test_images_file = Z_NULL; | 354 | gzFile test_images_file = Z_NULL; |
341 | gzFile test_labels_file = Z_NULL; | 355 | gzFile test_labels_file = Z_NULL; |
342 | ImageSet train_set = { 0 }; | 356 | ImageSet train_set = {0}; |
343 | ImageSet test_set = { 0 }; | 357 | ImageSet test_set = {0}; |
344 | nnNeuralNetwork* net = 0; | 358 | nnNeuralNetwork* net = 0; |
345 | nnQueryObject* query = 0; | 359 | nnQueryObject* query = 0; |
346 | 360 | ||
347 | const char* mnist_files_dir = argv[1]; | 361 | const char* mnist_files_dir = argv[1]; |
348 | const int max_num_images = argc > 2 ? atoi(argv[2]) : -1; | 362 | const int max_num_images = argc > 2 ? atoi(argv[2]) : -1; |
349 | 363 | ||
350 | char train_labels_path[PATH_MAX]; | 364 | char train_labels_path[PATH_MAX]; |
351 | char train_images_path[PATH_MAX]; | 365 | char train_images_path[PATH_MAX]; |
@@ -353,12 +367,12 @@ int main(int argc, const char** argv) { | |||
353 | char test_images_path[PATH_MAX]; | 367 | char test_images_path[PATH_MAX]; |
354 | strlcpy(train_labels_path, mnist_files_dir, PATH_MAX); | 368 | strlcpy(train_labels_path, mnist_files_dir, PATH_MAX); |
355 | strlcpy(train_images_path, mnist_files_dir, PATH_MAX); | 369 | strlcpy(train_images_path, mnist_files_dir, PATH_MAX); |
356 | strlcpy(test_labels_path, mnist_files_dir, PATH_MAX); | 370 | strlcpy(test_labels_path, mnist_files_dir, PATH_MAX); |
357 | strlcpy(test_images_path, mnist_files_dir, PATH_MAX); | 371 | strlcpy(test_images_path, mnist_files_dir, PATH_MAX); |
358 | strlcat(train_labels_path, "/train-labels-idx1-ubyte.gz", PATH_MAX); | 372 | strlcat(train_labels_path, "/train-labels-idx1-ubyte.gz", PATH_MAX); |
359 | strlcat(train_images_path, "/train-images-idx3-ubyte.gz", PATH_MAX); | 373 | strlcat(train_images_path, "/train-images-idx3-ubyte.gz", PATH_MAX); |
360 | strlcat(test_labels_path, "/t10k-labels-idx1-ubyte.gz", PATH_MAX); | 374 | strlcat(test_labels_path, "/t10k-labels-idx1-ubyte.gz", PATH_MAX); |
361 | strlcat(test_images_path, "/t10k-images-idx3-ubyte.gz", PATH_MAX); | 375 | strlcat(test_images_path, "/t10k-images-idx3-ubyte.gz", PATH_MAX); |
362 | 376 | ||
363 | train_images_file = gzopen(train_images_path, "r"); | 377 | train_images_file = gzopen(train_images_path, "r"); |
364 | if (train_images_file == Z_NULL) { | 378 | if (train_images_file == Z_NULL) { |
@@ -406,11 +420,18 @@ int main(int argc, const char** argv) { | |||
406 | } | 420 | } |
407 | 421 | ||
408 | // Network definition. | 422 | // Network definition. |
409 | const int image_size_pixels = train_set.rows * train_set.cols; | 423 | const int image_size_pixels = train_set.rows * train_set.cols; |
410 | const int num_layers = 2; | 424 | const int num_layers = 4; |
411 | const int layer_sizes[3] = { image_size_pixels, 100, 10 }; | 425 | const int hidden_size = 100; |
412 | const nnActivation layer_activations[2] = { nnSigmoid, nnSigmoid }; | 426 | const nnLayer layers[4] = { |
413 | if (!(net = nnMakeNet(num_layers, layer_sizes, layer_activations))) { | 427 | {.type = nnLinear, |
428 | .linear = {.input_size = image_size_pixels, .output_size = hidden_size}}, | ||
429 | {.type = nnSigmoid}, | ||
430 | {.type = nnLinear, | ||
431 | .linear = {.input_size = hidden_size, .output_size = 10}}, | ||
432 | {.type = nnSigmoid} | ||
433 | }; | ||
434 | if (!(net = nnMakeNet(layers, num_layers, image_size_pixels))) { | ||
414 | fprintf(stderr, "Failed to create neural network\n"); | 435 | fprintf(stderr, "Failed to create neural network\n"); |
415 | goto cleanup; | 436 | goto cleanup; |
416 | } | 437 | } |
@@ -418,17 +439,17 @@ int main(int argc, const char** argv) { | |||
418 | // Train. | 439 | // Train. |
419 | printf("Training with up to %d images from the data set\n\n", max_num_images); | 440 | printf("Training with up to %d images from the data set\n\n", max_num_images); |
420 | const nnTrainingParams training_params = { | 441 | const nnTrainingParams training_params = { |
421 | .learning_rate = 0.1, | 442 | .learning_rate = 0.1, |
422 | .max_iterations = TRAIN_ITERATIONS, | 443 | .max_iterations = TRAIN_ITERATIONS, |
423 | .seed = 0, | 444 | .seed = 0, |
424 | .weight_init = nnWeightInitNormal, | 445 | .weight_init = nnWeightInitNormal, |
425 | .debug = true, | 446 | .debug = true, |
426 | }; | 447 | }; |
427 | nnTrain(net, &train_set.images, &train_set.labels, &training_params); | 448 | nnTrain(net, &train_set.images, &train_set.labels, &training_params); |
428 | 449 | ||
429 | // Test. | 450 | // Test. |
430 | int hits = 0; | 451 | int hits = 0; |
431 | query = nnMakeQueryObject(net, /*num_inputs=*/1); | 452 | query = nnMakeQueryObject(net, /*num_inputs=*/1); |
432 | for (int i = 0; i < test_set.count; ++i) { | 453 | for (int i = 0; i < test_set.count; ++i) { |
433 | const nnMatrix test_image = nnMatrixBorrowRows(&test_set.images, i, 1); | 454 | const nnMatrix test_image = nnMatrixBorrowRows(&test_set.images, i, 1); |
434 | const nnMatrix test_label = nnMatrixBorrowRows(&test_set.labels, i, 1); | 455 | const nnMatrix test_label = nnMatrixBorrowRows(&test_set.labels, i, 1); |
@@ -444,7 +465,7 @@ int main(int argc, const char** argv) { | |||
444 | } | 465 | } |
445 | const R hit_ratio = (R)hits / (R)test_set.count; | 466 | const R hit_ratio = (R)hits / (R)test_set.count; |
446 | printf("Test images: %d\n", test_set.count); | 467 | printf("Test images: %d\n", test_set.count); |
447 | printf("Hits: %d/%d (%.3f%%)\n", hits, test_set.count, hit_ratio*100); | 468 | printf("Hits: %d/%d (%.3f%%)\n", hits, test_set.count, hit_ratio * 100); |
448 | 469 | ||
449 | success = true; | 470 | success = true; |
450 | 471 | ||
diff --git a/src/lib/include/neuralnet/matrix.h b/src/lib/include/neuralnet/matrix.h index b7281bf..f80b985 100644 --- a/src/lib/include/neuralnet/matrix.h +++ b/src/lib/include/neuralnet/matrix.h | |||
@@ -17,6 +17,9 @@ nnMatrix nnMatrixMake(int rows, int cols); | |||
17 | /// Delete a matrix and free its internal memory. | 17 | /// Delete a matrix and free its internal memory. |
18 | void nnMatrixDel(nnMatrix*); | 18 | void nnMatrixDel(nnMatrix*); |
19 | 19 | ||
20 | /// Construct a matrix from an array of values. | ||
21 | nnMatrix nnMatrixFromArray(int rows, int cols, const R values[]); | ||
22 | |||
20 | /// Move a matrix. | 23 | /// Move a matrix. |
21 | /// | 24 | /// |
22 | /// |in| is an empty matrix after the move. | 25 | /// |in| is an empty matrix after the move. |
diff --git a/src/lib/include/neuralnet/neuralnet.h b/src/lib/include/neuralnet/neuralnet.h index 05c9406..f122c2a 100644 --- a/src/lib/include/neuralnet/neuralnet.h +++ b/src/lib/include/neuralnet/neuralnet.h | |||
@@ -1,32 +1,45 @@ | |||
1 | #pragma once | 1 | #pragma once |
2 | 2 | ||
3 | #include <neuralnet/matrix.h> | ||
3 | #include <neuralnet/types.h> | 4 | #include <neuralnet/types.h> |
4 | 5 | ||
5 | typedef struct nnMatrix nnMatrix; | ||
6 | |||
7 | typedef struct nnNeuralNetwork nnNeuralNetwork; | 6 | typedef struct nnNeuralNetwork nnNeuralNetwork; |
8 | typedef struct nnQueryObject nnQueryObject; | 7 | typedef struct nnQueryObject nnQueryObject; |
9 | 8 | ||
10 | /// Neuron activation. | 9 | /// Linear layer parameters. |
11 | typedef enum nnActivation { | 10 | /// |
12 | nnIdentity, | 11 | /// Either one of the following must be set: |
12 | /// a) Training: input and output sizes. | ||
13 | /// b) Inference: weights + biases. | ||
14 | typedef struct nnLinearParams { | ||
15 | int input_size; | ||
16 | int output_size; | ||
17 | nnMatrix weights; | ||
18 | nnMatrix biases; | ||
19 | } nnLinearParams; | ||
20 | |||
21 | /// Layer type. | ||
22 | typedef enum nnLayerType { | ||
23 | nnLinear, | ||
13 | nnSigmoid, | 24 | nnSigmoid, |
14 | nnRelu, | 25 | nnRelu, |
15 | } nnActivation; | 26 | } nnLayerType; |
27 | |||
28 | /// Neural network layer. | ||
29 | typedef struct nnLayer { | ||
30 | nnLayerType type; | ||
31 | union { | ||
32 | nnLinearParams linear; | ||
33 | }; | ||
34 | } nnLayer; | ||
16 | 35 | ||
17 | /// Create a network. | 36 | /// Create a network. |
18 | nnNeuralNetwork* nnMakeNet( | 37 | nnNeuralNetwork* nnMakeNet( |
19 | int num_layers, const int* layer_sizes, const nnActivation* activations); | 38 | const nnLayer* layers, int num_layers, int input_size); |
20 | 39 | ||
21 | /// Delete the network and free its internal memory. | 40 | /// Delete the network and free its internal memory. |
22 | void nnDeleteNet(nnNeuralNetwork**); | 41 | void nnDeleteNet(nnNeuralNetwork**); |
23 | 42 | ||
24 | /// Set the network's weights. | ||
25 | void nnSetWeights(nnNeuralNetwork*, const R* weights); | ||
26 | |||
27 | /// Set the network's biases. | ||
28 | void nnSetBiases(nnNeuralNetwork*, const R* biases); | ||
29 | |||
30 | /// Query the network. | 43 | /// Query the network. |
31 | /// | 44 | /// |
32 | /// |input| is a matrix of inputs, one row per input and as many columns as the | 45 | /// |input| is a matrix of inputs, one row per input and as many columns as the |
@@ -42,10 +55,10 @@ void nnQueryArray( | |||
42 | 55 | ||
43 | /// Create a query object. | 56 | /// Create a query object. |
44 | /// | 57 | /// |
45 | /// The query object holds all the internal memory required to query a network. | 58 | /// The query object holds all the internal memory required to query a network |
46 | /// Query objects allocate all memory up front so that network queries can run | 59 | /// with batches of the given size. Memory is allocated up front so that network |
47 | /// without additional memory allocation. | 60 | /// queries can run without additional memory allocation. |
48 | nnQueryObject* nnMakeQueryObject(const nnNeuralNetwork*, int num_inputs); | 61 | nnQueryObject* nnMakeQueryObject(const nnNeuralNetwork*, int batch_size); |
49 | 62 | ||
50 | /// Delete the query object and free its internal memory. | 63 | /// Delete the query object and free its internal memory. |
51 | void nnDeleteQueryObject(nnQueryObject**); | 64 | void nnDeleteQueryObject(nnQueryObject**); |
@@ -60,7 +73,7 @@ int nnNetInputSize(const nnNeuralNetwork*); | |||
60 | int nnNetOutputSize(const nnNeuralNetwork*); | 73 | int nnNetOutputSize(const nnNeuralNetwork*); |
61 | 74 | ||
62 | /// Return the layer's input size. | 75 | /// Return the layer's input size. |
63 | int nnLayerInputSize(const nnMatrix* weights); | 76 | int nnLayerInputSize(const nnNeuralNetwork*, int layer); |
64 | 77 | ||
65 | /// Return the layer's output size. | 78 | /// Return the layer's output size. |
66 | int nnLayerOutputSize(const nnMatrix* weights); | 79 | int nnLayerOutputSize(const nnNeuralNetwork*, int layer); |
diff --git a/src/lib/src/activation.h b/src/lib/src/activation.h index b56a69e..4c8a9e4 100644 --- a/src/lib/src/activation.h +++ b/src/lib/src/activation.h | |||
@@ -9,8 +9,8 @@ static inline R sigmoid(R x) { return 1. / (1. + exp(-x)); } | |||
9 | static inline R relu(R x) { return fmax(0, x); } | 9 | static inline R relu(R x) { return fmax(0, x); } |
10 | 10 | ||
11 | #define NN_MAP_ARRAY(f, in, out, size) \ | 11 | #define NN_MAP_ARRAY(f, in, out, size) \ |
12 | for (int i = 0; i < size; ++i) { \ | 12 | for (int ii = 0; ii < size; ++ii) { \ |
13 | out[i] = f(in[i]); \ | 13 | out[ii] = f(in[ii]); \ |
14 | } | 14 | } |
15 | 15 | ||
16 | #define sigmoid_array(in, out, size) NN_MAP_ARRAY(sigmoid, in, out, size) | 16 | #define sigmoid_array(in, out, size) NN_MAP_ARRAY(sigmoid, in, out, size) |
diff --git a/src/lib/src/matrix.c b/src/lib/src/matrix.c index d98c8bb..d5c3fcc 100644 --- a/src/lib/src/matrix.c +++ b/src/lib/src/matrix.c | |||
@@ -26,6 +26,12 @@ void nnMatrixDel(nnMatrix* matrix) { | |||
26 | } | 26 | } |
27 | } | 27 | } |
28 | 28 | ||
29 | nnMatrix nnMatrixFromArray(int rows, int cols, const R values[]) { | ||
30 | nnMatrix m = nnMatrixMake(rows, cols); | ||
31 | nnMatrixInit(&m, values); | ||
32 | return m; | ||
33 | } | ||
34 | |||
29 | void nnMatrixMove(nnMatrix* in, nnMatrix* out) { | 35 | void nnMatrixMove(nnMatrix* in, nnMatrix* out) { |
30 | assert(in); | 36 | assert(in); |
31 | assert(out); | 37 | assert(out); |
diff --git a/src/lib/src/neuralnet.c b/src/lib/src/neuralnet.c index a5fc59b..4322b8c 100644 --- a/src/lib/src/neuralnet.c +++ b/src/lib/src/neuralnet.c | |||
@@ -7,11 +7,65 @@ | |||
7 | #include <assert.h> | 7 | #include <assert.h> |
8 | #include <stdlib.h> | 8 | #include <stdlib.h> |
9 | 9 | ||
10 | static void MakeLayerImpl( | ||
11 | int prev_layer_output_size, const nnLayer* layer, nnLayerImpl* impl) { | ||
12 | impl->type = layer->type; | ||
13 | |||
14 | switch (layer->type) { | ||
15 | case nnLinear: { | ||
16 | const nnLinearParams* params = &layer->linear; | ||
17 | nnLinearImpl* linear = &impl->linear; | ||
18 | |||
19 | if ((params->input_size > 0) && (params->output_size > 0)) { | ||
20 | const int rows = params->input_size; | ||
21 | const int cols = params->output_size; | ||
22 | linear->weights = nnMatrixMake(rows, cols); | ||
23 | linear->biases = nnMatrixMake(1, cols); | ||
24 | linear->owned = true; | ||
25 | } else { | ||
26 | linear->weights = params->weights; | ||
27 | linear->biases = params->biases; | ||
28 | linear->owned = false; | ||
29 | } | ||
30 | |||
31 | impl->input_size = linear->weights.rows; | ||
32 | impl->output_size = linear->weights.cols; | ||
33 | |||
34 | break; | ||
35 | } | ||
36 | |||
37 | // Activation layers. | ||
38 | case nnRelu: | ||
39 | case nnSigmoid: | ||
40 | impl->input_size = prev_layer_output_size; | ||
41 | impl->output_size = prev_layer_output_size; | ||
42 | break; | ||
43 | } | ||
44 | } | ||
45 | |||
46 | static void DeleteLayer(nnLayerImpl* layer) { | ||
47 | switch (layer->type) { | ||
48 | case nnLinear: { | ||
49 | nnLinearImpl* linear = &layer->linear; | ||
50 | if (linear->owned) { | ||
51 | nnMatrixDel(&linear->weights); | ||
52 | nnMatrixDel(&linear->biases); | ||
53 | } | ||
54 | break; | ||
55 | } | ||
56 | |||
57 | // No parameters for these layers. | ||
58 | case nnRelu: | ||
59 | case nnSigmoid: | ||
60 | break; | ||
61 | } | ||
62 | } | ||
63 | |||
10 | nnNeuralNetwork* nnMakeNet( | 64 | nnNeuralNetwork* nnMakeNet( |
11 | int num_layers, const int* layer_sizes, const nnActivation* activations) { | 65 | const nnLayer* layers, int num_layers, int input_size) { |
66 | assert(layers); | ||
12 | assert(num_layers > 0); | 67 | assert(num_layers > 0); |
13 | assert(layer_sizes); | 68 | assert(input_size > 0); |
14 | assert(activations); | ||
15 | 69 | ||
16 | nnNeuralNetwork* net = calloc(1, sizeof(nnNeuralNetwork)); | 70 | nnNeuralNetwork* net = calloc(1, sizeof(nnNeuralNetwork)); |
17 | if (net == 0) { | 71 | if (net == 0) { |
@@ -20,84 +74,38 @@ nnNeuralNetwork* nnMakeNet( | |||
20 | 74 | ||
21 | net->num_layers = num_layers; | 75 | net->num_layers = num_layers; |
22 | 76 | ||
23 | net->weights = calloc(num_layers, sizeof(nnMatrix)); | 77 | net->layers = calloc(num_layers, sizeof(nnLayerImpl)); |
24 | net->biases = calloc(num_layers, sizeof(nnMatrix)); | 78 | if (net->layers == 0) { |
25 | net->activations = calloc(num_layers, sizeof(nnActivation)); | ||
26 | if ((net->weights == 0) || (net->biases == 0) || (net->activations == 0)) { | ||
27 | nnDeleteNet(&net); | 79 | nnDeleteNet(&net); |
28 | return 0; | 80 | return 0; |
29 | } | 81 | } |
30 | 82 | ||
83 | int prev_layer_output_size = input_size; | ||
31 | for (int l = 0; l < num_layers; ++l) { | 84 | for (int l = 0; l < num_layers; ++l) { |
32 | // layer_sizes = { input layer size, first hidden layer size, ...} | 85 | MakeLayerImpl(prev_layer_output_size, &layers[l], &net->layers[l]); |
33 | const int layer_input_size = layer_sizes[l]; | 86 | prev_layer_output_size = net->layers[l].output_size; |
34 | const int layer_output_size = layer_sizes[l + 1]; | ||
35 | |||
36 | // We store the transpose of the weight matrix as written in textbooks. | ||
37 | // Our vectors are row vectors and the matrices row-major. | ||
38 | const int rows = layer_input_size; | ||
39 | const int cols = layer_output_size; | ||
40 | |||
41 | net->weights[l] = nnMatrixMake(rows, cols); | ||
42 | net->biases[l] = nnMatrixMake(1, cols); | ||
43 | net->activations[l] = activations[l]; | ||
44 | } | 87 | } |
45 | 88 | ||
46 | return net; | 89 | return net; |
47 | } | 90 | } |
48 | 91 | ||
49 | void nnDeleteNet(nnNeuralNetwork** net) { | 92 | void nnDeleteNet(nnNeuralNetwork** ppNet) { |
50 | if ((!net) || (!(*net))) { | 93 | if ((!ppNet) || (!(*ppNet))) { |
51 | return; | 94 | return; |
52 | } | 95 | } |
53 | if ((*net)->weights != 0) { | 96 | nnNeuralNetwork* net = *ppNet; |
54 | for (int l = 0; l < (*net)->num_layers; ++l) { | ||
55 | nnMatrixDel(&(*net)->weights[l]); | ||
56 | } | ||
57 | free((*net)->weights); | ||
58 | (*net)->weights = 0; | ||
59 | } | ||
60 | if ((*net)->biases != 0) { | ||
61 | for (int l = 0; l < (*net)->num_layers; ++l) { | ||
62 | nnMatrixDel(&(*net)->biases[l]); | ||
63 | } | ||
64 | free((*net)->biases); | ||
65 | (*net)->biases = 0; | ||
66 | } | ||
67 | if ((*net)->activations) { | ||
68 | free((*net)->activations); | ||
69 | (*net)->activations = 0; | ||
70 | } | ||
71 | free(*net); | ||
72 | *net = 0; | ||
73 | } | ||
74 | |||
75 | void nnSetWeights(nnNeuralNetwork* net, const R* weights) { | ||
76 | assert(net); | ||
77 | assert(weights); | ||
78 | 97 | ||
79 | for (int l = 0; l < net->num_layers; ++l) { | 98 | for (int l = 0; l < net->num_layers; ++l) { |
80 | nnMatrix* layer_weights = &net->weights[l]; | 99 | DeleteLayer(&net->layers[l]); |
81 | R* layer_values = layer_weights->values; | ||
82 | |||
83 | for (int j = 0; j < layer_weights->rows * layer_weights->cols; ++j) { | ||
84 | *layer_values++ = *weights++; | ||
85 | } | ||
86 | } | 100 | } |
87 | } | ||
88 | |||
89 | void nnSetBiases(nnNeuralNetwork* net, const R* biases) { | ||
90 | assert(net); | ||
91 | assert(biases); | ||
92 | |||
93 | for (int l = 0; l < net->num_layers; ++l) { | ||
94 | nnMatrix* layer_biases = &net->biases[l]; | ||
95 | R* layer_values = layer_biases->values; | ||
96 | 101 | ||
97 | for (int j = 0; j < layer_biases->rows * layer_biases->cols; ++j) { | 102 | if (net->layers) { |
98 | *layer_values++ = *biases++; | 103 | free(net->layers); |
99 | } | 104 | net->layers = 0; |
100 | } | 105 | } |
106 | |||
107 | free(net); | ||
108 | *ppNet = 0; | ||
101 | } | 109 | } |
102 | 110 | ||
103 | void nnQuery( | 111 | void nnQuery( |
@@ -114,35 +122,40 @@ void nnQuery( | |||
114 | nnMatrix input_vector = nnMatrixBorrowRows((nnMatrix*)input, i, 1); | 122 | nnMatrix input_vector = nnMatrixBorrowRows((nnMatrix*)input, i, 1); |
115 | 123 | ||
116 | for (int l = 0; l < net->num_layers; ++l) { | 124 | for (int l = 0; l < net->num_layers; ++l) { |
117 | const nnMatrix* layer_weights = &net->weights[l]; | ||
118 | const nnMatrix* layer_biases = &net->biases[l]; | ||
119 | // Y^T = (W*X)^T = X^T*W^T | ||
120 | // | ||
121 | // TODO: If we had a row-row matrix multiplication, we could compute: | ||
122 | // Y^T = W ** X^T | ||
123 | // The row-row multiplication could be more cache-friendly. We just need | ||
124 | // to store W as is, without transposing. | ||
125 | // We could also rewrite the original Mul function to go row x row, | ||
126 | // decomposing the multiplication. Preserving the original meaning of Mul | ||
127 | // makes everything clearer. | ||
128 | nnMatrix output_vector = | 125 | nnMatrix output_vector = |
129 | nnMatrixBorrowRows(&query->layer_outputs[l], i, 1); | 126 | nnMatrixBorrowRows(&query->layer_outputs[l], i, 1); |
130 | nnMatrixMul(&input_vector, layer_weights, &output_vector); | ||
131 | nnMatrixAddRow(&output_vector, layer_biases, &output_vector); | ||
132 | 127 | ||
133 | switch (net->activations[l]) { | 128 | switch (net->layers[l].type) { |
134 | case nnIdentity: | 129 | case nnLinear: { |
135 | break; // Nothing to do for the identity function. | 130 | const nnLinearImpl* linear = &net->layers[l].linear; |
136 | case nnSigmoid: | 131 | const nnMatrix* layer_weights = &linear->weights; |
137 | sigmoid_array( | 132 | const nnMatrix* layer_biases = &linear->biases; |
138 | output_vector.values, output_vector.values, output_vector.cols); | 133 | |
134 | // Y^T = (W*X)^T = X^T*W^T | ||
135 | // | ||
136 | // TODO: If we had a row-row matrix multiplication, we could compute: | ||
137 | // Y^T = W ** X^T | ||
138 | // | ||
139 | // The row-row multiplication could be more cache-friendly. We just need | ||
140 | // to store W as is, without transposing. | ||
141 | // | ||
142 | // We could also rewrite the original Mul function to go row x row, | ||
143 | // decomposing the multiplication. Preserving the original meaning of | ||
144 | // Mul makes everything clearer. | ||
145 | nnMatrixMul(&input_vector, layer_weights, &output_vector); | ||
146 | nnMatrixAddRow(&output_vector, layer_biases, &output_vector); | ||
139 | break; | 147 | break; |
148 | } | ||
140 | case nnRelu: | 149 | case nnRelu: |
150 | assert(input_vector.cols == output_vector.cols); | ||
141 | relu_array( | 151 | relu_array( |
142 | output_vector.values, output_vector.values, output_vector.cols); | 152 | input_vector.values, output_vector.values, output_vector.cols); |
153 | break; | ||
154 | case nnSigmoid: | ||
155 | assert(input_vector.cols == output_vector.cols); | ||
156 | sigmoid_array( | ||
157 | input_vector.values, output_vector.values, output_vector.cols); | ||
143 | break; | 158 | break; |
144 | default: | ||
145 | assert(0); | ||
146 | } | 159 | } |
147 | 160 | ||
148 | input_vector = output_vector; // Borrow. | 161 | input_vector = output_vector; // Borrow. |
@@ -159,15 +172,15 @@ void nnQueryArray( | |||
159 | assert(output); | 172 | assert(output); |
160 | assert(net->num_layers > 0); | 173 | assert(net->num_layers > 0); |
161 | 174 | ||
162 | nnMatrix input_vector = nnMatrixMake(net->weights[0].cols, 1); | 175 | nnMatrix input_vector = nnMatrixMake(1, nnNetInputSize(net)); |
163 | nnMatrixInit(&input_vector, input); | 176 | nnMatrixInit(&input_vector, input); |
164 | nnQuery(net, query, &input_vector); | 177 | nnQuery(net, query, &input_vector); |
165 | nnMatrixRowToArray(query->network_outputs, 0, output); | 178 | nnMatrixRowToArray(query->network_outputs, 0, output); |
166 | } | 179 | } |
167 | 180 | ||
168 | nnQueryObject* nnMakeQueryObject(const nnNeuralNetwork* net, int num_inputs) { | 181 | nnQueryObject* nnMakeQueryObject(const nnNeuralNetwork* net, int batch_size) { |
169 | assert(net); | 182 | assert(net); |
170 | assert(num_inputs > 0); | 183 | assert(batch_size > 0); |
171 | assert(net->num_layers > 0); | 184 | assert(net->num_layers > 0); |
172 | 185 | ||
173 | nnQueryObject* query = calloc(1, sizeof(nnQueryObject)); | 186 | nnQueryObject* query = calloc(1, sizeof(nnQueryObject)); |
@@ -183,11 +196,12 @@ nnQueryObject* nnMakeQueryObject(const nnNeuralNetwork* net, int num_inputs) { | |||
183 | free(query); | 196 | free(query); |
184 | return 0; | 197 | return 0; |
185 | } | 198 | } |
199 | |||
186 | for (int l = 0; l < net->num_layers; ++l) { | 200 | for (int l = 0; l < net->num_layers; ++l) { |
187 | const nnMatrix* layer_weights = &net->weights[l]; | 201 | const int layer_output_size = nnLayerOutputSize(net, l); |
188 | const int layer_output_size = nnLayerOutputSize(layer_weights); | 202 | query->layer_outputs[l] = nnMatrixMake(batch_size, layer_output_size); |
189 | query->layer_outputs[l] = nnMatrixMake(num_inputs, layer_output_size); | ||
190 | } | 203 | } |
204 | |||
191 | query->network_outputs = &query->layer_outputs[net->num_layers - 1]; | 205 | query->network_outputs = &query->layer_outputs[net->num_layers - 1]; |
192 | 206 | ||
193 | return query; | 207 | return query; |
@@ -213,23 +227,19 @@ const nnMatrix* nnNetOutputs(const nnQueryObject* query) { | |||
213 | } | 227 | } |
214 | 228 | ||
215 | int nnNetInputSize(const nnNeuralNetwork* net) { | 229 | int nnNetInputSize(const nnNeuralNetwork* net) { |
216 | assert(net); | 230 | return nnLayerInputSize(net, 0); |
217 | assert(net->num_layers > 0); | ||
218 | return net->weights[0].rows; | ||
219 | } | 231 | } |
220 | 232 | ||
221 | int nnNetOutputSize(const nnNeuralNetwork* net) { | 233 | int nnNetOutputSize(const nnNeuralNetwork* net) { |
222 | assert(net); | 234 | return nnLayerOutputSize(net, net->num_layers - 1); |
223 | assert(net->num_layers > 0); | ||
224 | return net->weights[net->num_layers - 1].cols; | ||
225 | } | 235 | } |
226 | 236 | ||
227 | int nnLayerInputSize(const nnMatrix* weights) { | 237 | int nnLayerInputSize(const nnNeuralNetwork* net, int layer) { |
228 | assert(weights); | 238 | assert(net); |
229 | return weights->rows; | 239 | return net->layers[layer].input_size; |
230 | } | 240 | } |
231 | 241 | ||
232 | int nnLayerOutputSize(const nnMatrix* weights) { | 242 | int nnLayerOutputSize(const nnNeuralNetwork* net, int layer) { |
233 | assert(weights); | 243 | assert(net); |
234 | return weights->cols; | 244 | return net->layers[layer].output_size; |
235 | } | 245 | } |
diff --git a/src/lib/src/neuralnet_impl.h b/src/lib/src/neuralnet_impl.h index f5a9c63..935c5ea 100644 --- a/src/lib/src/neuralnet_impl.h +++ b/src/lib/src/neuralnet_impl.h | |||
@@ -2,22 +2,29 @@ | |||
2 | 2 | ||
3 | #include <neuralnet/matrix.h> | 3 | #include <neuralnet/matrix.h> |
4 | 4 | ||
5 | #include <stdbool.h> | ||
6 | |||
7 | /// Linear layer parameters. | ||
8 | typedef struct nnLinearImpl { | ||
9 | nnMatrix weights; | ||
10 | nnMatrix biases; | ||
11 | bool owned; /// Whether the library owns the weights and biases. | ||
12 | } nnLinearImpl; | ||
13 | |||
14 | /// Neural network layer. | ||
15 | typedef struct nnLayerImpl { | ||
16 | nnLayerType type; | ||
17 | int input_size; | ||
18 | int output_size; | ||
19 | union { | ||
20 | nnLinearImpl linear; | ||
21 | }; | ||
22 | } nnLayerImpl; | ||
23 | |||
5 | /// Neural network object. | 24 | /// Neural network object. |
6 | /// | ||
7 | /// We store the transposes of the weight matrices so that we can do forward | ||
8 | /// passes with a minimal amount of work. That is, if in paper we write: | ||
9 | /// | ||
10 | /// [w11 w21] | ||
11 | /// [w12 w22] | ||
12 | /// | ||
13 | /// then the weight matrix in memory is stored as the following array: | ||
14 | /// | ||
15 | /// w11 w12 w21 w22 | ||
16 | typedef struct nnNeuralNetwork { | 25 | typedef struct nnNeuralNetwork { |
17 | int num_layers; // Number of non-input layers (hidden + output). | 26 | int num_layers; // Number of non-input layers (hidden + output). |
18 | nnMatrix* weights; // One matrix per non-input layer. | 27 | nnLayerImpl* layers; // One per non-input layer. |
19 | nnMatrix* biases; // One vector per non-input layer. | ||
20 | nnActivation* activations; // One per non-input layer. | ||
21 | } nnNeuralNetwork; | 28 | } nnNeuralNetwork; |
22 | 29 | ||
23 | /// A query object that holds all the memory necessary to query a network. | 30 | /// A query object that holds all the memory necessary to query a network. |
diff --git a/src/lib/src/train.c b/src/lib/src/train.c index dc93f0f..98f58ad 100644 --- a/src/lib/src/train.c +++ b/src/lib/src/train.c | |||
@@ -38,7 +38,7 @@ typedef struct nnSigmoidGradientElements { | |||
38 | /// each layer. A data type is defined for these because we allocate all the | 38 | /// each layer. A data type is defined for these because we allocate all the |
39 | /// required memory up front before entering the training loop. | 39 | /// required memory up front before entering the training loop. |
40 | typedef struct nnGradientElements { | 40 | typedef struct nnGradientElements { |
41 | nnActivation type; | 41 | nnLayerType type; |
42 | // Gradient vector, same size as the layer. | 42 | // Gradient vector, same size as the layer. |
43 | // This will contain the gradient expression except for the output value of | 43 | // This will contain the gradient expression except for the output value of |
44 | // the previous layer. | 44 | // the previous layer. |
@@ -57,10 +57,27 @@ void nnInitNet( | |||
57 | mt19937_64_init(&rng, seed); | 57 | mt19937_64_init(&rng, seed); |
58 | 58 | ||
59 | for (int l = 0; l < net->num_layers; ++l) { | 59 | for (int l = 0; l < net->num_layers; ++l) { |
60 | nnMatrix* weights = &net->weights[l]; | 60 | // Get the layer's weights and biases, if any. |
61 | nnMatrix* biases = &net->biases[l]; | 61 | nnMatrix* weights = 0; |
62 | nnMatrix* biases = 0; | ||
63 | switch (net->layers[l].type) { | ||
64 | case nnLinear: { | ||
65 | nnLinearImpl* linear = &net->layers[l].linear; | ||
66 | |||
67 | weights = &linear->weights; | ||
68 | biases = &linear->biases; | ||
69 | break; | ||
70 | } | ||
71 | // Activations. | ||
72 | case nnRelu: | ||
73 | case nnSigmoid: | ||
74 | break; | ||
75 | } | ||
76 | if (!weights || !biases) { | ||
77 | continue; | ||
78 | } | ||
62 | 79 | ||
63 | const R layer_size = (R)nnLayerInputSize(weights); | 80 | const R layer_size = (R)nnLayerInputSize(net, l); |
64 | const R scale = 1. / layer_size; | 81 | const R scale = 1. / layer_size; |
65 | const R stdev = 1. / sqrt((R)layer_size); | 82 | const R stdev = 1. / sqrt((R)layer_size); |
66 | const R sigma = stdev * stdev; | 83 | const R sigma = stdev * stdev; |
@@ -128,9 +145,6 @@ void nnTrain( | |||
128 | // with one sample at a time. | 145 | // with one sample at a time. |
129 | nnMatrix* errors = calloc(net->num_layers, sizeof(nnMatrix)); | 146 | nnMatrix* errors = calloc(net->num_layers, sizeof(nnMatrix)); |
130 | 147 | ||
131 | // Allocate the weight transpose matrices up front for backpropagation. | ||
132 | // nnMatrix* weights_T = calloc(net->num_layers, sizeof(nnMatrix)); | ||
133 | |||
134 | // Allocate the weight delta matrices. | 148 | // Allocate the weight delta matrices. |
135 | nnMatrix* weight_deltas = calloc(net->num_layers, sizeof(nnMatrix)); | 149 | nnMatrix* weight_deltas = calloc(net->num_layers, sizeof(nnMatrix)); |
136 | 150 | ||
@@ -144,30 +158,24 @@ void nnTrain( | |||
144 | nnMatrix* outputs_T = calloc(net->num_layers, sizeof(nnMatrix)); | 158 | nnMatrix* outputs_T = calloc(net->num_layers, sizeof(nnMatrix)); |
145 | 159 | ||
146 | assert(errors != 0); | 160 | assert(errors != 0); |
147 | // assert(weights_T != 0); | ||
148 | assert(weight_deltas != 0); | 161 | assert(weight_deltas != 0); |
149 | assert(gradient_elems); | 162 | assert(gradient_elems); |
150 | assert(outputs_T); | 163 | assert(outputs_T); |
151 | 164 | ||
152 | for (int l = 0; l < net->num_layers; ++l) { | 165 | for (int l = 0; l < net->num_layers; ++l) { |
153 | const nnMatrix* layer_weights = &net->weights[l]; | 166 | const int layer_input_size = nnLayerInputSize(net, l); |
154 | const int layer_output_size = net->weights[l].cols; | 167 | const int layer_output_size = nnLayerOutputSize(net, l); |
155 | const nnActivation activation = net->activations[l]; | 168 | const nnLayerImpl* layer = &net->layers[l]; |
156 | |||
157 | errors[l] = nnMatrixMake(1, layer_weights->cols); | ||
158 | |||
159 | // weights_T[l] = nnMatrixMake(layer_weights->cols, layer_weights->rows); | ||
160 | // nnMatrixTranspose(layer_weights, &weights_T[l]); | ||
161 | |||
162 | weight_deltas[l] = nnMatrixMake(layer_weights->rows, layer_weights->cols); | ||
163 | 169 | ||
164 | outputs_T[l] = nnMatrixMake(layer_output_size, 1); | 170 | errors[l] = nnMatrixMake(1, layer_output_size); |
171 | weight_deltas[l] = nnMatrixMake(layer_input_size, layer_output_size); | ||
172 | outputs_T[l] = nnMatrixMake(layer_output_size, 1); | ||
165 | 173 | ||
166 | // Allocate the gradient elements and vectors for weight delta calculation. | 174 | // Allocate the gradient elements and vectors for weight delta calculation. |
167 | nnGradientElements* elems = &gradient_elems[l]; | 175 | nnGradientElements* elems = &gradient_elems[l]; |
168 | elems->type = activation; | 176 | elems->type = layer->type; |
169 | switch (activation) { | 177 | switch (layer->type) { |
170 | case nnIdentity: | 178 | case nnLinear: |
171 | break; // Gradient vector will be borrowed, no need to allocate. | 179 | break; // Gradient vector will be borrowed, no need to allocate. |
172 | 180 | ||
173 | case nnSigmoid: | 181 | case nnSigmoid: |
@@ -208,6 +216,7 @@ void nnTrain( | |||
208 | 216 | ||
209 | // For now, we train with one sample at a time. | 217 | // For now, we train with one sample at a time. |
210 | for (int sample = 0; sample < inputs->rows; ++sample) { | 218 | for (int sample = 0; sample < inputs->rows; ++sample) { |
219 | // TODO: Introduce a BorrowMut. | ||
211 | // Slice the input and target matrices with the batch size. | 220 | // Slice the input and target matrices with the batch size. |
212 | // We are not mutating the inputs, but we need the cast to borrow. | 221 | // We are not mutating the inputs, but we need the cast to borrow. |
213 | nnMatrix training_inputs = | 222 | nnMatrix training_inputs = |
@@ -219,15 +228,16 @@ void nnTrain( | |||
219 | // Assuming one training input per iteration for now. | 228 | // Assuming one training input per iteration for now. |
220 | nnMatrixTranspose(&training_inputs, &training_inputs_T); | 229 | nnMatrixTranspose(&training_inputs, &training_inputs_T); |
221 | 230 | ||
222 | // Run a forward pass and compute the output layer error relevant to the | 231 | // Forward pass. |
223 | // derivative: o-t. | 232 | nnQuery(net, query, &training_inputs); |
224 | // Error: (t-o)^2 | 233 | |
225 | // dE/do = -2(t-o) | 234 | // Compute the error derivative: o-t. |
226 | // = +2(o-t) | 235 | // Error: 1/2 (t-o)^2 |
236 | // dE/do = -(t-o) | ||
237 | // = +(o-t) | ||
227 | // Note that we compute o-t instead to remove that outer negative sign. | 238 | // Note that we compute o-t instead to remove that outer negative sign. |
228 | // The 2 is dropped because we are only interested in the direction of the | 239 | // The 2 is dropped because we are only interested in the direction of the |
229 | // gradient. The learning rate controls the magnitude. | 240 | // gradient. The learning rate controls the magnitude. |
230 | nnQuery(net, query, &training_inputs); | ||
231 | nnMatrixSub( | 241 | nnMatrixSub( |
232 | training_outputs, &training_targets, &errors[net->num_layers - 1]); | 242 | training_outputs, &training_targets, &errors[net->num_layers - 1]); |
233 | 243 | ||
@@ -236,68 +246,86 @@ void nnTrain( | |||
236 | nnMatrixTranspose(&query->layer_outputs[l], &outputs_T[l]); | 246 | nnMatrixTranspose(&query->layer_outputs[l], &outputs_T[l]); |
237 | } | 247 | } |
238 | 248 | ||
239 | // Update weights and biases for each internal layer, backpropagating | 249 | // Update weights and biases for each internal layer, back-propagating |
240 | // errors along the way. | 250 | // errors along the way. |
241 | for (int l = net->num_layers - 1; l >= 0; --l) { | 251 | for (int l = net->num_layers - 1; l >= 0; --l) { |
242 | const nnMatrix* layer_output = &query->layer_outputs[l]; | 252 | const nnMatrix* layer_output = &query->layer_outputs[l]; |
243 | nnMatrix* layer_weights = &net->weights[l]; | 253 | nnGradientElements* elems = &gradient_elems[l]; |
244 | nnMatrix* layer_biases = &net->biases[l]; | 254 | nnMatrix* gradient = &elems->gradient; |
245 | nnGradientElements* elems = &gradient_elems[l]; | 255 | nnLayerImpl* layer = &net->layers[l]; |
246 | nnMatrix* gradient = &elems->gradient; | 256 | |
247 | const nnActivation activation = net->activations[l]; | 257 | // Compute this layer's gradient. |
248 | 258 | // | |
249 | // Compute the gradient (the part of the expression that does not | 259 | // By "gradient" we mean the expression common to the weights and bias |
250 | // contain the output of the previous layer). | 260 | // gradients. This is the part of the expression that does not contain |
261 | // this layer's input. | ||
251 | // | 262 | // |
252 | // Identity: G = error_k | 263 | // Linear: G = id |
253 | // Sigmoid: G = error_k * output_k * (1 - output_k). | 264 | // Relu: G = (output_k > 0 ? 1 : 0) |
254 | // Relu: G = error_k * (output_k > 0 ? 1 : 0) | 265 | // Sigmoid: G = output_k * (1 - output_k) |
255 | switch (activation) { | 266 | switch (layer->type) { |
256 | case nnIdentity: | 267 | case nnLinear: { |
257 | // TODO: Just copy the pointer? | 268 | // TODO: Just copy the pointer? |
258 | *gradient = nnMatrixBorrow(&errors[l]); | 269 | *gradient = nnMatrixBorrow(&errors[l]); |
259 | break; | 270 | break; |
271 | } | ||
272 | case nnRelu: | ||
273 | nnMatrixGt(layer_output, 0, gradient); | ||
274 | break; | ||
260 | case nnSigmoid: | 275 | case nnSigmoid: |
261 | nnMatrixSub(&elems->sigmoid.ones, layer_output, gradient); | 276 | nnMatrixSub(&elems->sigmoid.ones, layer_output, gradient); |
262 | nnMatrixMulPairs(layer_output, gradient, gradient); | 277 | nnMatrixMulPairs(layer_output, gradient, gradient); |
263 | nnMatrixMulPairs(&errors[l], gradient, gradient); | ||
264 | break; | ||
265 | case nnRelu: | ||
266 | nnMatrixGt(layer_output, 0, gradient); | ||
267 | nnMatrixMulPairs(&errors[l], gradient, gradient); | ||
268 | break; | 278 | break; |
269 | } | 279 | } |
270 | 280 | ||
271 | // Outer product to compute the weight deltas. | 281 | // Back-propagate the error. |
272 | const nnMatrix* output_T = | 282 | // |
273 | (l == 0) ? &training_inputs_T : &outputs_T[l - 1]; | 283 | // This combines this layer's gradient with the back-propagated error, |
274 | nnMatrixMul(output_T, gradient, &weight_deltas[l]); | 284 | // which is the combination of the gradients of subsequent layers down |
275 | 285 | // to the output layer error. | |
276 | // Backpropagate the error before updating weights. | 286 | // |
287 | // Note that this step uses the layer's original weights. | ||
277 | if (l > 0) { | 288 | if (l > 0) { |
278 | // G * W^T == G *^T W. | 289 | switch (layer->type) { |
279 | // nnMatrixMul(gradient, &weights_T[l], &errors[l-1]); | 290 | case nnLinear: { |
280 | nnMatrixMulRows(gradient, layer_weights, &errors[l - 1]); | 291 | const nnMatrix* layer_weights = &layer->linear.weights; |
292 | // E * W^T == E *^T W. | ||
293 | // Using nnMatrixMulRows, we avoid having to transpose the weight | ||
294 | // matrix. | ||
295 | nnMatrixMulRows(&errors[l], layer_weights, &errors[l - 1]); | ||
296 | break; | ||
297 | } | ||
298 | // For activations, the error back-propagates as is but multiplied by | ||
299 | // the layer's gradient. | ||
300 | case nnRelu: | ||
301 | case nnSigmoid: | ||
302 | nnMatrixMulPairs(&errors[l], gradient, &errors[l - 1]); | ||
303 | break; | ||
304 | } | ||
281 | } | 305 | } |
282 | 306 | ||
283 | // Update weights. | 307 | // Update layer weights. |
284 | nnMatrixScale(&weight_deltas[l], params->learning_rate); | 308 | if (layer->type == nnLinear) { |
285 | // The gradient has a negative sign from -(t - o), but we have computed | 309 | nnLinearImpl* linear = &layer->linear; |
286 | // e = o - t instead, so we can subtract directly. | 310 | nnMatrix* layer_weights = &linear->weights; |
287 | // nnMatrixAdd(layer_weights, &weight_deltas[l], layer_weights); | 311 | nnMatrix* layer_biases = &linear->biases; |
288 | nnMatrixSub(layer_weights, &weight_deltas[l], layer_weights); | 312 | |
289 | 313 | // Outer product to compute the weight deltas. | |
290 | // Update weight transpose matrix for the next training iteration. | 314 | // This layer's input is the previous layer's output. |
291 | // nnMatrixTranspose(layer_weights, &weights_T[l]); | 315 | const nnMatrix* input_T = |
292 | 316 | (l == 0) ? &training_inputs_T : &outputs_T[l - 1]; | |
293 | // Update biases. | 317 | nnMatrixMul(input_T, gradient, &weight_deltas[l]); |
294 | // This is the same formula as for weights, except that the o_j term is | 318 | |
295 | // just 1. We can simply re-use the gradient that we have already | 319 | // Update weights. |
296 | // computed for the weight update. | 320 | nnMatrixScale(&weight_deltas[l], params->learning_rate); |
297 | // nnMatrixMulAdd(layer_biases, gradient, params->learning_rate, | 321 | nnMatrixSub(layer_weights, &weight_deltas[l], layer_weights); |
298 | // layer_biases); | 322 | |
299 | nnMatrixMulSub( | 323 | // Update biases. |
300 | layer_biases, gradient, params->learning_rate, layer_biases); | 324 | // This is the same formula as for weights, except that the o_j term |
325 | // is just 1. | ||
326 | nnMatrixMulSub( | ||
327 | layer_biases, gradient, params->learning_rate, layer_biases); | ||
328 | } | ||
301 | } | 329 | } |
302 | 330 | ||
303 | // TODO: Add this under a verbose debugging mode. | 331 | // TODO: Add this under a verbose debugging mode. |
@@ -334,12 +362,11 @@ void nnTrain( | |||
334 | for (int l = 0; l < net->num_layers; ++l) { | 362 | for (int l = 0; l < net->num_layers; ++l) { |
335 | nnMatrixDel(&errors[l]); | 363 | nnMatrixDel(&errors[l]); |
336 | nnMatrixDel(&outputs_T[l]); | 364 | nnMatrixDel(&outputs_T[l]); |
337 | // nnMatrixDel(&weights_T[l]); | ||
338 | nnMatrixDel(&weight_deltas[l]); | 365 | nnMatrixDel(&weight_deltas[l]); |
339 | 366 | ||
340 | nnGradientElements* elems = &gradient_elems[l]; | 367 | nnGradientElements* elems = &gradient_elems[l]; |
341 | switch (elems->type) { | 368 | switch (elems->type) { |
342 | case nnIdentity: | 369 | case nnLinear: |
343 | break; // Gradient vector is borrowed, no need to deallocate. | 370 | break; // Gradient vector is borrowed, no need to deallocate. |
344 | 371 | ||
345 | case nnSigmoid: | 372 | case nnSigmoid: |
@@ -355,7 +382,6 @@ void nnTrain( | |||
355 | nnMatrixDel(&training_inputs_T); | 382 | nnMatrixDel(&training_inputs_T); |
356 | free(errors); | 383 | free(errors); |
357 | free(outputs_T); | 384 | free(outputs_T); |
358 | // free(weights_T); | ||
359 | free(weight_deltas); | 385 | free(weight_deltas); |
360 | free(gradient_elems); | 386 | free(gradient_elems); |
361 | } | 387 | } |
diff --git a/src/lib/test/neuralnet_test.c b/src/lib/test/neuralnet_test.c index 14d9438..0f8d7b8 100644 --- a/src/lib/test/neuralnet_test.c +++ b/src/lib/test/neuralnet_test.c | |||
@@ -1,8 +1,8 @@ | |||
1 | #include <neuralnet/neuralnet.h> | 1 | #include <neuralnet/neuralnet.h> |
2 | 2 | ||
3 | #include <neuralnet/matrix.h> | ||
4 | #include "activation.h" | 3 | #include "activation.h" |
5 | #include "neuralnet_impl.h" | 4 | #include "neuralnet_impl.h" |
5 | #include <neuralnet/matrix.h> | ||
6 | 6 | ||
7 | #include "test.h" | 7 | #include "test.h" |
8 | #include "test_util.h" | 8 | #include "test_util.h" |
@@ -10,23 +10,31 @@ | |||
10 | #include <assert.h> | 10 | #include <assert.h> |
11 | 11 | ||
12 | TEST_CASE(neuralnet_perceptron_test) { | 12 | TEST_CASE(neuralnet_perceptron_test) { |
13 | const int num_layers = 1; | 13 | const int num_layers = 2; |
14 | const int layer_sizes[] = { 1, 1 }; | 14 | const int input_size = 1; |
15 | const nnActivation layer_activations[] = { nnSigmoid }; | 15 | const R weights[] = {0.3}; |
16 | const R weights[] = { 0.3 }; | 16 | const R biases[] = {0.0}; |
17 | const nnLayer layers[] = { | ||
18 | {.type = nnLinear, | ||
19 | .linear = | ||
20 | {.weights = nnMatrixFromArray(1, 1, weights), | ||
21 | .biases = nnMatrixFromArray(1, 1, biases)}}, | ||
22 | {.type = nnSigmoid}, | ||
23 | }; | ||
17 | 24 | ||
18 | nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations); | 25 | nnNeuralNetwork* net = nnMakeNet(layers, num_layers, input_size); |
19 | assert(net); | 26 | assert(net); |
20 | nnSetWeights(net, weights); | ||
21 | 27 | ||
22 | nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/1); | 28 | nnQueryObject* query = nnMakeQueryObject(net, 1); |
23 | 29 | ||
24 | const R input[] = { 0.9 }; | 30 | const R input[] = {0.9}; |
25 | R output[1]; | 31 | R output[1]; |
26 | nnQueryArray(net, query, input, output); | 32 | nnQueryArray(net, query, input, output); |
27 | 33 | ||
28 | const R expected_output = sigmoid(input[0] * weights[0]); | 34 | const R expected_output = sigmoid(input[0] * weights[0]); |
29 | printf("\nOutput: %f, Expected: %f\n", output[0], expected_output); | 35 | printf( |
36 | "\n[neuralnet_perceptron_test] Output: %f, Expected: %f\n", output[0], | ||
37 | expected_output); | ||
30 | TEST_TRUE(double_eq(output[0], expected_output, EPS)); | 38 | TEST_TRUE(double_eq(output[0], expected_output, EPS)); |
31 | 39 | ||
32 | nnDeleteQueryObject(&query); | 40 | nnDeleteQueryObject(&query); |
@@ -34,53 +42,66 @@ TEST_CASE(neuralnet_perceptron_test) { | |||
34 | } | 42 | } |
35 | 43 | ||
36 | TEST_CASE(neuralnet_xor_test) { | 44 | TEST_CASE(neuralnet_xor_test) { |
37 | const int num_layers = 2; | 45 | // First (hidden) layer. |
38 | const int layer_sizes[] = { 2, 2, 1 }; | 46 | const R weights0[] = {1, 1, 1, 1}; |
39 | const nnActivation layer_activations[] = { nnRelu, nnIdentity }; | 47 | const R biases0[] = {0, -1}; |
40 | const R weights[] = { | 48 | // Second (output) layer. |
41 | 1, 1, 1, 1, // First (hidden) layer. | 49 | const R weights1[] = {1, -2}; |
42 | 1, -2 // Second (output) layer. | 50 | const R biases1[] = {0}; |
43 | }; | 51 | // Network. |
44 | const R biases[] = { | 52 | const int num_layers = 3; |
45 | 0, -1, // First (hidden) layer. | 53 | const int input_size = 2; |
46 | 0 // Second (output) layer. | 54 | const nnLayer layers[] = { |
55 | {.type = nnLinear, | ||
56 | .linear = | ||
57 | {.weights = nnMatrixFromArray(2, 2, weights0), | ||
58 | .biases = nnMatrixFromArray(1, 2, biases0)}}, | ||
59 | {.type = nnRelu}, | ||
60 | {.type = nnLinear, | ||
61 | .linear = | ||
62 | {.weights = nnMatrixFromArray(2, 1, weights1), | ||
63 | .biases = nnMatrixFromArray(1, 1, biases1)}}, | ||
47 | }; | 64 | }; |
48 | 65 | ||
49 | nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations); | 66 | nnNeuralNetwork* net = nnMakeNet(layers, num_layers, input_size); |
50 | assert(net); | 67 | assert(net); |
51 | nnSetWeights(net, weights); | ||
52 | nnSetBiases(net, biases); | ||
53 | 68 | ||
54 | // First layer weights. | 69 | // First layer weights. |
55 | TEST_EQUAL(nnMatrixAt(&net->weights[0], 0, 0), 1); | 70 | TEST_EQUAL(nnMatrixAt(&net->layers[0].linear.weights, 0, 0), 1); |
56 | TEST_EQUAL(nnMatrixAt(&net->weights[0], 0, 1), 1); | 71 | TEST_EQUAL(nnMatrixAt(&net->layers[0].linear.weights, 0, 1), 1); |
57 | TEST_EQUAL(nnMatrixAt(&net->weights[0], 0, 2), 1); | 72 | TEST_EQUAL(nnMatrixAt(&net->layers[0].linear.weights, 0, 2), 1); |
58 | TEST_EQUAL(nnMatrixAt(&net->weights[0], 0, 3), 1); | 73 | TEST_EQUAL(nnMatrixAt(&net->layers[0].linear.weights, 0, 3), 1); |
59 | // Second layer weights. | 74 | // Second linear layer (third layer) weights. |
60 | TEST_EQUAL(nnMatrixAt(&net->weights[1], 0, 0), 1); | 75 | TEST_EQUAL(nnMatrixAt(&net->layers[2].linear.weights, 0, 0), 1); |
61 | TEST_EQUAL(nnMatrixAt(&net->weights[1], 0, 1), -2); | 76 | TEST_EQUAL(nnMatrixAt(&net->layers[2].linear.weights, 0, 1), -2); |
62 | // First layer biases. | 77 | // First layer biases. |
63 | TEST_EQUAL(nnMatrixAt(&net->biases[0], 0, 0), 0); | 78 | TEST_EQUAL(nnMatrixAt(&net->layers[0].linear.biases, 0, 0), 0); |
64 | TEST_EQUAL(nnMatrixAt(&net->biases[0], 0, 1), -1); | 79 | TEST_EQUAL(nnMatrixAt(&net->layers[0].linear.biases, 0, 1), -1); |
65 | // Second layer biases. | 80 | // Second linear layer (third layer) biases. |
66 | TEST_EQUAL(nnMatrixAt(&net->biases[1], 0, 0), 0); | 81 | TEST_EQUAL(nnMatrixAt(&net->layers[2].linear.biases, 0, 0), 0); |
67 | 82 | ||
68 | // Test. | 83 | // Test. |
69 | 84 | ||
70 | #define M 4 | 85 | #define M 4 |
71 | 86 | ||
72 | nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/M); | 87 | nnQueryObject* query = nnMakeQueryObject(net, M); |
73 | 88 | ||
74 | const R test_inputs[M][2] = { { 0., 0. }, { 1., 0. }, { 0., 1. }, { 1., 1. } }; | 89 | const R test_inputs[M][2] = { |
90 | {0., 0.}, | ||
91 | {1., 0.}, | ||
92 | {0., 1.}, | ||
93 | {1., 1.} | ||
94 | }; | ||
75 | nnMatrix test_inputs_matrix = nnMatrixMake(M, 2); | 95 | nnMatrix test_inputs_matrix = nnMatrixMake(M, 2); |
76 | nnMatrixInit(&test_inputs_matrix, (const R*)test_inputs); | 96 | nnMatrixInit(&test_inputs_matrix, (const R*)test_inputs); |
77 | nnQuery(net, query, &test_inputs_matrix); | 97 | nnQuery(net, query, &test_inputs_matrix); |
78 | 98 | ||
79 | const R expected_outputs[M] = { 0., 1., 1., 0. }; | 99 | const R expected_outputs[M] = {0., 1., 1., 0.}; |
80 | for (int i = 0; i < M; ++i) { | 100 | for (int i = 0; i < M; ++i) { |
81 | const R test_output = nnMatrixAt(nnNetOutputs(query), i, 0); | 101 | const R test_output = nnMatrixAt(nnNetOutputs(query), i, 0); |
82 | printf("\nInput: (%f, %f), Output: %f, Expected: %f\n", | 102 | printf( |
83 | test_inputs[i][0], test_inputs[i][1], test_output, expected_outputs[i]); | 103 | "\nInput: (%f, %f), Output: %f, Expected: %f\n", test_inputs[i][0], |
104 | test_inputs[i][1], test_output, expected_outputs[i]); | ||
84 | } | 105 | } |
85 | for (int i = 0; i < M; ++i) { | 106 | for (int i = 0; i < M; ++i) { |
86 | const R test_output = nnMatrixAt(nnNetOutputs(query), i, 0); | 107 | const R test_output = nnMatrixAt(nnNetOutputs(query), i, 0); |
diff --git a/src/lib/test/train_linear_perceptron_non_origin_test.c b/src/lib/test/train_linear_perceptron_non_origin_test.c index 5a320ac..40a42e0 100644 --- a/src/lib/test/train_linear_perceptron_non_origin_test.c +++ b/src/lib/test/train_linear_perceptron_non_origin_test.c | |||
@@ -1,9 +1,8 @@ | |||
1 | #include <neuralnet/train.h> | 1 | #include <neuralnet/train.h> |
2 | 2 | ||
3 | #include "neuralnet_impl.h" | ||
3 | #include <neuralnet/matrix.h> | 4 | #include <neuralnet/matrix.h> |
4 | #include <neuralnet/neuralnet.h> | 5 | #include <neuralnet/neuralnet.h> |
5 | #include "activation.h" | ||
6 | #include "neuralnet_impl.h" | ||
7 | 6 | ||
8 | #include "test.h" | 7 | #include "test.h" |
9 | #include "test_util.h" | 8 | #include "test_util.h" |
@@ -11,19 +10,21 @@ | |||
11 | #include <assert.h> | 10 | #include <assert.h> |
12 | 11 | ||
13 | TEST_CASE(neuralnet_train_linear_perceptron_non_origin_test) { | 12 | TEST_CASE(neuralnet_train_linear_perceptron_non_origin_test) { |
14 | const int num_layers = 1; | 13 | const int num_layers = 1; |
15 | const int layer_sizes[] = { 1, 1 }; | 14 | const int input_size = 1; |
16 | const nnActivation layer_activations[] = { nnIdentity }; | 15 | const nnLayer layers[] = { |
16 | {.type = nnLinear, .linear = {.input_size = 1, .output_size = 1}} | ||
17 | }; | ||
17 | 18 | ||
18 | nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations); | 19 | nnNeuralNetwork* net = nnMakeNet(layers, num_layers, input_size); |
19 | assert(net); | 20 | assert(net); |
20 | 21 | ||
21 | // Train. | 22 | // Train. |
22 | 23 | ||
23 | // Try to learn the Y = 2X + 1 line. | 24 | // Try to learn the Y = 2X + 1 line. |
24 | #define N 2 | 25 | #define N 2 |
25 | const R inputs[N] = { 0., 1. }; | 26 | const R inputs[N] = {0., 1.}; |
26 | const R targets[N] = { 1., 3. }; | 27 | const R targets[N] = {1., 3.}; |
27 | 28 | ||
28 | nnMatrix inputs_matrix = nnMatrixMake(N, 1); | 29 | nnMatrix inputs_matrix = nnMatrixMake(N, 1); |
29 | nnMatrix targets_matrix = nnMatrixMake(N, 1); | 30 | nnMatrix targets_matrix = nnMatrixMake(N, 1); |
@@ -31,31 +32,32 @@ TEST_CASE(neuralnet_train_linear_perceptron_non_origin_test) { | |||
31 | nnMatrixInit(&targets_matrix, targets); | 32 | nnMatrixInit(&targets_matrix, targets); |
32 | 33 | ||
33 | nnTrainingParams params = { | 34 | nnTrainingParams params = { |
34 | .learning_rate = 0.7, | 35 | .learning_rate = 0.7, |
35 | .max_iterations = 20, | 36 | .max_iterations = 20, |
36 | .seed = 0, | 37 | .seed = 0, |
37 | .weight_init = nnWeightInit01, | 38 | .weight_init = nnWeightInit01, |
38 | .debug = false, | 39 | .debug = false, |
39 | }; | 40 | }; |
40 | 41 | ||
41 | nnTrain(net, &inputs_matrix, &targets_matrix, &params); | 42 | nnTrain(net, &inputs_matrix, &targets_matrix, &params); |
42 | 43 | ||
43 | const R weight = nnMatrixAt(&net->weights[0], 0, 0); | 44 | const R weight = nnMatrixAt(&net->layers[0].linear.weights, 0, 0); |
44 | const R expected_weight = 2.0; | 45 | const R expected_weight = 2.0; |
45 | printf("\nTrained network weight: %f, Expected: %f\n", weight, expected_weight); | 46 | printf( |
47 | "\nTrained network weight: %f, Expected: %f\n", weight, expected_weight); | ||
46 | TEST_TRUE(double_eq(weight, expected_weight, WEIGHT_EPS)); | 48 | TEST_TRUE(double_eq(weight, expected_weight, WEIGHT_EPS)); |
47 | 49 | ||
48 | const R bias = nnMatrixAt(&net->biases[0], 0, 0); | 50 | const R bias = nnMatrixAt(&net->layers[0].linear.biases, 0, 0); |
49 | const R expected_bias = 1.0; | 51 | const R expected_bias = 1.0; |
50 | printf("Trained network bias: %f, Expected: %f\n", bias, expected_bias); | 52 | printf("Trained network bias: %f, Expected: %f\n", bias, expected_bias); |
51 | TEST_TRUE(double_eq(bias, expected_bias, WEIGHT_EPS)); | 53 | TEST_TRUE(double_eq(bias, expected_bias, WEIGHT_EPS)); |
52 | 54 | ||
53 | // Test. | 55 | // Test. |
54 | 56 | ||
55 | nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/1); | 57 | nnQueryObject* query = nnMakeQueryObject(net, 1); |
56 | 58 | ||
57 | const R test_input[] = { 2.3 }; | 59 | const R test_input[] = {2.3}; |
58 | R test_output[1]; | 60 | R test_output[1]; |
59 | nnQueryArray(net, query, test_input, test_output); | 61 | nnQueryArray(net, query, test_input, test_output); |
60 | 62 | ||
61 | const R expected_output = test_input[0] * expected_weight + expected_bias; | 63 | const R expected_output = test_input[0] * expected_weight + expected_bias; |
diff --git a/src/lib/test/train_linear_perceptron_test.c b/src/lib/test/train_linear_perceptron_test.c index 2b1336d..667643b 100644 --- a/src/lib/test/train_linear_perceptron_test.c +++ b/src/lib/test/train_linear_perceptron_test.c | |||
@@ -1,9 +1,8 @@ | |||
1 | #include <neuralnet/train.h> | 1 | #include <neuralnet/train.h> |
2 | 2 | ||
3 | #include "neuralnet_impl.h" | ||
3 | #include <neuralnet/matrix.h> | 4 | #include <neuralnet/matrix.h> |
4 | #include <neuralnet/neuralnet.h> | 5 | #include <neuralnet/neuralnet.h> |
5 | #include "activation.h" | ||
6 | #include "neuralnet_impl.h" | ||
7 | 6 | ||
8 | #include "test.h" | 7 | #include "test.h" |
9 | #include "test_util.h" | 8 | #include "test_util.h" |
@@ -11,19 +10,21 @@ | |||
11 | #include <assert.h> | 10 | #include <assert.h> |
12 | 11 | ||
13 | TEST_CASE(neuralnet_train_linear_perceptron_test) { | 12 | TEST_CASE(neuralnet_train_linear_perceptron_test) { |
14 | const int num_layers = 1; | 13 | const int num_layers = 1; |
15 | const int layer_sizes[] = { 1, 1 }; | 14 | const int input_size = 1; |
16 | const nnActivation layer_activations[] = { nnIdentity }; | 15 | const nnLayer layers[] = { |
16 | {.type = nnLinear, .linear = {.input_size = 1, .output_size = 1}} | ||
17 | }; | ||
17 | 18 | ||
18 | nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations); | 19 | nnNeuralNetwork* net = nnMakeNet(layers, num_layers, input_size); |
19 | assert(net); | 20 | assert(net); |
20 | 21 | ||
21 | // Train. | 22 | // Train. |
22 | 23 | ||
23 | // Try to learn the Y=X line. | 24 | // Try to learn the Y=X line. |
24 | #define N 2 | 25 | #define N 2 |
25 | const R inputs[N] = { 0., 1. }; | 26 | const R inputs[N] = {0., 1.}; |
26 | const R targets[N] = { 0., 1. }; | 27 | const R targets[N] = {0., 1.}; |
27 | 28 | ||
28 | nnMatrix inputs_matrix = nnMatrixMake(N, 1); | 29 | nnMatrix inputs_matrix = nnMatrixMake(N, 1); |
29 | nnMatrix targets_matrix = nnMatrixMake(N, 1); | 30 | nnMatrix targets_matrix = nnMatrixMake(N, 1); |
@@ -31,26 +32,27 @@ TEST_CASE(neuralnet_train_linear_perceptron_test) { | |||
31 | nnMatrixInit(&targets_matrix, targets); | 32 | nnMatrixInit(&targets_matrix, targets); |
32 | 33 | ||
33 | nnTrainingParams params = { | 34 | nnTrainingParams params = { |
34 | .learning_rate = 0.7, | 35 | .learning_rate = 0.7, |
35 | .max_iterations = 10, | 36 | .max_iterations = 10, |
36 | .seed = 0, | 37 | .seed = 0, |
37 | .weight_init = nnWeightInit01, | 38 | .weight_init = nnWeightInit01, |
38 | .debug = false, | 39 | .debug = false, |
39 | }; | 40 | }; |
40 | 41 | ||
41 | nnTrain(net, &inputs_matrix, &targets_matrix, &params); | 42 | nnTrain(net, &inputs_matrix, &targets_matrix, &params); |
42 | 43 | ||
43 | const R weight = nnMatrixAt(&net->weights[0], 0, 0); | 44 | const R weight = nnMatrixAt(&net->layers[0].linear.weights, 0, 0); |
44 | const R expected_weight = 1.0; | 45 | const R expected_weight = 1.0; |
45 | printf("\nTrained network weight: %f, Expected: %f\n", weight, expected_weight); | 46 | printf( |
47 | "\nTrained network weight: %f, Expected: %f\n", weight, expected_weight); | ||
46 | TEST_TRUE(double_eq(weight, expected_weight, WEIGHT_EPS)); | 48 | TEST_TRUE(double_eq(weight, expected_weight, WEIGHT_EPS)); |
47 | 49 | ||
48 | // Test. | 50 | // Test. |
49 | 51 | ||
50 | nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/1); | 52 | nnQueryObject* query = nnMakeQueryObject(net, 1); |
51 | 53 | ||
52 | const R test_input[] = { 2.3 }; | 54 | const R test_input[] = {2.3}; |
53 | R test_output[1]; | 55 | R test_output[1]; |
54 | nnQueryArray(net, query, test_input, test_output); | 56 | nnQueryArray(net, query, test_input, test_output); |
55 | 57 | ||
56 | const R expected_output = test_input[0]; | 58 | const R expected_output = test_input[0]; |
diff --git a/src/lib/test/train_sigmoid_test.c b/src/lib/test/train_sigmoid_test.c index 588e7ca..39a84b0 100644 --- a/src/lib/test/train_sigmoid_test.c +++ b/src/lib/test/train_sigmoid_test.c | |||
@@ -1,9 +1,9 @@ | |||
1 | #include <neuralnet/train.h> | 1 | #include <neuralnet/train.h> |
2 | 2 | ||
3 | #include <neuralnet/matrix.h> | ||
4 | #include <neuralnet/neuralnet.h> | ||
5 | #include "activation.h" | 3 | #include "activation.h" |
6 | #include "neuralnet_impl.h" | 4 | #include "neuralnet_impl.h" |
5 | #include <neuralnet/matrix.h> | ||
6 | #include <neuralnet/neuralnet.h> | ||
7 | 7 | ||
8 | #include "test.h" | 8 | #include "test.h" |
9 | #include "test_util.h" | 9 | #include "test_util.h" |
@@ -11,21 +11,24 @@ | |||
11 | #include <assert.h> | 11 | #include <assert.h> |
12 | 12 | ||
13 | TEST_CASE(neuralnet_train_sigmoid_test) { | 13 | TEST_CASE(neuralnet_train_sigmoid_test) { |
14 | const int num_layers = 1; | 14 | const int num_layers = 2; |
15 | const int layer_sizes[] = { 1, 1 }; | 15 | const int input_size = 1; |
16 | const nnActivation layer_activations[] = { nnSigmoid }; | 16 | const nnLayer layers[] = { |
17 | {.type = nnLinear, .linear = {.input_size = 1, .output_size = 1}}, | ||
18 | {.type = nnSigmoid}, | ||
19 | }; | ||
17 | 20 | ||
18 | nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations); | 21 | nnNeuralNetwork* net = nnMakeNet(layers, num_layers, input_size); |
19 | assert(net); | 22 | assert(net); |
20 | 23 | ||
21 | // Train. | 24 | // Train. |
22 | 25 | ||
23 | // Try to learn the sigmoid function. | 26 | // Try to learn the sigmoid function. |
24 | #define N 3 | 27 | #define N 3 |
25 | R inputs[N]; | 28 | R inputs[N]; |
26 | R targets[N]; | 29 | R targets[N]; |
27 | for (int i = 0; i < N; ++i) { | 30 | for (int i = 0; i < N; ++i) { |
28 | inputs[i] = lerp(-1, +1, (R)i / (R)(N-1)); | 31 | inputs[i] = lerp(-1, +1, (R)i / (R)(N - 1)); |
29 | targets[i] = sigmoid(inputs[i]); | 32 | targets[i] = sigmoid(inputs[i]); |
30 | } | 33 | } |
31 | 34 | ||
@@ -35,29 +38,30 @@ TEST_CASE(neuralnet_train_sigmoid_test) { | |||
35 | nnMatrixInit(&targets_matrix, targets); | 38 | nnMatrixInit(&targets_matrix, targets); |
36 | 39 | ||
37 | nnTrainingParams params = { | 40 | nnTrainingParams params = { |
38 | .learning_rate = 0.9, | 41 | .learning_rate = 0.9, |
39 | .max_iterations = 100, | 42 | .max_iterations = 100, |
40 | .seed = 0, | 43 | .seed = 0, |
41 | .weight_init = nnWeightInit01, | 44 | .weight_init = nnWeightInit01, |
42 | .debug = false, | 45 | .debug = false, |
43 | }; | 46 | }; |
44 | 47 | ||
45 | nnTrain(net, &inputs_matrix, &targets_matrix, &params); | 48 | nnTrain(net, &inputs_matrix, &targets_matrix, &params); |
46 | 49 | ||
47 | const R weight = nnMatrixAt(&net->weights[0], 0, 0); | 50 | const R weight = nnMatrixAt(&net->layers[0].linear.weights, 0, 0); |
48 | const R expected_weight = 1.0; | 51 | const R expected_weight = 1.0; |
49 | printf("\nTrained network weight: %f, Expected: %f\n", weight, expected_weight); | 52 | printf( |
53 | "\nTrained network weight: %f, Expected: %f\n", weight, expected_weight); | ||
50 | TEST_TRUE(double_eq(weight, expected_weight, WEIGHT_EPS)); | 54 | TEST_TRUE(double_eq(weight, expected_weight, WEIGHT_EPS)); |
51 | 55 | ||
52 | // Test. | 56 | // Test. |
53 | 57 | ||
54 | nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/1); | 58 | nnQueryObject* query = nnMakeQueryObject(net, 1); |
55 | 59 | ||
56 | const R test_input[] = { 0.3 }; | 60 | const R test_input[] = {0.3}; |
57 | R test_output[1]; | 61 | R test_output[1]; |
58 | nnQueryArray(net, query, test_input, test_output); | 62 | nnQueryArray(net, query, test_input, test_output); |
59 | 63 | ||
60 | const R expected_output = 0.574442516811659; // sigmoid(0.3) | 64 | const R expected_output = 0.574442516811659; // sigmoid(0.3) |
61 | printf("Output: %f, Expected: %f\n", test_output[0], expected_output); | 65 | printf("Output: %f, Expected: %f\n", test_output[0], expected_output); |
62 | TEST_TRUE(double_eq(test_output[0], expected_output, OUTPUT_EPS)); | 66 | TEST_TRUE(double_eq(test_output[0], expected_output, OUTPUT_EPS)); |
63 | 67 | ||
diff --git a/src/lib/test/train_xor_test.c b/src/lib/test/train_xor_test.c index 6ddc6e0..78695a3 100644 --- a/src/lib/test/train_xor_test.c +++ b/src/lib/test/train_xor_test.c | |||
@@ -1,9 +1,9 @@ | |||
1 | #include <neuralnet/train.h> | 1 | #include <neuralnet/train.h> |
2 | 2 | ||
3 | #include <neuralnet/matrix.h> | ||
4 | #include <neuralnet/neuralnet.h> | ||
5 | #include "activation.h" | 3 | #include "activation.h" |
6 | #include "neuralnet_impl.h" | 4 | #include "neuralnet_impl.h" |
5 | #include <neuralnet/matrix.h> | ||
6 | #include <neuralnet/neuralnet.h> | ||
7 | 7 | ||
8 | #include "test.h" | 8 | #include "test.h" |
9 | #include "test_util.h" | 9 | #include "test_util.h" |
@@ -11,18 +11,27 @@ | |||
11 | #include <assert.h> | 11 | #include <assert.h> |
12 | 12 | ||
13 | TEST_CASE(neuralnet_train_xor_test) { | 13 | TEST_CASE(neuralnet_train_xor_test) { |
14 | const int num_layers = 2; | 14 | const int num_layers = 3; |
15 | const int layer_sizes[] = { 2, 2, 1 }; | 15 | const int input_size = 2; |
16 | const nnActivation layer_activations[] = { nnRelu, nnIdentity }; | 16 | const nnLayer layers[] = { |
17 | {.type = nnLinear, .linear = {.input_size = 2, .output_size = 2}}, | ||
18 | {.type = nnRelu}, | ||
19 | {.type = nnLinear, .linear = {.input_size = 2, .output_size = 1}} | ||
20 | }; | ||
17 | 21 | ||
18 | nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations); | 22 | nnNeuralNetwork* net = nnMakeNet(layers, num_layers, input_size); |
19 | assert(net); | 23 | assert(net); |
20 | 24 | ||
21 | // Train. | 25 | // Train. |
22 | 26 | ||
23 | #define N 4 | 27 | #define N 4 |
24 | const R inputs[N][2] = { { 0., 0. }, { 0., 1. }, { 1., 0. }, { 1., 1. } }; | 28 | const R inputs[N][2] = { |
25 | const R targets[N] = { 0., 1., 1., 0. }; | 29 | {0., 0.}, |
30 | {0., 1.}, | ||
31 | {1., 0.}, | ||
32 | {1., 1.} | ||
33 | }; | ||
34 | const R targets[N] = {0., 1., 1., 0.}; | ||
26 | 35 | ||
27 | nnMatrix inputs_matrix = nnMatrixMake(N, 2); | 36 | nnMatrix inputs_matrix = nnMatrixMake(N, 2); |
28 | nnMatrix targets_matrix = nnMatrixMake(N, 1); | 37 | nnMatrix targets_matrix = nnMatrixMake(N, 1); |
@@ -30,31 +39,37 @@ TEST_CASE(neuralnet_train_xor_test) { | |||
30 | nnMatrixInit(&targets_matrix, targets); | 39 | nnMatrixInit(&targets_matrix, targets); |
31 | 40 | ||
32 | nnTrainingParams params = { | 41 | nnTrainingParams params = { |
33 | .learning_rate = 0.1, | 42 | .learning_rate = 0.1, |
34 | .max_iterations = 500, | 43 | .max_iterations = 500, |
35 | .seed = 0, | 44 | .seed = 0, |
36 | .weight_init = nnWeightInit01, | 45 | .weight_init = nnWeightInit01, |
37 | .debug = false, | 46 | .debug = false, |
38 | }; | 47 | }; |
39 | 48 | ||
40 | nnTrain(net, &inputs_matrix, &targets_matrix, &params); | 49 | nnTrain(net, &inputs_matrix, &targets_matrix, &params); |
41 | 50 | ||
42 | // Test. | 51 | // Test. |
43 | 52 | ||
44 | #define M 4 | 53 | #define M 4 |
45 | 54 | ||
46 | nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/M); | 55 | nnQueryObject* query = nnMakeQueryObject(net, M); |
47 | 56 | ||
48 | const R test_inputs[M][2] = { { 0., 0. }, { 1., 0. }, { 0., 1. }, { 1., 1. } }; | 57 | const R test_inputs[M][2] = { |
58 | {0., 0.}, | ||
59 | {1., 0.}, | ||
60 | {0., 1.}, | ||
61 | {1., 1.} | ||
62 | }; | ||
49 | nnMatrix test_inputs_matrix = nnMatrixMake(M, 2); | 63 | nnMatrix test_inputs_matrix = nnMatrixMake(M, 2); |
50 | nnMatrixInit(&test_inputs_matrix, (const R*)test_inputs); | 64 | nnMatrixInit(&test_inputs_matrix, (const R*)test_inputs); |
51 | nnQuery(net, query, &test_inputs_matrix); | 65 | nnQuery(net, query, &test_inputs_matrix); |
52 | 66 | ||
53 | const R expected_outputs[M] = { 0., 1., 1., 0. }; | 67 | const R expected_outputs[M] = {0., 1., 1., 0.}; |
54 | for (int i = 0; i < M; ++i) { | 68 | for (int i = 0; i < M; ++i) { |
55 | const R test_output = nnMatrixAt(nnNetOutputs(query), i, 0); | 69 | const R test_output = nnMatrixAt(nnNetOutputs(query), i, 0); |
56 | printf("\nInput: (%f, %f), Output: %f, Expected: %f\n", | 70 | printf( |
57 | test_inputs[i][0], test_inputs[i][1], test_output, expected_outputs[i]); | 71 | "\nInput: (%f, %f), Output: %f, Expected: %f\n", test_inputs[i][0], |
72 | test_inputs[i][1], test_output, expected_outputs[i]); | ||
58 | } | 73 | } |
59 | for (int i = 0; i < M; ++i) { | 74 | for (int i = 0; i < M; ++i) { |
60 | const R test_output = nnMatrixAt(nnNetOutputs(query), i, 0); | 75 | const R test_output = nnMatrixAt(nnNetOutputs(query), i, 0); |