From 653e98e029a0d0f110b0ac599e50406060bb0f87 Mon Sep 17 00:00:00 2001 From: 3gg <3gg@shellblade.net> Date: Sat, 16 Dec 2023 10:21:16 -0800 Subject: Decouple activations from linear layer. --- src/bin/mnist/src/main.c | 195 ++++++++++-------- src/lib/include/neuralnet/matrix.h | 3 + src/lib/include/neuralnet/neuralnet.h | 51 +++-- src/lib/src/activation.h | 4 +- src/lib/src/matrix.c | 6 + src/lib/src/neuralnet.c | 218 +++++++++++---------- src/lib/src/neuralnet_impl.h | 35 ++-- src/lib/src/train.c | 182 +++++++++-------- src/lib/test/neuralnet_test.c | 103 ++++++---- .../test/train_linear_perceptron_non_origin_test.c | 46 ++--- src/lib/test/train_linear_perceptron_test.c | 44 +++-- src/lib/test/train_sigmoid_test.c | 46 +++-- src/lib/test/train_xor_test.c | 55 ++++-- 13 files changed, 559 insertions(+), 429 deletions(-) diff --git a/src/bin/mnist/src/main.c b/src/bin/mnist/src/main.c index 9aa3ce5..53e0197 100644 --- a/src/bin/mnist/src/main.c +++ b/src/bin/mnist/src/main.c @@ -29,32 +29,35 @@ static const double LABEL_UPPER_BOUND = 0.99; // Epsilon used to compare R values. static const double EPS = 1e-10; -#define min(a,b) ((a) < (b) ? (a) : (b)) +#define min(a, b) ((a) < (b) ? (a) : (b)) typedef struct ImageSet { - nnMatrix images; // Images flattened into row vectors of the matrix. - nnMatrix labels; // One-hot-encoded labels. - int count; // Number of images and labels. - int rows; // Rows in an image. - int cols; // Columns in an image. + nnMatrix images; // Images flattened into row vectors of the matrix. + nnMatrix labels; // One-hot-encoded labels. + int count; // Number of images and labels. + int rows; // Rows in an image. + int cols; // Columns in an image. } ImageSet; static void usage(const char* argv0) { - fprintf(stderr, "Usage: %s [num images]\n", argv0); + fprintf( + stderr, "Usage: %s [num images]\n", + argv0); fprintf(stderr, "\n"); - fprintf(stderr, " Use -1 for [num images] to use all the images in the data set\n"); + fprintf( + stderr, + " Use -1 for [num images] to use all the images in the data set\n"); } -static bool R_eq(R a, R b) { - return fabs(a-b) <= EPS; -} +static bool R_eq(R a, R b) { return fabs(a - b) <= EPS; } -static void PrintImage(const nnMatrix* images, int rows, int cols, int image_index) { +static void PrintImage( + const nnMatrix* images, int rows, int cols, int image_index) { assert(images); assert((0 <= image_index) && (image_index < images->rows)); // Top line. - for (int j = 0; j < cols/2; ++j) { + for (int j = 0; j < cols / 2; ++j) { printf(" -"); } printf("\n"); @@ -68,8 +71,7 @@ static void PrintImage(const nnMatrix* images, int rows, int cols, int image_ind printf("#"); } else if (*value > 0.5) { printf("*"); - } - else if (*value > PIXEL_LOWER_BOUND) { + } else if (*value > PIXEL_LOWER_BOUND) { printf(":"); } else if (*value == 0.0) { // Values should not be exactly 0, otherwise they cancel out weights @@ -84,7 +86,7 @@ static void PrintImage(const nnMatrix* images, int rows, int cols, int image_ind } // Bottom line. - for (int j = 0; j < cols/2; ++j) { + for (int j = 0; j < cols / 2; ++j) { printf(" -"); } printf("\n"); @@ -96,7 +98,7 @@ static void PrintLabel(const nnMatrix* labels, int label_index) { // Compute the label from the one-hot encoding. 
const R* value = nnMatrixRow(labels, label_index); - int label = -1; + int label = -1; for (int i = 0; i < 10; ++i) { if (R_eq(*value++, LABEL_UPPER_BOUND)) { label = i; @@ -113,13 +115,12 @@ static void PrintLabel(const nnMatrix* labels, int label_index) { printf(")\n"); } -static R lerp(R a, R b, R t) { - return a + t*(b-a); -} +static R lerp(R a, R b, R t) { return a + t * (b - a); } /// Rescales a pixel from [0,255] to [PIXEL_LOWER_BOUND, 1.0]. static R FormatPixel(uint8_t pixel) { - const R value = (R)(pixel) / 255.0 * (1.0 - PIXEL_LOWER_BOUND) + PIXEL_LOWER_BOUND; + const R value = + (R)(pixel) / 255.0 * (1.0 - PIXEL_LOWER_BOUND) + PIXEL_LOWER_BOUND; assert(value >= PIXEL_LOWER_BOUND); assert(value <= 1.0); return value; @@ -152,7 +153,8 @@ static void ImageToMatrix( } } -static bool ReadImages(gzFile images_file, int max_num_images, ImageSet* image_set) { +static bool ReadImages( + gzFile images_file, int max_num_images, ImageSet* image_set) { assert(images_file != Z_NULL); assert(image_set); @@ -161,36 +163,41 @@ static bool ReadImages(gzFile images_file, int max_num_images, ImageSet* image_s uint8_t* pixels = 0; int32_t magic, total_images, rows, cols; - if ( (gzread(images_file, (char*)&magic, sizeof(int32_t)) != sizeof(int32_t)) || - (gzread(images_file, (char*)&total_images, sizeof(int32_t)) != sizeof(int32_t)) || - (gzread(images_file, (char*)&rows, sizeof(int32_t)) != sizeof(int32_t)) || - (gzread(images_file, (char*)&cols, sizeof(int32_t)) != sizeof(int32_t)) ) { + if ((gzread(images_file, (char*)&magic, sizeof(int32_t)) != + sizeof(int32_t)) || + (gzread(images_file, (char*)&total_images, sizeof(int32_t)) != + sizeof(int32_t)) || + (gzread(images_file, (char*)&rows, sizeof(int32_t)) != sizeof(int32_t)) || + (gzread(images_file, (char*)&cols, sizeof(int32_t)) != sizeof(int32_t))) { fprintf(stderr, "Failed to read header\n"); goto cleanup; } - magic = ReverseEndian32(magic); + magic = ReverseEndian32(magic); total_images = ReverseEndian32(total_images); - rows = ReverseEndian32(rows); - cols = ReverseEndian32(cols); + rows = ReverseEndian32(rows); + cols = ReverseEndian32(cols); if (magic != IMAGE_FILE_MAGIC) { - fprintf(stderr, "Magic number mismatch. Got %x, expected: %x\n", - magic, IMAGE_FILE_MAGIC); + fprintf( + stderr, "Magic number mismatch. Got %x, expected: %x\n", magic, + IMAGE_FILE_MAGIC); goto cleanup; } - printf("Magic: %.8x\nTotal images: %d\nRows: %d\nCols: %d\n", - magic, total_images, rows, cols); + printf( + "Magic: %.8x\nTotal images: %d\nRows: %d\nCols: %d\n", magic, + total_images, rows, cols); - total_images = max_num_images >= 0 ? min(total_images, max_num_images) : total_images; + total_images = + max_num_images >= 0 ? min(total_images, max_num_images) : total_images; // Images are flattened into single row vectors. 
const int num_pixels = rows * cols; - image_set->images = nnMatrixMake(total_images, num_pixels); - image_set->count = total_images; - image_set->rows = rows; - image_set->cols = cols; + image_set->images = nnMatrixMake(total_images, num_pixels); + image_set->count = total_images; + image_set->rows = rows; + image_set->cols = cols; pixels = calloc(1, num_pixels); if (!pixels) { @@ -219,30 +226,31 @@ cleanup: return success; } -static void OneHotEncode(const uint8_t* labels_bytes, int num_labels, nnMatrix* labels) { +static void OneHotEncode( + const uint8_t* labels_bytes, int num_labels, nnMatrix* labels) { assert(labels_bytes); assert(labels); assert(labels->rows == num_labels); assert(labels->cols == 10); static const R one_hot[10][10] = { - { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, - { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 }, - { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 }, - { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, - { 0, 0, 0, 0, 1, 0, 0, 0, 0, 0 }, - { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 }, - { 0, 0, 0, 0, 0, 0, 1, 0, 0, 0 }, - { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, - { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0 }, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }, + {1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 1, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 1, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 1, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 1, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 1, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 1}, }; R* value = labels->values; for (int i = 0; i < num_labels; ++i) { - const uint8_t label = labels_bytes[i]; - const R* one_hot_value = one_hot[label]; + const uint8_t label = labels_bytes[i]; + const R* one_hot_value = one_hot[label]; for (int j = 0; j < 10; ++j) { *value++ = FormatLabel(*one_hot_value++); @@ -255,13 +263,13 @@ static int OneHotDecode(const nnMatrix* label_matrix) { assert(label_matrix->cols == 10); assert(label_matrix->rows == 1); - R max_value = 0; - int pos_max = 0; + R max_value = 0; + int pos_max = 0; for (int i = 0; i < 10; ++i) { const R value = nnMatrixAt(label_matrix, 0, i); if (value > max_value) { max_value = value; - pos_max = i; + pos_max = i; } } assert(pos_max >= 0); @@ -269,7 +277,8 @@ static int OneHotDecode(const nnMatrix* label_matrix) { return pos_max; } -static bool ReadLabels(gzFile labels_file, int max_num_labels, ImageSet* image_set) { +static bool ReadLabels( + gzFile labels_file, int max_num_labels, ImageSet* image_set) { assert(labels_file != Z_NULL); assert(image_set != 0); @@ -278,24 +287,28 @@ static bool ReadLabels(gzFile labels_file, int max_num_labels, ImageSet* image_s uint8_t* labels = 0; int32_t magic, total_labels; - if ( (gzread(labels_file, (char*)&magic, sizeof(int32_t)) != sizeof(int32_t)) || - (gzread(labels_file, (char*)&total_labels, sizeof(int32_t)) != sizeof(int32_t)) ) { + if ((gzread(labels_file, (char*)&magic, sizeof(int32_t)) != + sizeof(int32_t)) || + (gzread(labels_file, (char*)&total_labels, sizeof(int32_t)) != + sizeof(int32_t))) { fprintf(stderr, "Failed to read header\n"); goto cleanup; } - magic = ReverseEndian32(magic); + magic = ReverseEndian32(magic); total_labels = ReverseEndian32(total_labels); if (magic != LABEL_FILE_MAGIC) { - fprintf(stderr, "Magic number mismatch. Got %x, expected: %x\n", - magic, LABEL_FILE_MAGIC); + fprintf( + stderr, "Magic number mismatch. Got %x, expected: %x\n", magic, + LABEL_FILE_MAGIC); goto cleanup; } printf("Magic: %.8x\nTotal labels: %d\n", magic, total_labels); - total_labels = max_num_labels >= 0 ? 
min(total_labels, max_num_labels) : total_labels; + total_labels = + max_num_labels >= 0 ? min(total_labels, max_num_labels) : total_labels; assert(image_set->count == total_labels); @@ -308,7 +321,8 @@ static bool ReadLabels(gzFile labels_file, int max_num_labels, ImageSet* image_s goto cleanup; } - if (gzread(labels_file, labels, total_labels * sizeof(uint8_t)) != total_labels) { + if (gzread(labels_file, labels, total_labels * sizeof(uint8_t)) != + total_labels) { fprintf(stderr, "Failed to read labels\n"); goto cleanup; } @@ -335,17 +349,17 @@ int main(int argc, const char** argv) { bool success = false; - gzFile train_images_file = Z_NULL; - gzFile train_labels_file = Z_NULL; - gzFile test_images_file = Z_NULL; - gzFile test_labels_file = Z_NULL; - ImageSet train_set = { 0 }; - ImageSet test_set = { 0 }; - nnNeuralNetwork* net = 0; - nnQueryObject* query = 0; + gzFile train_images_file = Z_NULL; + gzFile train_labels_file = Z_NULL; + gzFile test_images_file = Z_NULL; + gzFile test_labels_file = Z_NULL; + ImageSet train_set = {0}; + ImageSet test_set = {0}; + nnNeuralNetwork* net = 0; + nnQueryObject* query = 0; const char* mnist_files_dir = argv[1]; - const int max_num_images = argc > 2 ? atoi(argv[2]) : -1; + const int max_num_images = argc > 2 ? atoi(argv[2]) : -1; char train_labels_path[PATH_MAX]; char train_images_path[PATH_MAX]; @@ -353,12 +367,12 @@ int main(int argc, const char** argv) { char test_images_path[PATH_MAX]; strlcpy(train_labels_path, mnist_files_dir, PATH_MAX); strlcpy(train_images_path, mnist_files_dir, PATH_MAX); - strlcpy(test_labels_path, mnist_files_dir, PATH_MAX); - strlcpy(test_images_path, mnist_files_dir, PATH_MAX); + strlcpy(test_labels_path, mnist_files_dir, PATH_MAX); + strlcpy(test_images_path, mnist_files_dir, PATH_MAX); strlcat(train_labels_path, "/train-labels-idx1-ubyte.gz", PATH_MAX); strlcat(train_images_path, "/train-images-idx3-ubyte.gz", PATH_MAX); - strlcat(test_labels_path, "/t10k-labels-idx1-ubyte.gz", PATH_MAX); - strlcat(test_images_path, "/t10k-images-idx3-ubyte.gz", PATH_MAX); + strlcat(test_labels_path, "/t10k-labels-idx1-ubyte.gz", PATH_MAX); + strlcat(test_images_path, "/t10k-images-idx3-ubyte.gz", PATH_MAX); train_images_file = gzopen(train_images_path, "r"); if (train_images_file == Z_NULL) { @@ -406,11 +420,18 @@ int main(int argc, const char** argv) { } // Network definition. - const int image_size_pixels = train_set.rows * train_set.cols; - const int num_layers = 2; - const int layer_sizes[3] = { image_size_pixels, 100, 10 }; - const nnActivation layer_activations[2] = { nnSigmoid, nnSigmoid }; - if (!(net = nnMakeNet(num_layers, layer_sizes, layer_activations))) { + const int image_size_pixels = train_set.rows * train_set.cols; + const int num_layers = 4; + const int hidden_size = 100; + const nnLayer layers[4] = { + {.type = nnLinear, + .linear = {.input_size = image_size_pixels, .output_size = hidden_size}}, + {.type = nnSigmoid}, + {.type = nnLinear, + .linear = {.input_size = hidden_size, .output_size = 10}}, + {.type = nnSigmoid} + }; + if (!(net = nnMakeNet(layers, num_layers, image_size_pixels))) { fprintf(stderr, "Failed to create neural network\n"); goto cleanup; } @@ -418,17 +439,17 @@ int main(int argc, const char** argv) { // Train. 
printf("Training with up to %d images from the data set\n\n", max_num_images); const nnTrainingParams training_params = { - .learning_rate = 0.1, - .max_iterations = TRAIN_ITERATIONS, - .seed = 0, - .weight_init = nnWeightInitNormal, - .debug = true, + .learning_rate = 0.1, + .max_iterations = TRAIN_ITERATIONS, + .seed = 0, + .weight_init = nnWeightInitNormal, + .debug = true, }; nnTrain(net, &train_set.images, &train_set.labels, &training_params); // Test. int hits = 0; - query = nnMakeQueryObject(net, /*num_inputs=*/1); + query = nnMakeQueryObject(net, /*num_inputs=*/1); for (int i = 0; i < test_set.count; ++i) { const nnMatrix test_image = nnMatrixBorrowRows(&test_set.images, i, 1); const nnMatrix test_label = nnMatrixBorrowRows(&test_set.labels, i, 1); @@ -444,7 +465,7 @@ int main(int argc, const char** argv) { } const R hit_ratio = (R)hits / (R)test_set.count; printf("Test images: %d\n", test_set.count); - printf("Hits: %d/%d (%.3f%%)\n", hits, test_set.count, hit_ratio*100); + printf("Hits: %d/%d (%.3f%%)\n", hits, test_set.count, hit_ratio * 100); success = true; diff --git a/src/lib/include/neuralnet/matrix.h b/src/lib/include/neuralnet/matrix.h index b7281bf..f80b985 100644 --- a/src/lib/include/neuralnet/matrix.h +++ b/src/lib/include/neuralnet/matrix.h @@ -17,6 +17,9 @@ nnMatrix nnMatrixMake(int rows, int cols); /// Delete a matrix and free its internal memory. void nnMatrixDel(nnMatrix*); +/// Construct a matrix from an array of values. +nnMatrix nnMatrixFromArray(int rows, int cols, const R values[]); + /// Move a matrix. /// /// |in| is an empty matrix after the move. diff --git a/src/lib/include/neuralnet/neuralnet.h b/src/lib/include/neuralnet/neuralnet.h index 05c9406..f122c2a 100644 --- a/src/lib/include/neuralnet/neuralnet.h +++ b/src/lib/include/neuralnet/neuralnet.h @@ -1,32 +1,45 @@ #pragma once +#include #include -typedef struct nnMatrix nnMatrix; - typedef struct nnNeuralNetwork nnNeuralNetwork; typedef struct nnQueryObject nnQueryObject; -/// Neuron activation. -typedef enum nnActivation { - nnIdentity, +/// Linear layer parameters. +/// +/// Either one of the following must be set: +/// a) Training: input and output sizes. +/// b) Inference: weights + biases. +typedef struct nnLinearParams { + int input_size; + int output_size; + nnMatrix weights; + nnMatrix biases; +} nnLinearParams; + +/// Layer type. +typedef enum nnLayerType { + nnLinear, nnSigmoid, nnRelu, -} nnActivation; +} nnLayerType; + +/// Neural network layer. +typedef struct nnLayer { + nnLayerType type; + union { + nnLinearParams linear; + }; +} nnLayer; /// Create a network. nnNeuralNetwork* nnMakeNet( - int num_layers, const int* layer_sizes, const nnActivation* activations); + const nnLayer* layers, int num_layers, int input_size); /// Delete the network and free its internal memory. void nnDeleteNet(nnNeuralNetwork**); -/// Set the network's weights. -void nnSetWeights(nnNeuralNetwork*, const R* weights); - -/// Set the network's biases. -void nnSetBiases(nnNeuralNetwork*, const R* biases); - /// Query the network. /// /// |input| is a matrix of inputs, one row per input and as many columns as the @@ -42,10 +55,10 @@ void nnQueryArray( /// Create a query object. /// -/// The query object holds all the internal memory required to query a network. -/// Query objects allocate all memory up front so that network queries can run -/// without additional memory allocation. 
-nnQueryObject* nnMakeQueryObject(const nnNeuralNetwork*, int num_inputs); +/// The query object holds all the internal memory required to query a network +/// with batches of the given size. Memory is allocated up front so that network +/// queries can run without additional memory allocation. +nnQueryObject* nnMakeQueryObject(const nnNeuralNetwork*, int batch_size); /// Delete the query object and free its internal memory. void nnDeleteQueryObject(nnQueryObject**); @@ -60,7 +73,7 @@ int nnNetInputSize(const nnNeuralNetwork*); int nnNetOutputSize(const nnNeuralNetwork*); /// Return the layer's input size. -int nnLayerInputSize(const nnMatrix* weights); +int nnLayerInputSize(const nnNeuralNetwork*, int layer); /// Return the layer's output size. -int nnLayerOutputSize(const nnMatrix* weights); +int nnLayerOutputSize(const nnNeuralNetwork*, int layer); diff --git a/src/lib/src/activation.h b/src/lib/src/activation.h index b56a69e..4c8a9e4 100644 --- a/src/lib/src/activation.h +++ b/src/lib/src/activation.h @@ -9,8 +9,8 @@ static inline R sigmoid(R x) { return 1. / (1. + exp(-x)); } static inline R relu(R x) { return fmax(0, x); } #define NN_MAP_ARRAY(f, in, out, size) \ - for (int i = 0; i < size; ++i) { \ - out[i] = f(in[i]); \ + for (int ii = 0; ii < size; ++ii) { \ + out[ii] = f(in[ii]); \ } #define sigmoid_array(in, out, size) NN_MAP_ARRAY(sigmoid, in, out, size) diff --git a/src/lib/src/matrix.c b/src/lib/src/matrix.c index d98c8bb..d5c3fcc 100644 --- a/src/lib/src/matrix.c +++ b/src/lib/src/matrix.c @@ -26,6 +26,12 @@ void nnMatrixDel(nnMatrix* matrix) { } } +nnMatrix nnMatrixFromArray(int rows, int cols, const R values[]) { + nnMatrix m = nnMatrixMake(rows, cols); + nnMatrixInit(&m, values); + return m; +} + void nnMatrixMove(nnMatrix* in, nnMatrix* out) { assert(in); assert(out); diff --git a/src/lib/src/neuralnet.c b/src/lib/src/neuralnet.c index a5fc59b..4322b8c 100644 --- a/src/lib/src/neuralnet.c +++ b/src/lib/src/neuralnet.c @@ -7,11 +7,65 @@ #include #include +static void MakeLayerImpl( + int prev_layer_output_size, const nnLayer* layer, nnLayerImpl* impl) { + impl->type = layer->type; + + switch (layer->type) { + case nnLinear: { + const nnLinearParams* params = &layer->linear; + nnLinearImpl* linear = &impl->linear; + + if ((params->input_size > 0) && (params->output_size > 0)) { + const int rows = params->input_size; + const int cols = params->output_size; + linear->weights = nnMatrixMake(rows, cols); + linear->biases = nnMatrixMake(1, cols); + linear->owned = true; + } else { + linear->weights = params->weights; + linear->biases = params->biases; + linear->owned = false; + } + + impl->input_size = linear->weights.rows; + impl->output_size = linear->weights.cols; + + break; + } + + // Activation layers. + case nnRelu: + case nnSigmoid: + impl->input_size = prev_layer_output_size; + impl->output_size = prev_layer_output_size; + break; + } +} + +static void DeleteLayer(nnLayerImpl* layer) { + switch (layer->type) { + case nnLinear: { + nnLinearImpl* linear = &layer->linear; + if (linear->owned) { + nnMatrixDel(&linear->weights); + nnMatrixDel(&linear->biases); + } + break; + } + + // No parameters for these layers. 
+ case nnRelu: + case nnSigmoid: + break; + } +} + nnNeuralNetwork* nnMakeNet( - int num_layers, const int* layer_sizes, const nnActivation* activations) { + const nnLayer* layers, int num_layers, int input_size) { + assert(layers); assert(num_layers > 0); - assert(layer_sizes); - assert(activations); + assert(input_size > 0); nnNeuralNetwork* net = calloc(1, sizeof(nnNeuralNetwork)); if (net == 0) { @@ -20,84 +74,38 @@ nnNeuralNetwork* nnMakeNet( net->num_layers = num_layers; - net->weights = calloc(num_layers, sizeof(nnMatrix)); - net->biases = calloc(num_layers, sizeof(nnMatrix)); - net->activations = calloc(num_layers, sizeof(nnActivation)); - if ((net->weights == 0) || (net->biases == 0) || (net->activations == 0)) { + net->layers = calloc(num_layers, sizeof(nnLayerImpl)); + if (net->layers == 0) { nnDeleteNet(&net); return 0; } + int prev_layer_output_size = input_size; for (int l = 0; l < num_layers; ++l) { - // layer_sizes = { input layer size, first hidden layer size, ...} - const int layer_input_size = layer_sizes[l]; - const int layer_output_size = layer_sizes[l + 1]; - - // We store the transpose of the weight matrix as written in textbooks. - // Our vectors are row vectors and the matrices row-major. - const int rows = layer_input_size; - const int cols = layer_output_size; - - net->weights[l] = nnMatrixMake(rows, cols); - net->biases[l] = nnMatrixMake(1, cols); - net->activations[l] = activations[l]; + MakeLayerImpl(prev_layer_output_size, &layers[l], &net->layers[l]); + prev_layer_output_size = net->layers[l].output_size; } return net; } -void nnDeleteNet(nnNeuralNetwork** net) { - if ((!net) || (!(*net))) { +void nnDeleteNet(nnNeuralNetwork** ppNet) { + if ((!ppNet) || (!(*ppNet))) { return; } - if ((*net)->weights != 0) { - for (int l = 0; l < (*net)->num_layers; ++l) { - nnMatrixDel(&(*net)->weights[l]); - } - free((*net)->weights); - (*net)->weights = 0; - } - if ((*net)->biases != 0) { - for (int l = 0; l < (*net)->num_layers; ++l) { - nnMatrixDel(&(*net)->biases[l]); - } - free((*net)->biases); - (*net)->biases = 0; - } - if ((*net)->activations) { - free((*net)->activations); - (*net)->activations = 0; - } - free(*net); - *net = 0; -} - -void nnSetWeights(nnNeuralNetwork* net, const R* weights) { - assert(net); - assert(weights); + nnNeuralNetwork* net = *ppNet; for (int l = 0; l < net->num_layers; ++l) { - nnMatrix* layer_weights = &net->weights[l]; - R* layer_values = layer_weights->values; - - for (int j = 0; j < layer_weights->rows * layer_weights->cols; ++j) { - *layer_values++ = *weights++; - } + DeleteLayer(&net->layers[l]); } -} - -void nnSetBiases(nnNeuralNetwork* net, const R* biases) { - assert(net); - assert(biases); - - for (int l = 0; l < net->num_layers; ++l) { - nnMatrix* layer_biases = &net->biases[l]; - R* layer_values = layer_biases->values; - for (int j = 0; j < layer_biases->rows * layer_biases->cols; ++j) { - *layer_values++ = *biases++; - } + if (net->layers) { + free(net->layers); + net->layers = 0; } + + free(net); + *ppNet = 0; } void nnQuery( @@ -114,35 +122,40 @@ void nnQuery( nnMatrix input_vector = nnMatrixBorrowRows((nnMatrix*)input, i, 1); for (int l = 0; l < net->num_layers; ++l) { - const nnMatrix* layer_weights = &net->weights[l]; - const nnMatrix* layer_biases = &net->biases[l]; - // Y^T = (W*X)^T = X^T*W^T - // - // TODO: If we had a row-row matrix multiplication, we could compute: - // Y^T = W ** X^T - // The row-row multiplication could be more cache-friendly. We just need - // to store W as is, without transposing. 
- // We could also rewrite the original Mul function to go row x row, - // decomposing the multiplication. Preserving the original meaning of Mul - // makes everything clearer. nnMatrix output_vector = nnMatrixBorrowRows(&query->layer_outputs[l], i, 1); - nnMatrixMul(&input_vector, layer_weights, &output_vector); - nnMatrixAddRow(&output_vector, layer_biases, &output_vector); - switch (net->activations[l]) { - case nnIdentity: - break; // Nothing to do for the identity function. - case nnSigmoid: - sigmoid_array( - output_vector.values, output_vector.values, output_vector.cols); + switch (net->layers[l].type) { + case nnLinear: { + const nnLinearImpl* linear = &net->layers[l].linear; + const nnMatrix* layer_weights = &linear->weights; + const nnMatrix* layer_biases = &linear->biases; + + // Y^T = (W*X)^T = X^T*W^T + // + // TODO: If we had a row-row matrix multiplication, we could compute: + // Y^T = W ** X^T + // + // The row-row multiplication could be more cache-friendly. We just need + // to store W as is, without transposing. + // + // We could also rewrite the original Mul function to go row x row, + // decomposing the multiplication. Preserving the original meaning of + // Mul makes everything clearer. + nnMatrixMul(&input_vector, layer_weights, &output_vector); + nnMatrixAddRow(&output_vector, layer_biases, &output_vector); break; + } case nnRelu: + assert(input_vector.cols == output_vector.cols); relu_array( - output_vector.values, output_vector.values, output_vector.cols); + input_vector.values, output_vector.values, output_vector.cols); + break; + case nnSigmoid: + assert(input_vector.cols == output_vector.cols); + sigmoid_array( + input_vector.values, output_vector.values, output_vector.cols); break; - default: - assert(0); } input_vector = output_vector; // Borrow. 
@@ -159,15 +172,15 @@ void nnQueryArray( assert(output); assert(net->num_layers > 0); - nnMatrix input_vector = nnMatrixMake(net->weights[0].cols, 1); + nnMatrix input_vector = nnMatrixMake(1, nnNetInputSize(net)); nnMatrixInit(&input_vector, input); nnQuery(net, query, &input_vector); nnMatrixRowToArray(query->network_outputs, 0, output); } -nnQueryObject* nnMakeQueryObject(const nnNeuralNetwork* net, int num_inputs) { +nnQueryObject* nnMakeQueryObject(const nnNeuralNetwork* net, int batch_size) { assert(net); - assert(num_inputs > 0); + assert(batch_size > 0); assert(net->num_layers > 0); nnQueryObject* query = calloc(1, sizeof(nnQueryObject)); @@ -183,11 +196,12 @@ nnQueryObject* nnMakeQueryObject(const nnNeuralNetwork* net, int num_inputs) { free(query); return 0; } + for (int l = 0; l < net->num_layers; ++l) { - const nnMatrix* layer_weights = &net->weights[l]; - const int layer_output_size = nnLayerOutputSize(layer_weights); - query->layer_outputs[l] = nnMatrixMake(num_inputs, layer_output_size); + const int layer_output_size = nnLayerOutputSize(net, l); + query->layer_outputs[l] = nnMatrixMake(batch_size, layer_output_size); } + query->network_outputs = &query->layer_outputs[net->num_layers - 1]; return query; @@ -213,23 +227,19 @@ const nnMatrix* nnNetOutputs(const nnQueryObject* query) { } int nnNetInputSize(const nnNeuralNetwork* net) { - assert(net); - assert(net->num_layers > 0); - return net->weights[0].rows; + return nnLayerInputSize(net, 0); } int nnNetOutputSize(const nnNeuralNetwork* net) { - assert(net); - assert(net->num_layers > 0); - return net->weights[net->num_layers - 1].cols; + return nnLayerOutputSize(net, net->num_layers - 1); } -int nnLayerInputSize(const nnMatrix* weights) { - assert(weights); - return weights->rows; +int nnLayerInputSize(const nnNeuralNetwork* net, int layer) { + assert(net); + return net->layers[layer].input_size; } -int nnLayerOutputSize(const nnMatrix* weights) { - assert(weights); - return weights->cols; +int nnLayerOutputSize(const nnNeuralNetwork* net, int layer) { + assert(net); + return net->layers[layer].output_size; } diff --git a/src/lib/src/neuralnet_impl.h b/src/lib/src/neuralnet_impl.h index f5a9c63..935c5ea 100644 --- a/src/lib/src/neuralnet_impl.h +++ b/src/lib/src/neuralnet_impl.h @@ -2,22 +2,29 @@ #include +#include + +/// Linear layer parameters. +typedef struct nnLinearImpl { + nnMatrix weights; + nnMatrix biases; + bool owned; /// Whether the library owns the weights and biases. +} nnLinearImpl; + +/// Neural network layer. +typedef struct nnLayerImpl { + nnLayerType type; + int input_size; + int output_size; + union { + nnLinearImpl linear; + }; +} nnLayerImpl; + /// Neural network object. -/// -/// We store the transposes of the weight matrices so that we can do forward -/// passes with a minimal amount of work. That is, if in paper we write: -/// -/// [w11 w21] -/// [w12 w22] -/// -/// then the weight matrix in memory is stored as the following array: -/// -/// w11 w12 w21 w22 typedef struct nnNeuralNetwork { - int num_layers; // Number of non-input layers (hidden + output). - nnMatrix* weights; // One matrix per non-input layer. - nnMatrix* biases; // One vector per non-input layer. - nnActivation* activations; // One per non-input layer. + int num_layers; // Number of non-input layers (hidden + output). + nnLayerImpl* layers; // One per non-input layer. } nnNeuralNetwork; /// A query object that holds all the memory necessary to query a network. 
diff --git a/src/lib/src/train.c b/src/lib/src/train.c index dc93f0f..98f58ad 100644 --- a/src/lib/src/train.c +++ b/src/lib/src/train.c @@ -38,7 +38,7 @@ typedef struct nnSigmoidGradientElements { /// each layer. A data type is defined for these because we allocate all the /// required memory up front before entering the training loop. typedef struct nnGradientElements { - nnActivation type; + nnLayerType type; // Gradient vector, same size as the layer. // This will contain the gradient expression except for the output value of // the previous layer. @@ -57,10 +57,27 @@ void nnInitNet( mt19937_64_init(&rng, seed); for (int l = 0; l < net->num_layers; ++l) { - nnMatrix* weights = &net->weights[l]; - nnMatrix* biases = &net->biases[l]; + // Get the layer's weights and biases, if any. + nnMatrix* weights = 0; + nnMatrix* biases = 0; + switch (net->layers[l].type) { + case nnLinear: { + nnLinearImpl* linear = &net->layers[l].linear; + + weights = &linear->weights; + biases = &linear->biases; + break; + } + // Activations. + case nnRelu: + case nnSigmoid: + break; + } + if (!weights || !biases) { + continue; + } - const R layer_size = (R)nnLayerInputSize(weights); + const R layer_size = (R)nnLayerInputSize(net, l); const R scale = 1. / layer_size; const R stdev = 1. / sqrt((R)layer_size); const R sigma = stdev * stdev; @@ -128,9 +145,6 @@ void nnTrain( // with one sample at a time. nnMatrix* errors = calloc(net->num_layers, sizeof(nnMatrix)); - // Allocate the weight transpose matrices up front for backpropagation. - // nnMatrix* weights_T = calloc(net->num_layers, sizeof(nnMatrix)); - // Allocate the weight delta matrices. nnMatrix* weight_deltas = calloc(net->num_layers, sizeof(nnMatrix)); @@ -144,30 +158,24 @@ void nnTrain( nnMatrix* outputs_T = calloc(net->num_layers, sizeof(nnMatrix)); assert(errors != 0); - // assert(weights_T != 0); assert(weight_deltas != 0); assert(gradient_elems); assert(outputs_T); for (int l = 0; l < net->num_layers; ++l) { - const nnMatrix* layer_weights = &net->weights[l]; - const int layer_output_size = net->weights[l].cols; - const nnActivation activation = net->activations[l]; - - errors[l] = nnMatrixMake(1, layer_weights->cols); - - // weights_T[l] = nnMatrixMake(layer_weights->cols, layer_weights->rows); - // nnMatrixTranspose(layer_weights, &weights_T[l]); - - weight_deltas[l] = nnMatrixMake(layer_weights->rows, layer_weights->cols); + const int layer_input_size = nnLayerInputSize(net, l); + const int layer_output_size = nnLayerOutputSize(net, l); + const nnLayerImpl* layer = &net->layers[l]; - outputs_T[l] = nnMatrixMake(layer_output_size, 1); + errors[l] = nnMatrixMake(1, layer_output_size); + weight_deltas[l] = nnMatrixMake(layer_input_size, layer_output_size); + outputs_T[l] = nnMatrixMake(layer_output_size, 1); // Allocate the gradient elements and vectors for weight delta calculation. nnGradientElements* elems = &gradient_elems[l]; - elems->type = activation; - switch (activation) { - case nnIdentity: + elems->type = layer->type; + switch (layer->type) { + case nnLinear: break; // Gradient vector will be borrowed, no need to allocate. case nnSigmoid: @@ -208,6 +216,7 @@ void nnTrain( // For now, we train with one sample at a time. for (int sample = 0; sample < inputs->rows; ++sample) { + // TODO: Introduce a BorrowMut. // Slice the input and target matrices with the batch size. // We are not mutating the inputs, but we need the cast to borrow. 
nnMatrix training_inputs = @@ -219,15 +228,16 @@ void nnTrain( // Assuming one training input per iteration for now. nnMatrixTranspose(&training_inputs, &training_inputs_T); - // Run a forward pass and compute the output layer error relevant to the - // derivative: o-t. - // Error: (t-o)^2 - // dE/do = -2(t-o) - // = +2(o-t) + // Forward pass. + nnQuery(net, query, &training_inputs); + + // Compute the error derivative: o-t. + // Error: 1/2 (t-o)^2 + // dE/do = -(t-o) + // = +(o-t) // Note that we compute o-t instead to remove that outer negative sign. // The 2 is dropped because we are only interested in the direction of the // gradient. The learning rate controls the magnitude. - nnQuery(net, query, &training_inputs); nnMatrixSub( training_outputs, &training_targets, &errors[net->num_layers - 1]); @@ -236,68 +246,86 @@ void nnTrain( nnMatrixTranspose(&query->layer_outputs[l], &outputs_T[l]); } - // Update weights and biases for each internal layer, backpropagating + // Update weights and biases for each internal layer, back-propagating // errors along the way. for (int l = net->num_layers - 1; l >= 0; --l) { - const nnMatrix* layer_output = &query->layer_outputs[l]; - nnMatrix* layer_weights = &net->weights[l]; - nnMatrix* layer_biases = &net->biases[l]; - nnGradientElements* elems = &gradient_elems[l]; - nnMatrix* gradient = &elems->gradient; - const nnActivation activation = net->activations[l]; - - // Compute the gradient (the part of the expression that does not - // contain the output of the previous layer). + const nnMatrix* layer_output = &query->layer_outputs[l]; + nnGradientElements* elems = &gradient_elems[l]; + nnMatrix* gradient = &elems->gradient; + nnLayerImpl* layer = &net->layers[l]; + + // Compute this layer's gradient. + // + // By "gradient" we mean the expression common to the weights and bias + // gradients. This is the part of the expression that does not contain + // this layer's input. // - // Identity: G = error_k - // Sigmoid: G = error_k * output_k * (1 - output_k). - // Relu: G = error_k * (output_k > 0 ? 1 : 0) - switch (activation) { - case nnIdentity: + // Linear: G = id + // Relu: G = (output_k > 0 ? 1 : 0) + // Sigmoid: G = output_k * (1 - output_k) + switch (layer->type) { + case nnLinear: { // TODO: Just copy the pointer? *gradient = nnMatrixBorrow(&errors[l]); break; + } + case nnRelu: + nnMatrixGt(layer_output, 0, gradient); + break; case nnSigmoid: nnMatrixSub(&elems->sigmoid.ones, layer_output, gradient); nnMatrixMulPairs(layer_output, gradient, gradient); - nnMatrixMulPairs(&errors[l], gradient, gradient); - break; - case nnRelu: - nnMatrixGt(layer_output, 0, gradient); - nnMatrixMulPairs(&errors[l], gradient, gradient); break; } - // Outer product to compute the weight deltas. - const nnMatrix* output_T = - (l == 0) ? &training_inputs_T : &outputs_T[l - 1]; - nnMatrixMul(output_T, gradient, &weight_deltas[l]); - - // Backpropagate the error before updating weights. + // Back-propagate the error. + // + // This combines this layer's gradient with the back-propagated error, + // which is the combination of the gradients of subsequent layers down + // to the output layer error. + // + // Note that this step uses the layer's original weights. if (l > 0) { - // G * W^T == G *^T W. - // nnMatrixMul(gradient, &weights_T[l], &errors[l-1]); - nnMatrixMulRows(gradient, layer_weights, &errors[l - 1]); + switch (layer->type) { + case nnLinear: { + const nnMatrix* layer_weights = &layer->linear.weights; + // E * W^T == E *^T W. 
+ // Using nnMatrixMulRows, we avoid having to transpose the weight + // matrix. + nnMatrixMulRows(&errors[l], layer_weights, &errors[l - 1]); + break; + } + // For activations, the error back-propagates as is but multiplied by + // the layer's gradient. + case nnRelu: + case nnSigmoid: + nnMatrixMulPairs(&errors[l], gradient, &errors[l - 1]); + break; + } } - // Update weights. - nnMatrixScale(&weight_deltas[l], params->learning_rate); - // The gradient has a negative sign from -(t - o), but we have computed - // e = o - t instead, so we can subtract directly. - // nnMatrixAdd(layer_weights, &weight_deltas[l], layer_weights); - nnMatrixSub(layer_weights, &weight_deltas[l], layer_weights); - - // Update weight transpose matrix for the next training iteration. - // nnMatrixTranspose(layer_weights, &weights_T[l]); - - // Update biases. - // This is the same formula as for weights, except that the o_j term is - // just 1. We can simply re-use the gradient that we have already - // computed for the weight update. - // nnMatrixMulAdd(layer_biases, gradient, params->learning_rate, - // layer_biases); - nnMatrixMulSub( - layer_biases, gradient, params->learning_rate, layer_biases); + // Update layer weights. + if (layer->type == nnLinear) { + nnLinearImpl* linear = &layer->linear; + nnMatrix* layer_weights = &linear->weights; + nnMatrix* layer_biases = &linear->biases; + + // Outer product to compute the weight deltas. + // This layer's input is the previous layer's output. + const nnMatrix* input_T = + (l == 0) ? &training_inputs_T : &outputs_T[l - 1]; + nnMatrixMul(input_T, gradient, &weight_deltas[l]); + + // Update weights. + nnMatrixScale(&weight_deltas[l], params->learning_rate); + nnMatrixSub(layer_weights, &weight_deltas[l], layer_weights); + + // Update biases. + // This is the same formula as for weights, except that the o_j term + // is just 1. + nnMatrixMulSub( + layer_biases, gradient, params->learning_rate, layer_biases); + } } // TODO: Add this under a verbose debugging mode. @@ -334,12 +362,11 @@ void nnTrain( for (int l = 0; l < net->num_layers; ++l) { nnMatrixDel(&errors[l]); nnMatrixDel(&outputs_T[l]); - // nnMatrixDel(&weights_T[l]); nnMatrixDel(&weight_deltas[l]); nnGradientElements* elems = &gradient_elems[l]; switch (elems->type) { - case nnIdentity: + case nnLinear: break; // Gradient vector is borrowed, no need to deallocate. 
case nnSigmoid: @@ -355,7 +382,6 @@ void nnTrain( nnMatrixDel(&training_inputs_T); free(errors); free(outputs_T); - // free(weights_T); free(weight_deltas); free(gradient_elems); } diff --git a/src/lib/test/neuralnet_test.c b/src/lib/test/neuralnet_test.c index 14d9438..0f8d7b8 100644 --- a/src/lib/test/neuralnet_test.c +++ b/src/lib/test/neuralnet_test.c @@ -1,8 +1,8 @@ #include -#include #include "activation.h" #include "neuralnet_impl.h" +#include #include "test.h" #include "test_util.h" @@ -10,23 +10,31 @@ #include TEST_CASE(neuralnet_perceptron_test) { - const int num_layers = 1; - const int layer_sizes[] = { 1, 1 }; - const nnActivation layer_activations[] = { nnSigmoid }; - const R weights[] = { 0.3 }; + const int num_layers = 2; + const int input_size = 1; + const R weights[] = {0.3}; + const R biases[] = {0.0}; + const nnLayer layers[] = { + {.type = nnLinear, + .linear = + {.weights = nnMatrixFromArray(1, 1, weights), + .biases = nnMatrixFromArray(1, 1, biases)}}, + {.type = nnSigmoid}, + }; - nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations); + nnNeuralNetwork* net = nnMakeNet(layers, num_layers, input_size); assert(net); - nnSetWeights(net, weights); - nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/1); + nnQueryObject* query = nnMakeQueryObject(net, 1); - const R input[] = { 0.9 }; - R output[1]; + const R input[] = {0.9}; + R output[1]; nnQueryArray(net, query, input, output); const R expected_output = sigmoid(input[0] * weights[0]); - printf("\nOutput: %f, Expected: %f\n", output[0], expected_output); + printf( + "\n[neuralnet_perceptron_test] Output: %f, Expected: %f\n", output[0], + expected_output); TEST_TRUE(double_eq(output[0], expected_output, EPS)); nnDeleteQueryObject(&query); @@ -34,53 +42,66 @@ TEST_CASE(neuralnet_perceptron_test) { } TEST_CASE(neuralnet_xor_test) { - const int num_layers = 2; - const int layer_sizes[] = { 2, 2, 1 }; - const nnActivation layer_activations[] = { nnRelu, nnIdentity }; - const R weights[] = { - 1, 1, 1, 1, // First (hidden) layer. - 1, -2 // Second (output) layer. - }; - const R biases[] = { - 0, -1, // First (hidden) layer. - 0 // Second (output) layer. + // First (hidden) layer. + const R weights0[] = {1, 1, 1, 1}; + const R biases0[] = {0, -1}; + // Second (output) layer. + const R weights1[] = {1, -2}; + const R biases1[] = {0}; + // Network. + const int num_layers = 3; + const int input_size = 2; + const nnLayer layers[] = { + {.type = nnLinear, + .linear = + {.weights = nnMatrixFromArray(2, 2, weights0), + .biases = nnMatrixFromArray(1, 2, biases0)}}, + {.type = nnRelu}, + {.type = nnLinear, + .linear = + {.weights = nnMatrixFromArray(2, 1, weights1), + .biases = nnMatrixFromArray(1, 1, biases1)}}, }; - nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations); + nnNeuralNetwork* net = nnMakeNet(layers, num_layers, input_size); assert(net); - nnSetWeights(net, weights); - nnSetBiases(net, biases); // First layer weights. - TEST_EQUAL(nnMatrixAt(&net->weights[0], 0, 0), 1); - TEST_EQUAL(nnMatrixAt(&net->weights[0], 0, 1), 1); - TEST_EQUAL(nnMatrixAt(&net->weights[0], 0, 2), 1); - TEST_EQUAL(nnMatrixAt(&net->weights[0], 0, 3), 1); - // Second layer weights. 
- TEST_EQUAL(nnMatrixAt(&net->weights[1], 0, 0), 1); - TEST_EQUAL(nnMatrixAt(&net->weights[1], 0, 1), -2); + TEST_EQUAL(nnMatrixAt(&net->layers[0].linear.weights, 0, 0), 1); + TEST_EQUAL(nnMatrixAt(&net->layers[0].linear.weights, 0, 1), 1); + TEST_EQUAL(nnMatrixAt(&net->layers[0].linear.weights, 0, 2), 1); + TEST_EQUAL(nnMatrixAt(&net->layers[0].linear.weights, 0, 3), 1); + // Second linear layer (third layer) weights. + TEST_EQUAL(nnMatrixAt(&net->layers[2].linear.weights, 0, 0), 1); + TEST_EQUAL(nnMatrixAt(&net->layers[2].linear.weights, 0, 1), -2); // First layer biases. - TEST_EQUAL(nnMatrixAt(&net->biases[0], 0, 0), 0); - TEST_EQUAL(nnMatrixAt(&net->biases[0], 0, 1), -1); - // Second layer biases. - TEST_EQUAL(nnMatrixAt(&net->biases[1], 0, 0), 0); + TEST_EQUAL(nnMatrixAt(&net->layers[0].linear.biases, 0, 0), 0); + TEST_EQUAL(nnMatrixAt(&net->layers[0].linear.biases, 0, 1), -1); + // Second linear layer (third layer) biases. + TEST_EQUAL(nnMatrixAt(&net->layers[2].linear.biases, 0, 0), 0); // Test. - #define M 4 +#define M 4 - nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/M); + nnQueryObject* query = nnMakeQueryObject(net, M); - const R test_inputs[M][2] = { { 0., 0. }, { 1., 0. }, { 0., 1. }, { 1., 1. } }; + const R test_inputs[M][2] = { + {0., 0.}, + {1., 0.}, + {0., 1.}, + {1., 1.} + }; nnMatrix test_inputs_matrix = nnMatrixMake(M, 2); nnMatrixInit(&test_inputs_matrix, (const R*)test_inputs); nnQuery(net, query, &test_inputs_matrix); - const R expected_outputs[M] = { 0., 1., 1., 0. }; + const R expected_outputs[M] = {0., 1., 1., 0.}; for (int i = 0; i < M; ++i) { const R test_output = nnMatrixAt(nnNetOutputs(query), i, 0); - printf("\nInput: (%f, %f), Output: %f, Expected: %f\n", - test_inputs[i][0], test_inputs[i][1], test_output, expected_outputs[i]); + printf( + "\nInput: (%f, %f), Output: %f, Expected: %f\n", test_inputs[i][0], + test_inputs[i][1], test_output, expected_outputs[i]); } for (int i = 0; i < M; ++i) { const R test_output = nnMatrixAt(nnNetOutputs(query), i, 0); diff --git a/src/lib/test/train_linear_perceptron_non_origin_test.c b/src/lib/test/train_linear_perceptron_non_origin_test.c index 5a320ac..40a42e0 100644 --- a/src/lib/test/train_linear_perceptron_non_origin_test.c +++ b/src/lib/test/train_linear_perceptron_non_origin_test.c @@ -1,9 +1,8 @@ #include +#include "neuralnet_impl.h" #include #include -#include "activation.h" -#include "neuralnet_impl.h" #include "test.h" #include "test_util.h" @@ -11,19 +10,21 @@ #include TEST_CASE(neuralnet_train_linear_perceptron_non_origin_test) { - const int num_layers = 1; - const int layer_sizes[] = { 1, 1 }; - const nnActivation layer_activations[] = { nnIdentity }; + const int num_layers = 1; + const int input_size = 1; + const nnLayer layers[] = { + {.type = nnLinear, .linear = {.input_size = 1, .output_size = 1}} + }; - nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations); + nnNeuralNetwork* net = nnMakeNet(layers, num_layers, input_size); assert(net); - // Train. +// Train. - // Try to learn the Y = 2X + 1 line. - #define N 2 - const R inputs[N] = { 0., 1. }; - const R targets[N] = { 1., 3. }; +// Try to learn the Y = 2X + 1 line. 
+#define N 2 + const R inputs[N] = {0., 1.}; + const R targets[N] = {1., 3.}; nnMatrix inputs_matrix = nnMatrixMake(N, 1); nnMatrix targets_matrix = nnMatrixMake(N, 1); @@ -31,31 +32,32 @@ TEST_CASE(neuralnet_train_linear_perceptron_non_origin_test) { nnMatrixInit(&targets_matrix, targets); nnTrainingParams params = { - .learning_rate = 0.7, - .max_iterations = 20, - .seed = 0, - .weight_init = nnWeightInit01, - .debug = false, + .learning_rate = 0.7, + .max_iterations = 20, + .seed = 0, + .weight_init = nnWeightInit01, + .debug = false, }; nnTrain(net, &inputs_matrix, &targets_matrix, ¶ms); - const R weight = nnMatrixAt(&net->weights[0], 0, 0); + const R weight = nnMatrixAt(&net->layers[0].linear.weights, 0, 0); const R expected_weight = 2.0; - printf("\nTrained network weight: %f, Expected: %f\n", weight, expected_weight); + printf( + "\nTrained network weight: %f, Expected: %f\n", weight, expected_weight); TEST_TRUE(double_eq(weight, expected_weight, WEIGHT_EPS)); - const R bias = nnMatrixAt(&net->biases[0], 0, 0); + const R bias = nnMatrixAt(&net->layers[0].linear.biases, 0, 0); const R expected_bias = 1.0; printf("Trained network bias: %f, Expected: %f\n", bias, expected_bias); TEST_TRUE(double_eq(bias, expected_bias, WEIGHT_EPS)); // Test. - nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/1); + nnQueryObject* query = nnMakeQueryObject(net, 1); - const R test_input[] = { 2.3 }; - R test_output[1]; + const R test_input[] = {2.3}; + R test_output[1]; nnQueryArray(net, query, test_input, test_output); const R expected_output = test_input[0] * expected_weight + expected_bias; diff --git a/src/lib/test/train_linear_perceptron_test.c b/src/lib/test/train_linear_perceptron_test.c index 2b1336d..667643b 100644 --- a/src/lib/test/train_linear_perceptron_test.c +++ b/src/lib/test/train_linear_perceptron_test.c @@ -1,9 +1,8 @@ #include +#include "neuralnet_impl.h" #include #include -#include "activation.h" -#include "neuralnet_impl.h" #include "test.h" #include "test_util.h" @@ -11,19 +10,21 @@ #include TEST_CASE(neuralnet_train_linear_perceptron_test) { - const int num_layers = 1; - const int layer_sizes[] = { 1, 1 }; - const nnActivation layer_activations[] = { nnIdentity }; + const int num_layers = 1; + const int input_size = 1; + const nnLayer layers[] = { + {.type = nnLinear, .linear = {.input_size = 1, .output_size = 1}} + }; - nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations); + nnNeuralNetwork* net = nnMakeNet(layers, num_layers, input_size); assert(net); - // Train. +// Train. - // Try to learn the Y=X line. - #define N 2 - const R inputs[N] = { 0., 1. }; - const R targets[N] = { 0., 1. }; +// Try to learn the Y=X line. 
+#define N 2 + const R inputs[N] = {0., 1.}; + const R targets[N] = {0., 1.}; nnMatrix inputs_matrix = nnMatrixMake(N, 1); nnMatrix targets_matrix = nnMatrixMake(N, 1); @@ -31,26 +32,27 @@ TEST_CASE(neuralnet_train_linear_perceptron_test) { nnMatrixInit(&targets_matrix, targets); nnTrainingParams params = { - .learning_rate = 0.7, - .max_iterations = 10, - .seed = 0, - .weight_init = nnWeightInit01, - .debug = false, + .learning_rate = 0.7, + .max_iterations = 10, + .seed = 0, + .weight_init = nnWeightInit01, + .debug = false, }; nnTrain(net, &inputs_matrix, &targets_matrix, ¶ms); - const R weight = nnMatrixAt(&net->weights[0], 0, 0); + const R weight = nnMatrixAt(&net->layers[0].linear.weights, 0, 0); const R expected_weight = 1.0; - printf("\nTrained network weight: %f, Expected: %f\n", weight, expected_weight); + printf( + "\nTrained network weight: %f, Expected: %f\n", weight, expected_weight); TEST_TRUE(double_eq(weight, expected_weight, WEIGHT_EPS)); // Test. - nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/1); + nnQueryObject* query = nnMakeQueryObject(net, 1); - const R test_input[] = { 2.3 }; - R test_output[1]; + const R test_input[] = {2.3}; + R test_output[1]; nnQueryArray(net, query, test_input, test_output); const R expected_output = test_input[0]; diff --git a/src/lib/test/train_sigmoid_test.c b/src/lib/test/train_sigmoid_test.c index 588e7ca..39a84b0 100644 --- a/src/lib/test/train_sigmoid_test.c +++ b/src/lib/test/train_sigmoid_test.c @@ -1,9 +1,9 @@ #include -#include -#include #include "activation.h" #include "neuralnet_impl.h" +#include +#include #include "test.h" #include "test_util.h" @@ -11,21 +11,24 @@ #include TEST_CASE(neuralnet_train_sigmoid_test) { - const int num_layers = 1; - const int layer_sizes[] = { 1, 1 }; - const nnActivation layer_activations[] = { nnSigmoid }; + const int num_layers = 2; + const int input_size = 1; + const nnLayer layers[] = { + {.type = nnLinear, .linear = {.input_size = 1, .output_size = 1}}, + {.type = nnSigmoid}, + }; - nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations); + nnNeuralNetwork* net = nnMakeNet(layers, num_layers, input_size); assert(net); - // Train. +// Train. - // Try to learn the sigmoid function. - #define N 3 +// Try to learn the sigmoid function. +#define N 3 R inputs[N]; R targets[N]; for (int i = 0; i < N; ++i) { - inputs[i] = lerp(-1, +1, (R)i / (R)(N-1)); + inputs[i] = lerp(-1, +1, (R)i / (R)(N - 1)); targets[i] = sigmoid(inputs[i]); } @@ -35,29 +38,30 @@ TEST_CASE(neuralnet_train_sigmoid_test) { nnMatrixInit(&targets_matrix, targets); nnTrainingParams params = { - .learning_rate = 0.9, - .max_iterations = 100, - .seed = 0, - .weight_init = nnWeightInit01, - .debug = false, + .learning_rate = 0.9, + .max_iterations = 100, + .seed = 0, + .weight_init = nnWeightInit01, + .debug = false, }; nnTrain(net, &inputs_matrix, &targets_matrix, ¶ms); - const R weight = nnMatrixAt(&net->weights[0], 0, 0); + const R weight = nnMatrixAt(&net->layers[0].linear.weights, 0, 0); const R expected_weight = 1.0; - printf("\nTrained network weight: %f, Expected: %f\n", weight, expected_weight); + printf( + "\nTrained network weight: %f, Expected: %f\n", weight, expected_weight); TEST_TRUE(double_eq(weight, expected_weight, WEIGHT_EPS)); // Test. 
- nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/1); + nnQueryObject* query = nnMakeQueryObject(net, 1); - const R test_input[] = { 0.3 }; - R test_output[1]; + const R test_input[] = {0.3}; + R test_output[1]; nnQueryArray(net, query, test_input, test_output); - const R expected_output = 0.574442516811659; // sigmoid(0.3) + const R expected_output = 0.574442516811659; // sigmoid(0.3) printf("Output: %f, Expected: %f\n", test_output[0], expected_output); TEST_TRUE(double_eq(test_output[0], expected_output, OUTPUT_EPS)); diff --git a/src/lib/test/train_xor_test.c b/src/lib/test/train_xor_test.c index 6ddc6e0..78695a3 100644 --- a/src/lib/test/train_xor_test.c +++ b/src/lib/test/train_xor_test.c @@ -1,9 +1,9 @@ #include -#include -#include #include "activation.h" #include "neuralnet_impl.h" +#include +#include #include "test.h" #include "test_util.h" @@ -11,18 +11,27 @@ #include TEST_CASE(neuralnet_train_xor_test) { - const int num_layers = 2; - const int layer_sizes[] = { 2, 2, 1 }; - const nnActivation layer_activations[] = { nnRelu, nnIdentity }; + const int num_layers = 3; + const int input_size = 2; + const nnLayer layers[] = { + {.type = nnLinear, .linear = {.input_size = 2, .output_size = 2}}, + {.type = nnRelu}, + {.type = nnLinear, .linear = {.input_size = 2, .output_size = 1}} + }; - nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations); + nnNeuralNetwork* net = nnMakeNet(layers, num_layers, input_size); assert(net); // Train. - #define N 4 - const R inputs[N][2] = { { 0., 0. }, { 0., 1. }, { 1., 0. }, { 1., 1. } }; - const R targets[N] = { 0., 1., 1., 0. }; +#define N 4 + const R inputs[N][2] = { + {0., 0.}, + {0., 1.}, + {1., 0.}, + {1., 1.} + }; + const R targets[N] = {0., 1., 1., 0.}; nnMatrix inputs_matrix = nnMatrixMake(N, 2); nnMatrix targets_matrix = nnMatrixMake(N, 1); @@ -30,31 +39,37 @@ TEST_CASE(neuralnet_train_xor_test) { nnMatrixInit(&targets_matrix, targets); nnTrainingParams params = { - .learning_rate = 0.1, - .max_iterations = 500, - .seed = 0, - .weight_init = nnWeightInit01, - .debug = false, + .learning_rate = 0.1, + .max_iterations = 500, + .seed = 0, + .weight_init = nnWeightInit01, + .debug = false, }; nnTrain(net, &inputs_matrix, &targets_matrix, ¶ms); // Test. - #define M 4 +#define M 4 - nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/M); + nnQueryObject* query = nnMakeQueryObject(net, M); - const R test_inputs[M][2] = { { 0., 0. }, { 1., 0. }, { 0., 1. }, { 1., 1. } }; + const R test_inputs[M][2] = { + {0., 0.}, + {1., 0.}, + {0., 1.}, + {1., 1.} + }; nnMatrix test_inputs_matrix = nnMatrixMake(M, 2); nnMatrixInit(&test_inputs_matrix, (const R*)test_inputs); nnQuery(net, query, &test_inputs_matrix); - const R expected_outputs[M] = { 0., 1., 1., 0. }; + const R expected_outputs[M] = {0., 1., 1., 0.}; for (int i = 0; i < M; ++i) { const R test_output = nnMatrixAt(nnNetOutputs(query), i, 0); - printf("\nInput: (%f, %f), Output: %f, Expected: %f\n", - test_inputs[i][0], test_inputs[i][1], test_output, expected_outputs[i]); + printf( + "\nInput: (%f, %f), Output: %f, Expected: %f\n", test_inputs[i][0], + test_inputs[i][1], test_output, expected_outputs[i]); } for (int i = 0; i < M; ++i) { const R test_output = nnMatrixAt(nnNetOutputs(query), i, 0); -- cgit v1.2.3
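Usage note: the doc comment on nnLinearParams in the header above distinguishes a training construction (input/output sizes only) from an inference construction (explicit weights and biases). Below is a minimal sketch of the new layer-based API, pieced together from neuralnet_test.c and the public header in this patch. The include paths and the main() wrapper are assumptions for illustration; the types and functions used (nnLayer, nnMakeNet, nnMakeQueryObject, nnQueryArray, nnMatrixFromArray, nnDeleteQueryObject, nnDeleteNet) appear verbatim in the diff.

/* Minimal usage sketch of the post-patch API; the two include paths are
 * assumed from the src/lib/include layout and are not part of the patch. */
#include <neuralnet/matrix.h>    /* assumed path */
#include <neuralnet/neuralnet.h> /* assumed path */

#include <stdio.h>

int main(void) {
  /* Inference-style construction: a 1x1 linear layer with explicit weights
   * and biases, followed by a sigmoid activation, mirroring
   * neuralnet_perceptron_test. For training, set .linear.input_size and
   * .linear.output_size instead and let nnTrain initialize the weights. */
  const R w[] = {0.3};
  const R b[] = {0.0};
  const nnLayer layers[] = {
      {.type   = nnLinear,
       .linear = {.weights = nnMatrixFromArray(1, 1, w),
                  .biases  = nnMatrixFromArray(1, 1, b)}},
      {.type = nnSigmoid},
  };

  nnNeuralNetwork* net = nnMakeNet(layers, /*num_layers=*/2, /*input_size=*/1);
  nnQueryObject* query = nnMakeQueryObject(net, /*batch_size=*/1);

  const R input[] = {0.9};
  R output[1];
  nnQueryArray(net, query, input, output);
  printf("output: %f\n", output[0]); /* expected: sigmoid(0.9 * 0.3) */

  nnDeleteQueryObject(&query);
  nnDeleteNet(&net);
  return 0;
}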