13 files changed, 559 insertions, 429 deletions
diff --git a/src/bin/mnist/src/main.c b/src/bin/mnist/src/main.c
index 9aa3ce5..53e0197 100644
--- a/src/bin/mnist/src/main.c
+++ b/src/bin/mnist/src/main.c
@@ -29,32 +29,35 @@ static const double LABEL_UPPER_BOUND = 0.99;
 // Epsilon used to compare R values.
 static const double EPS = 1e-10;
-#define min(a,b) ((a) < (b) ? (a) : (b))
+#define min(a, b) ((a) < (b) ? (a) : (b))
 typedef struct ImageSet {
-  nnMatrix images;  // Images flattened into row vectors of the matrix.
+  nnMatrix images; // Images flattened into row vectors of the matrix.
-  nnMatrix labels;  // One-hot-encoded labels.
+  nnMatrix labels; // One-hot-encoded labels.
-  int count;        // Number of images and labels.
+  int      count;  // Number of images and labels.
-  int rows;         // Rows in an image.
+  int      rows;   // Rows in an image.
-  int cols;         // Columns in an image.
+  int      cols;   // Columns in an image.
 } ImageSet;
 static void usage(const char* argv0) {
-  fprintf(stderr, "Usage: %s <path to mnist files directory> [num images]\n", argv0);
+  fprintf(
+      stderr, "Usage: %s <path to mnist files directory> [num images]\n",
+      argv0);
  fprintf(stderr, "\n");
-  fprintf(stderr, "  Use -1 for [num images] to use all the images in the data set\n");
+  fprintf(
+      stderr,
+      "  Use -1 for [num images] to use all the images in the data set\n");
 }
-static bool R_eq(R a, R b) {
+static bool R_eq(R a, R b) { return fabs(a - b) <= EPS; }
-  return fabs(a-b) <= EPS;
-}
-static void PrintImage(const nnMatrix* images, int rows, int cols, int image_index) {
+static void PrintImage(
+    const nnMatrix* images, int rows, int cols, int image_index) {
  assert(images);
  assert((0 <= image_index) && (image_index < images->rows));
  // Top line.
-  for (int j = 0; j < cols/2; ++j) {
+  for (int j = 0; j < cols / 2; ++j) {
    printf(" -");
  }
  printf("\n");
@@ -68,8 +71,7 @@ static void PrintImage(const nnMatrix* images, int rows, int cols, int image_ind
        printf("#");
      } else if (*value > 0.5) {
        printf("*");
-      }
+      } else if (*value > PIXEL_LOWER_BOUND) {
-      else if (*value > PIXEL_LOWER_BOUND) {
        printf(":");
      } else if (*value == 0.0) {
        // Values should not be exactly 0, otherwise they cancel out weights
@@ -84,7 +86,7 @@ static void PrintImage(const nnMatrix* images, int rows, int cols, int image_ind
  }
  // Bottom line.
-  for (int j = 0; j < cols/2; ++j) {
+  for (int j = 0; j < cols / 2; ++j) {
    printf(" -");
  }
  printf("\n");
@@ -96,7 +98,7 @@ static void PrintLabel(const nnMatrix* labels, int label_index) {
  // Compute the label from the one-hot encoding.
  const R* value = nnMatrixRow(labels, label_index);
-  int label = -1;
+  int      label = -1;
  for (int i = 0; i < 10; ++i) {
    if (R_eq(*value++, LABEL_UPPER_BOUND)) {
      label = i;
@@ -113,13 +115,12 @@ static void PrintLabel(const nnMatrix* labels, int label_index) {
  printf(")\n");
 }
-static R lerp(R a, R b, R t) {
+static R lerp(R a, R b, R t) { return a + t * (b - a); }
-  return a + t*(b-a);
-}
 /// Rescales a pixel from [0,255] to [PIXEL_LOWER_BOUND, 1.0].
 static R FormatPixel(uint8_t pixel) {
-  const R value = (R)(pixel) / 255.0 * (1.0 - PIXEL_LOWER_BOUND) + PIXEL_LOWER_BOUND;
+  const R value =
+      (R)(pixel) / 255.0 * (1.0 - PIXEL_LOWER_BOUND) + PIXEL_LOWER_BOUND;
  assert(value >= PIXEL_LOWER_BOUND);
  assert(value <= 1.0);
  return value;
@@ -152,7 +153,8 @@ static void ImageToMatrix(
  }
 }
-static bool ReadImages(gzFile images_file, int max_num_images, ImageSet* image_set) {
+static bool ReadImages(
+    gzFile images_file, int max_num_images, ImageSet* image_set) {
  assert(images_file != Z_NULL);
  assert(image_set);
@@ -161,36 +163,41 @@ static bool ReadImages(gzFile images_file, int max_num_images, ImageSet* image_s
  uint8_t* pixels = 0;
  int32_t magic, total_images, rows, cols;
-  if ( (gzread(images_file, (char*)&magic, sizeof(int32_t)) != sizeof(int32_t)) ||
+  if ((gzread(images_file, (char*)&magic, sizeof(int32_t)) !=
-       (gzread(images_file, (char*)&total_images, sizeof(int32_t)) != sizeof(int32_t)) ||
+       sizeof(int32_t)) ||
-       (gzread(images_file, (char*)&rows, sizeof(int32_t)) != sizeof(int32_t)) ||
+      (gzread(images_file, (char*)&total_images, sizeof(int32_t)) !=
-       (gzread(images_file, (char*)&cols, sizeof(int32_t)) != sizeof(int32_t)) ) {
+       sizeof(int32_t)) ||
+      (gzread(images_file, (char*)&rows, sizeof(int32_t)) != sizeof(int32_t)) ||
+      (gzread(images_file, (char*)&cols, sizeof(int32_t)) != sizeof(int32_t))) {
    fprintf(stderr, "Failed to read header\n");
    goto cleanup;
  }
-  magic = ReverseEndian32(magic);
+  magic        = ReverseEndian32(magic);
  total_images = ReverseEndian32(total_images);
-  rows = ReverseEndian32(rows);
+  rows         = ReverseEndian32(rows);
-  cols = ReverseEndian32(cols);
+  cols         = ReverseEndian32(cols);
  if (magic != IMAGE_FILE_MAGIC) {
-    fprintf(stderr, "Magic number mismatch. Got %x, expected: %x\n",
+    fprintf(
-      magic, IMAGE_FILE_MAGIC);
+        stderr, "Magic number mismatch. Got %x, expected: %x\n", magic,
+        IMAGE_FILE_MAGIC);
    goto cleanup;
  }
-  printf("Magic: %.8x\nTotal images: %d\nRows: %d\nCols: %d\n",
+  printf(
-    magic, total_images, rows, cols);
+      "Magic: %.8x\nTotal images: %d\nRows: %d\nCols: %d\n", magic,
+      total_images, rows, cols);
-  total_images = max_num_images >= 0 ? min(total_images, max_num_images) : total_images;
+  total_images =
+      max_num_images >= 0 ? min(total_images, max_num_images) : total_images;
  // Images are flattened into single row vectors.
  const int num_pixels = rows * cols;
-  image_set->images = nnMatrixMake(total_images, num_pixels);
+  image_set->images    = nnMatrixMake(total_images, num_pixels);
-  image_set->count = total_images;
+  image_set->count     = total_images;
-  image_set->rows = rows;
+  image_set->rows      = rows;
-  image_set->cols = cols;
+  image_set->cols      = cols;
  pixels = calloc(1, num_pixels);
  if (!pixels) {
@@ -219,30 +226,31 @@ cleanup:
  return success;
 }
-static void OneHotEncode(const uint8_t* labels_bytes, int num_labels, nnMatrix* labels) {
+static void OneHotEncode(
+    const uint8_t* labels_bytes, int num_labels, nnMatrix* labels) {
  assert(labels_bytes);
  assert(labels);
  assert(labels->rows == num_labels);
  assert(labels->cols == 10);
  static const R one_hot[10][10] = {
-    { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+      {1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-    { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
+      {0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
-    { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 },
+      {0, 0, 1, 0, 0, 0, 0, 0, 0, 0},
-    { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 },
+      {0, 0, 0, 1, 0, 0, 0, 0, 0, 0},
-    { 0, 0, 0, 0, 1, 0, 0, 0, 0, 0 },
+      {0, 0, 0, 0, 1, 0, 0, 0, 0, 0},
-    { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 },
+      {0, 0, 0, 0, 0, 1, 0, 0, 0, 0},
-    { 0, 0, 0, 0, 0, 0, 1, 0, 0, 0 },
+      {0, 0, 0, 0, 0, 0, 1, 0, 0, 0},
-    { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },
+      {0, 0, 0, 0, 0, 0, 0, 1, 0, 0},
-    { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0 },
+      {0, 0, 0, 0, 0, 0, 0, 0, 1, 0},
-    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 },
+      {0, 0, 0, 0, 0, 0, 0, 0, 0, 1},
  };
  R* value = labels->values;
  for (int i = 0; i < num_labels; ++i) {
-    const uint8_t label = labels_bytes[i];
+    const uint8_t label         = labels_bytes[i];
-    const R* one_hot_value = one_hot[label];
+    const R*      one_hot_value = one_hot[label];
    for (int j = 0; j < 10; ++j) {
      *value++ = FormatLabel(*one_hot_value++);
@@ -255,13 +263,13 @@ static int OneHotDecode(const nnMatrix* label_matrix) {
  assert(label_matrix->cols == 10);
  assert(label_matrix->rows == 1);
-  R max_value = 0;
+  R   max_value = 0;
-  int pos_max = 0;
+  int pos_max   = 0;
  for (int i = 0; i < 10; ++i) {
    const R value = nnMatrixAt(label_matrix, 0, i);
    if (value > max_value) {
      max_value = value;
-      pos_max = i;
+      pos_max   = i;
    }
  }
  assert(pos_max >= 0);
@@ -269,7 +277,8 @@ static int OneHotDecode(const nnMatrix* label_matrix) {
  return pos_max;
 }
-static bool ReadLabels(gzFile labels_file, int max_num_labels, ImageSet* image_set) {
+static bool ReadLabels(
+    gzFile labels_file, int max_num_labels, ImageSet* image_set) {
  assert(labels_file != Z_NULL);
  assert(image_set != 0);
@@ -278,24 +287,28 @@ static bool ReadLabels(gzFile labels_file, int max_num_labels, ImageSet* image_s
  uint8_t* labels = 0;
  int32_t magic, total_labels;
-  if ( (gzread(labels_file, (char*)&magic, sizeof(int32_t)) != sizeof(int32_t)) ||
+  if ((gzread(labels_file, (char*)&magic, sizeof(int32_t)) !=
-       (gzread(labels_file, (char*)&total_labels, sizeof(int32_t)) != sizeof(int32_t)) ) {
+       sizeof(int32_t)) ||
+      (gzread(labels_file, (char*)&total_labels, sizeof(int32_t)) !=
+       sizeof(int32_t))) {
    fprintf(stderr, "Failed to read header\n");
    goto cleanup;
  }
-  magic = ReverseEndian32(magic);
+  magic        = ReverseEndian32(magic);
  total_labels = ReverseEndian32(total_labels);
  if (magic != LABEL_FILE_MAGIC) {
-    fprintf(stderr, "Magic number mismatch. Got %x, expected: %x\n",
+    fprintf(
-      magic, LABEL_FILE_MAGIC);
+        stderr, "Magic number mismatch. Got %x, expected: %x\n", magic,
+        LABEL_FILE_MAGIC);
    goto cleanup;
  }
  printf("Magic: %.8x\nTotal labels: %d\n", magic, total_labels);
-  total_labels = max_num_labels >= 0 ? min(total_labels, max_num_labels) : total_labels;
+  total_labels =
+      max_num_labels >= 0 ? min(total_labels, max_num_labels) : total_labels;
  assert(image_set->count == total_labels);
@@ -308,7 +321,8 @@ static bool ReadLabels(gzFile labels_file, int max_num_labels, ImageSet* image_s
    goto cleanup;
  }
-  if (gzread(labels_file, labels, total_labels * sizeof(uint8_t)) != total_labels) {
+  if (gzread(labels_file, labels, total_labels * sizeof(uint8_t)) !=
+      total_labels) {
    fprintf(stderr, "Failed to read labels\n");
    goto cleanup;
  }
@@ -335,17 +349,17 @@ int main(int argc, const char** argv) {
  bool success = false;
-  gzFile train_images_file = Z_NULL;
+  gzFile           train_images_file = Z_NULL;
-  gzFile train_labels_file = Z_NULL;
+  gzFile           train_labels_file = Z_NULL;
-  gzFile test_images_file  = Z_NULL;
+  gzFile           test_images_file  = Z_NULL;
-  gzFile test_labels_file  = Z_NULL;
+  gzFile           test_labels_file  = Z_NULL;
-  ImageSet train_set = { 0 };
+  ImageSet         train_set         = {0};
-  ImageSet test_set  = { 0 };
+  ImageSet         test_set          = {0};
-  nnNeuralNetwork* net = 0;
+  nnNeuralNetwork* net               = 0;
-  nnQueryObject* query = 0;
+  nnQueryObject*   query             = 0;
  const char* mnist_files_dir = argv[1];
-  const int max_num_images = argc > 2 ? atoi(argv[2]) : -1;
+  const int   max_num_images  = argc > 2 ? atoi(argv[2]) : -1;
  char train_labels_path[PATH_MAX];
  char train_images_path[PATH_MAX];
@@ -353,12 +367,12 @@ int main(int argc, const char** argv) {
  char test_images_path[PATH_MAX];
  strlcpy(train_labels_path, mnist_files_dir, PATH_MAX);
  strlcpy(train_images_path, mnist_files_dir, PATH_MAX);
-  strlcpy(test_labels_path,  mnist_files_dir, PATH_MAX);
+  strlcpy(test_labels_path, mnist_files_dir, PATH_MAX);
-  strlcpy(test_images_path,  mnist_files_dir, PATH_MAX);
+  strlcpy(test_images_path, mnist_files_dir, PATH_MAX);
  strlcat(train_labels_path, "/train-labels-idx1-ubyte.gz", PATH_MAX);
  strlcat(train_images_path, "/train-images-idx3-ubyte.gz", PATH_MAX);
-  strlcat(test_labels_path,  "/t10k-labels-idx1-ubyte.gz",  PATH_MAX);
+  strlcat(test_labels_path, "/t10k-labels-idx1-ubyte.gz", PATH_MAX);
-  strlcat(test_images_path,  "/t10k-images-idx3-ubyte.gz",  PATH_MAX);
+  strlcat(test_images_path, "/t10k-images-idx3-ubyte.gz", PATH_MAX);
  train_images_file = gzopen(train_images_path, "r");
  if (train_images_file == Z_NULL) {
@@ -406,11 +420,18 @@ int main(int argc, const char** argv) {
  }
  // Network definition.
-  const int image_size_pixels = train_set.rows * train_set.cols;
+  const int     image_size_pixels = train_set.rows * train_set.cols;
-  const int num_layers = 2;
+  const int     num_layers        = 4;
-  const int layer_sizes[3] = { image_size_pixels, 100, 10 };
+  const int     hidden_size       = 100;
-  const nnActivation layer_activations[2] = { nnSigmoid, nnSigmoid };
+  const nnLayer layers[4]         = {
-  if (!(net = nnMakeNet(num_layers, layer_sizes, layer_activations))) {
+      {.type   = nnLinear,
+       .linear = {.input_size = image_size_pixels, .output_size = hidden_size}},
+      {.type = nnSigmoid},
+      {.type   = nnLinear,
+       .linear = {.input_size = hidden_size, .output_size = 10}},
+      {.type = nnSigmoid}
+  };
+  if (!(net = nnMakeNet(layers, num_layers, image_size_pixels))) {
    fprintf(stderr, "Failed to create neural network\n");
    goto cleanup;
  }
@@ -418,17 +439,17 @@ int main(int argc, const char** argv) {
  // Train.
  printf("Training with up to %d images from the data set\n\n", max_num_images);
  const nnTrainingParams training_params = {
-    .learning_rate = 0.1,
+      .learning_rate  = 0.1,
-    .max_iterations = TRAIN_ITERATIONS,
+      .max_iterations = TRAIN_ITERATIONS,
-    .seed = 0,
+      .seed           = 0,
-    .weight_init = nnWeightInitNormal,
+      .weight_init    = nnWeightInitNormal,
-    .debug = true,
+      .debug          = true,
  };
  nnTrain(net, &train_set.images, &train_set.labels, &training_params);
  // Test.
  int hits = 0;
-  query = nnMakeQueryObject(net, /*num_inputs=*/1);
+  query    = nnMakeQueryObject(net, /*num_inputs=*/1);
  for (int i = 0; i < test_set.count; ++i) {
    const nnMatrix test_image = nnMatrixBorrowRows(&test_set.images, i, 1);
    const nnMatrix test_label = nnMatrixBorrowRows(&test_set.labels, i, 1);
@@ -444,7 +465,7 @@ int main(int argc, const char** argv) {
  }
  const R hit_ratio = (R)hits / (R)test_set.count;
  printf("Test images: %d\n", test_set.count);
-  printf("Hits: %d/%d (%.3f%%)\n", hits, test_set.count, hit_ratio*100);
+  printf("Hits: %d/%d (%.3f%%)\n", hits, test_set.count, hit_ratio * 100);
  success = true;
diff --git a/src/lib/include/neuralnet/matrix.h b/src/lib/include/neuralnet/matrix.h
index b7281bf..f80b985 100644
--- a/src/lib/include/neuralnet/matrix.h
+++ b/src/lib/include/neuralnet/matrix.h
@@ -17,6 +17,9 @@ nnMatrix nnMatrixMake(int rows, int cols);
 /// Delete a matrix and free its internal memory.
 void nnMatrixDel(nnMatrix*);
+/// Construct a matrix from an array of values.
+nnMatrix nnMatrixFromArray(int rows, int cols, const R values[]);
 /// Move a matrix.
 ///
 /// |in| is an empty matrix after the move.
diff --git a/src/lib/include/neuralnet/neuralnet.h b/src/lib/include/neuralnet/neuralnet.h
index 05c9406..f122c2a 100644
--- a/src/lib/include/neuralnet/neuralnet.h
+++ b/src/lib/include/neuralnet/neuralnet.h
@@ -1,32 +1,45 @@
 #pragma once
+#include <neuralnet/matrix.h>
 #include <neuralnet/types.h>
-typedef struct nnMatrix nnMatrix;
 typedef struct nnNeuralNetwork nnNeuralNetwork;
 typedef struct nnQueryObject   nnQueryObject;
-/// Neuron activation.
+/// Linear layer parameters.
-typedef enum nnActivation {
+///
-  nnIdentity,
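+/// Set one of the following (the sizes take precedence if both are set):
+///   a) Training:  input and output sizes, both > 0; storage is allocated.
+///   b) Inference: weights + biases; used as-is and not freed by the library.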
+typedef struct nnLinearParams {
+  int      input_size;
+  int      output_size;
+  nnMatrix weights;
+  nnMatrix biases;
+} nnLinearParams;
+/// Layer type.
+typedef enum nnLayerType {
+  nnLinear,
  nnSigmoid,
  nnRelu,
-} nnActivation;
+} nnLayerType;
+/// Neural network layer.
+typedef struct nnLayer {
+  nnLayerType type;
+  union {
+    nnLinearParams linear;
+  };
+} nnLayer;
 /// Create a network.
 nnNeuralNetwork* nnMakeNet(
-    int num_layers, const int* layer_sizes, const nnActivation* activations);
+    const nnLayer* layers, int num_layers, int input_size);
 /// Delete the network and free its internal memory.
 void nnDeleteNet(nnNeuralNetwork**);
-/// Set the network's weights.
-void nnSetWeights(nnNeuralNetwork*, const R* weights);
-/// Set the network's biases.
-void nnSetBiases(nnNeuralNetwork*, const R* biases);
 /// Query the network.
 ///
 /// |input| is a matrix of inputs, one row per input and as many columns as the
@@ -42,10 +55,10 @@ void nnQueryArray(
 /// Create a query object.
 ///
-/// The query object holds all the internal memory required to query a network.
+/// The query object holds all the internal memory required to query a network
-/// Query objects allocate all memory up front so that network queries can run
+/// with batches of the given size. Memory is allocated up front so that network
-/// without additional memory allocation.
+/// queries can run without additional memory allocation.
-nnQueryObject* nnMakeQueryObject(const nnNeuralNetwork*, int num_inputs);
+nnQueryObject* nnMakeQueryObject(const nnNeuralNetwork*, int batch_size);
 /// Delete the query object and free its internal memory.
 void nnDeleteQueryObject(nnQueryObject**);
@@ -60,7 +73,7 @@ int nnNetInputSize(const nnNeuralNetwork*);
 int nnNetOutputSize(const nnNeuralNetwork*);
 /// Return the layer's input size.
-int nnLayerInputSize(const nnMatrix* weights);
+int nnLayerInputSize(const nnNeuralNetwork*, int layer);
 /// Return the layer's output size.
-int nnLayerOutputSize(const nnMatrix* weights);
+int nnLayerOutputSize(const nnNeuralNetwork*, int layer);
diff --git a/src/lib/src/activation.h b/src/lib/src/activation.h
index b56a69e..4c8a9e4 100644
--- a/src/lib/src/activation.h
+++ b/src/lib/src/activation.h
@@ -9,8 +9,8 @@ static inline R sigmoid(R x) { return 1. / (1. + exp(-x)); }
 static inline R relu(R x) { return fmax(0, x); }
 #define NN_MAP_ARRAY(f, in, out, size) \
-  for (int i = 0; i < size; ++i) {     \
+  for (int ii = 0; ii < size; ++ii) {  \
-    out[i] = f(in[i]);                 \
+    out[ii] = f(in[ii]);               \
  }
 #define sigmoid_array(in, out, size) NN_MAP_ARRAY(sigmoid, in, out, size)
diff --git a/src/lib/src/matrix.c b/src/lib/src/matrix.c
index d98c8bb..d5c3fcc 100644
--- a/src/lib/src/matrix.c
+++ b/src/lib/src/matrix.c
@@ -26,6 +26,12 @@ void nnMatrixDel(nnMatrix* matrix) {
  }
 }
+nnMatrix nnMatrixFromArray(int rows, int cols, const R values[]) {
+  nnMatrix m = nnMatrixMake(rows, cols);
+  nnMatrixInit(&m, values);
+  return m;
+}
 void nnMatrixMove(nnMatrix* in, nnMatrix* out) {
  assert(in);
  assert(out);
diff --git a/src/lib/src/neuralnet.c b/src/lib/src/neuralnet.c
index a5fc59b..4322b8c 100644
--- a/src/lib/src/neuralnet.c
+++ b/src/lib/src/neuralnet.c
@@ -7,11 +7,65 @@
 #include <assert.h>
 #include <stdlib.h>
+static void MakeLayerImpl(
+    int prev_layer_output_size, const nnLayer* layer, nnLayerImpl* impl) {
+  impl->type = layer->type;
+  switch (layer->type) {
+  case nnLinear: {
+    const nnLinearParams* params = &layer->linear;
+    nnLinearImpl*         linear = &impl->linear;
+    if ((params->input_size > 0) && (params->output_size > 0)) {
+      const int rows  = params->input_size;
+      const int cols  = params->output_size;
+      linear->weights = nnMatrixMake(rows, cols);
+      linear->biases  = nnMatrixMake(1, cols);
+      linear->owned   = true;
+    } else {
+      linear->weights = params->weights;
+      linear->biases  = params->biases;
+      linear->owned   = false;
+    }
+    impl->input_size  = linear->weights.rows;
+    impl->output_size = linear->weights.cols;
+    break;
+  }
+  // Activation layers.
+  case nnRelu:
+  case nnSigmoid:
+    impl->input_size  = prev_layer_output_size;
+    impl->output_size = prev_layer_output_size;
+    break;
+  }
+}
+static void DeleteLayer(nnLayerImpl* layer) {
+  switch (layer->type) {
+  case nnLinear: {
+    nnLinearImpl* linear = &layer->linear;
+    if (linear->owned) {
+      nnMatrixDel(&linear->weights);
+      nnMatrixDel(&linear->biases);
+    }
+    break;
+  }
+  // No parameters for these layers.
+  case nnRelu:
+  case nnSigmoid:
+    break;
+  }
+}
 nnNeuralNetwork* nnMakeNet(
-    int num_layers, const int* layer_sizes, const nnActivation* activations) {
+    const nnLayer* layers, int num_layers, int input_size) {
+  assert(layers);
  assert(num_layers > 0);
-  assert(layer_sizes);
+  assert(input_size > 0);
-  assert(activations);
  nnNeuralNetwork* net = calloc(1, sizeof(nnNeuralNetwork));
  if (net == 0) {
@@ -20,84 +74,38 @@ nnNeuralNetwork* nnMakeNet(
  net->num_layers = num_layers;
-  net->weights     = calloc(num_layers, sizeof(nnMatrix));
+  net->layers = calloc(num_layers, sizeof(nnLayerImpl));
-  net->biases      = calloc(num_layers, sizeof(nnMatrix));
+  if (net->layers == 0) {
-  net->activations = calloc(num_layers, sizeof(nnActivation));
-  if ((net->weights == 0) || (net->biases == 0) || (net->activations == 0)) {
    nnDeleteNet(&net);
    return 0;
  }
+  int prev_layer_output_size = input_size;
  for (int l = 0; l < num_layers; ++l) {
-    // layer_sizes = { input layer size, first hidden layer size, ...}
+    MakeLayerImpl(prev_layer_output_size, &layers[l], &net->layers[l]);
-    const int layer_input_size  = layer_sizes[l];
+    prev_layer_output_size = net->layers[l].output_size;
-    const int layer_output_size = layer_sizes[l + 1];
-    // We store the transpose of the weight matrix as written in textbooks.
-    // Our vectors are row vectors and the matrices row-major.
-    const int rows = layer_input_size;
-    const int cols = layer_output_size;
-    net->weights[l]     = nnMatrixMake(rows, cols);
-    net->biases[l]      = nnMatrixMake(1, cols);
-    net->activations[l] = activations[l];
  }
  return net;
 }
-void nnDeleteNet(nnNeuralNetwork** net) {
+void nnDeleteNet(nnNeuralNetwork** ppNet) {
-  if ((!net) || (!(*net))) {
+  if ((!ppNet) || (!(*ppNet))) {
    return;
  }
-  if ((*net)->weights != 0) {
+  nnNeuralNetwork* net = *ppNet;
-    for (int l = 0; l < (*net)->num_layers; ++l) {
-      nnMatrixDel(&(*net)->weights[l]);
-    }
-    free((*net)->weights);
-    (*net)->weights = 0;
-  }
-  if ((*net)->biases != 0) {
-    for (int l = 0; l < (*net)->num_layers; ++l) {
-      nnMatrixDel(&(*net)->biases[l]);
-    }
-    free((*net)->biases);
-    (*net)->biases = 0;
-  }
-  if ((*net)->activations) {
-    free((*net)->activations);
-    (*net)->activations = 0;
-  }
-  free(*net);
-  *net = 0;
-}
-void nnSetWeights(nnNeuralNetwork* net, const R* weights) {
-  assert(net);
-  assert(weights);
  for (int l = 0; l < net->num_layers; ++l) {
-    nnMatrix* layer_weights = &net->weights[l];
+    DeleteLayer(&net->layers[l]);
-    R*        layer_values  = layer_weights->values;
-    for (int j = 0; j < layer_weights->rows * layer_weights->cols; ++j) {
-      *layer_values++ = *weights++;
-    }
  }
-}
-void nnSetBiases(nnNeuralNetwork* net, const R* biases) {
-  assert(net);
-  assert(biases);
-  for (int l = 0; l < net->num_layers; ++l) {
-    nnMatrix* layer_biases = &net->biases[l];
-    R*        layer_values = layer_biases->values;
-    for (int j = 0; j < layer_biases->rows * layer_biases->cols; ++j) {
+  if (net->layers) {
-      *layer_values++ = *biases++;
+    free(net->layers);
-    }
+    net->layers = 0;
  }
+  free(net);
+  *ppNet = 0;
 }
 void nnQuery(
@@ -114,35 +122,40 @@ void nnQuery(
    nnMatrix input_vector = nnMatrixBorrowRows((nnMatrix*)input, i, 1);
    for (int l = 0; l < net->num_layers; ++l) {
-      const nnMatrix* layer_weights = &net->weights[l];
-      const nnMatrix* layer_biases  = &net->biases[l];
-      // Y^T = (W*X)^T = X^T*W^T
-      //
-      // TODO: If we had a row-row matrix multiplication, we could compute:
-      //   Y^T = W ** X^T
-      // The row-row multiplication could be more cache-friendly. We just need
-      // to store W as is, without transposing.
-      // We could also rewrite the original Mul function to go row x row,
-      // decomposing the multiplication. Preserving the original meaning of Mul
-      // makes everything clearer.
      nnMatrix output_vector =
          nnMatrixBorrowRows(&query->layer_outputs[l], i, 1);
-      nnMatrixMul(&input_vector, layer_weights, &output_vector);
-      nnMatrixAddRow(&output_vector, layer_biases, &output_vector);
-      switch (net->activations[l]) {
+      switch (net->layers[l].type) {
-      case nnIdentity:
+      case nnLinear: {
-        break; // Nothing to do for the identity function.
+        const nnLinearImpl* linear        = &net->layers[l].linear;
-      case nnSigmoid:
+        const nnMatrix*     layer_weights = &linear->weights;
-        sigmoid_array(
+        const nnMatrix*     layer_biases  = &linear->biases;
-            output_vector.values, output_vector.values, output_vector.cols);
+        // Y^T = (W*X)^T = X^T*W^T
+        //
+        // TODO: If we had a row-row matrix multiplication, we could compute:
+        //   Y^T = W ** X^T
+        //
+        // The row-row multiplication could be more cache-friendly. We just need
+        // to store W as is, without transposing.
+        //
+        // We could also rewrite the original Mul function to go row x row,
+        // decomposing the multiplication. Preserving the original meaning of
+        // Mul makes everything clearer.
+        nnMatrixMul(&input_vector, layer_weights, &output_vector);
+        nnMatrixAddRow(&output_vector, layer_biases, &output_vector);
        break;
+      }
      case nnRelu:
+        assert(input_vector.cols == output_vector.cols);
        relu_array(
-            output_vector.values, output_vector.values, output_vector.cols);
+            input_vector.values, output_vector.values, output_vector.cols);
+        break;
+      case nnSigmoid:
+        assert(input_vector.cols == output_vector.cols);
+        sigmoid_array(
+            input_vector.values, output_vector.values, output_vector.cols);
        break;
-      default:
-        assert(0);
      }
      input_vector = output_vector; // Borrow.
@@ -159,15 +172,15 @@ void nnQueryArray(
  assert(output);
  assert(net->num_layers > 0);
-  nnMatrix input_vector = nnMatrixMake(net->weights[0].cols, 1);
+  nnMatrix input_vector = nnMatrixMake(1, nnNetInputSize(net));
  nnMatrixInit(&input_vector, input);
  nnQuery(net, query, &input_vector);
  nnMatrixRowToArray(query->network_outputs, 0, output);
 }
-nnQueryObject* nnMakeQueryObject(const nnNeuralNetwork* net, int num_inputs) {
+nnQueryObject* nnMakeQueryObject(const nnNeuralNetwork* net, int batch_size) {
  assert(net);
-  assert(num_inputs > 0);
+  assert(batch_size > 0);
  assert(net->num_layers > 0);
  nnQueryObject* query = calloc(1, sizeof(nnQueryObject));
@@ -183,11 +196,12 @@ nnQueryObject* nnMakeQueryObject(const nnNeuralNetwork* net, int num_inputs) {
    free(query);
    return 0;
  }
  for (int l = 0; l < net->num_layers; ++l) {
-    const nnMatrix* layer_weights     = &net->weights[l];
+    const int layer_output_size = nnLayerOutputSize(net, l);
-    const int       layer_output_size = nnLayerOutputSize(layer_weights);
+    query->layer_outputs[l]     = nnMatrixMake(batch_size, layer_output_size);
-    query->layer_outputs[l] = nnMatrixMake(num_inputs, layer_output_size);
  }
  query->network_outputs = &query->layer_outputs[net->num_layers - 1];
  return query;
@@ -213,23 +227,19 @@ const nnMatrix* nnNetOutputs(const nnQueryObject* query) {
 }
 int nnNetInputSize(const nnNeuralNetwork* net) {
-  assert(net);
+  return nnLayerInputSize(net, 0);
-  assert(net->num_layers > 0);
-  return net->weights[0].rows;
 }
 int nnNetOutputSize(const nnNeuralNetwork* net) {
-  assert(net);
+  return nnLayerOutputSize(net, net->num_layers - 1);
-  assert(net->num_layers > 0);
-  return net->weights[net->num_layers - 1].cols;
 }
-int nnLayerInputSize(const nnMatrix* weights) {
+int nnLayerInputSize(const nnNeuralNetwork* net, int layer) {
-  assert(weights);
+  assert(net);
-  return weights->rows;
+  return net->layers[layer].input_size;
 }
-int nnLayerOutputSize(const nnMatrix* weights) {
+int nnLayerOutputSize(const nnNeuralNetwork* net, int layer) {
-  assert(weights);
+  assert(net);
-  return weights->cols;
+  return net->layers[layer].output_size;
 }
diff --git a/src/lib/src/neuralnet_impl.h b/src/lib/src/neuralnet_impl.h
index f5a9c63..935c5ea 100644
--- a/src/lib/src/neuralnet_impl.h
+++ b/src/lib/src/neuralnet_impl.h
@@ -2,22 +2,29 @@
 #include <neuralnet/matrix.h>
+#include <stdbool.h>
+/// Linear layer parameters.
+typedef struct nnLinearImpl {
+  nnMatrix weights;
+  nnMatrix biases;
+  bool     owned; /// Whether the library owns the weights and biases.
+} nnLinearImpl;
+/// Neural network layer.
+typedef struct nnLayerImpl {
+  nnLayerType type;
+  int         input_size;
+  int         output_size;
+  union {
+    nnLinearImpl linear;
+  };
+} nnLayerImpl;
 /// Neural network object.
-///
-/// We store the transposes of the weight matrices so that we can do forward
-/// passes with a minimal amount of work. That is, if in paper we write:
-///
-///   [w11 w21]
-///   [w12 w22]
-///
-/// then the weight matrix in memory is stored as the following array:
-///
-///   w11 w12 w21 w22
 typedef struct nnNeuralNetwork {
-  int           num_layers;  // Number of non-input layers (hidden + output).
+  int          num_layers; // Number of entries in |layers|.
-  nnMatrix*     weights;     // One matrix per non-input layer.
+  nnLayerImpl* layers;     // Linear and activation layers, in order.
-  nnMatrix*     biases;      // One vector per non-input layer.
-  nnActivation* activations; // One per non-input layer.
 } nnNeuralNetwork;
 /// A query object that holds all the memory necessary to query a network.
diff --git a/src/lib/src/train.c b/src/lib/src/train.c
index dc93f0f..98f58ad 100644
--- a/src/lib/src/train.c
+++ b/src/lib/src/train.c
@@ -38,7 +38,7 @@ typedef struct nnSigmoidGradientElements {
 /// each layer. A data type is defined for these because we allocate all the
 /// required memory up front before entering the training loop.
 typedef struct nnGradientElements {
-  nnActivation type;
+  nnLayerType type;
  // Gradient vector, same size as the layer.
  // This will contain the gradient expression except for the output value of
  // the previous layer.
@@ -57,10 +57,27 @@ void nnInitNet(
  mt19937_64_init(&rng, seed);
  for (int l = 0; l < net->num_layers; ++l) {
-    nnMatrix* weights = &net->weights[l];
+    // Get the layer's weights and biases, if any.
-    nnMatrix* biases  = &net->biases[l];
+    nnMatrix* weights = 0;
+    nnMatrix* biases  = 0;
+    switch (net->layers[l].type) {
+    case nnLinear: {
+      nnLinearImpl* linear = &net->layers[l].linear;
+      weights = &linear->weights;
+      biases  = &linear->biases;
+      break;
+    }
+    // Activations.
+    case nnRelu:
+    case nnSigmoid:
+      break;
+    }
+    if (!weights || !biases) {
+      continue;
+    }
-    const R layer_size = (R)nnLayerInputSize(weights);
+    const R layer_size = (R)nnLayerInputSize(net, l);
    const R scale      = 1. / layer_size;
    const R stdev      = 1. / sqrt((R)layer_size);
    const R sigma      = stdev * stdev;
@@ -128,9 +145,6 @@ void nnTrain(
  // with one sample at a time.
  nnMatrix* errors = calloc(net->num_layers, sizeof(nnMatrix));
-  // Allocate the weight transpose matrices up front for backpropagation.
-  // nnMatrix* weights_T = calloc(net->num_layers, sizeof(nnMatrix));
  // Allocate the weight delta matrices.
  nnMatrix* weight_deltas = calloc(net->num_layers, sizeof(nnMatrix));
@@ -144,30 +158,24 @@ void nnTrain(
  nnMatrix* outputs_T = calloc(net->num_layers, sizeof(nnMatrix));
  assert(errors != 0);
-  // assert(weights_T != 0);
  assert(weight_deltas != 0);
  assert(gradient_elems);
  assert(outputs_T);
  for (int l = 0; l < net->num_layers; ++l) {
-    const nnMatrix*    layer_weights     = &net->weights[l];
+    const int          layer_input_size  = nnLayerInputSize(net, l);
-    const int          layer_output_size = net->weights[l].cols;
+    const int          layer_output_size = nnLayerOutputSize(net, l);
-    const nnActivation activation        = net->activations[l];
+    const nnLayerImpl* layer             = &net->layers[l];
-    errors[l] = nnMatrixMake(1, layer_weights->cols);
-    // weights_T[l] = nnMatrixMake(layer_weights->cols, layer_weights->rows);
-    // nnMatrixTranspose(layer_weights, &weights_T[l]);
-    weight_deltas[l] = nnMatrixMake(layer_weights->rows, layer_weights->cols);
-    outputs_T[l] = nnMatrixMake(layer_output_size, 1);
+    errors[l]        = nnMatrixMake(1, layer_output_size);
+    weight_deltas[l] = nnMatrixMake(layer_input_size, layer_output_size);
+    outputs_T[l]     = nnMatrixMake(layer_output_size, 1);
    // Allocate the gradient elements and vectors for weight delta calculation.
    nnGradientElements* elems = &gradient_elems[l];
-    elems->type               = activation;
+    elems->type               = layer->type;
-    switch (activation) {
+    switch (layer->type) {
-    case nnIdentity:
+    case nnLinear:
      break; // Gradient vector will be borrowed, no need to allocate.
    case nnSigmoid:
@@ -208,6 +216,7 @@ void nnTrain(
    // For now, we train with one sample at a time.
    for (int sample = 0; sample < inputs->rows; ++sample) {
+      // TODO: Introduce a BorrowMut.
      // Slice the input and target matrices with the batch size.
      // We are not mutating the inputs, but we need the cast to borrow.
      nnMatrix training_inputs =
@@ -219,15 +228,16 @@ void nnTrain(
      // Assuming one training input per iteration for now.
      nnMatrixTranspose(&training_inputs, &training_inputs_T);
-      // Run a forward pass and compute the output layer error relevant to the
+      // Forward pass.
-      // derivative: o-t.
+      nnQuery(net, query, &training_inputs);
-      //   Error: (t-o)^2
-      //   dE/do = -2(t-o)
+      // Compute the error derivative: o-t.
-      //         = +2(o-t)
+      //   Error: 1/2 (t-o)^2
+      //   dE/do = -(t-o)
+      //         = +(o-t)
      // Note that we compute o-t instead to remove that outer negative sign.
      // The 2 is dropped because we are only interested in the direction of the
      // gradient. The learning rate controls the magnitude.
-      nnQuery(net, query, &training_inputs);
      nnMatrixSub(
          training_outputs, &training_targets, &errors[net->num_layers - 1]);
@@ -236,68 +246,86 @@ void nnTrain(
        nnMatrixTranspose(&query->layer_outputs[l], &outputs_T[l]);
      }
-      // Update weights and biases for each internal layer, backpropagating
+      // Update weights and biases for each internal layer, back-propagating
      // errors along the way.
      for (int l = net->num_layers - 1; l >= 0; --l) {
-        const nnMatrix*     layer_output  = &query->layer_outputs[l];
+        const nnMatrix*     layer_output = &query->layer_outputs[l];
-        nnMatrix*           layer_weights = &net->weights[l];
+        nnGradientElements* elems        = &gradient_elems[l];
-        nnMatrix*           layer_biases  = &net->biases[l];
+        nnMatrix*           gradient     = &elems->gradient;
-        nnGradientElements* elems         = &gradient_elems[l];
+        nnLayerImpl*        layer        = &net->layers[l];
-        nnMatrix*           gradient      = &elems->gradient;
-        const nnActivation  activation    = net->activations[l];
+        // Compute this layer's gradient.
+        //
-        // Compute the gradient (the part of the expression that does not
+        // By "gradient" we mean the expression common to the weights and bias
-        // contain the output of the previous layer).
+        // gradients. This is the part of the expression that does not contain
+        // this layer's input.
        //
-        // Identity: G = error_k
+        // Linear:   G = id (identity; the code just borrows errors[l])
-        // Sigmoid:  G = error_k * output_k * (1 - output_k).
+        // Relu:     G = (output_k > 0 ? 1 : 0)
-        // Relu:     G = error_k * (output_k > 0 ? 1 : 0)
+        // Sigmoid:  G = output_k * (1 - output_k)
-        switch (activation) {
+        switch (layer->type) {
-        case nnIdentity:
+        case nnLinear: {
          // TODO: Just copy the pointer?
          *gradient = nnMatrixBorrow(&errors[l]);
          break;
+        }
+        case nnRelu:
+          nnMatrixGt(layer_output, 0, gradient);
+          break;
        case nnSigmoid:
          nnMatrixSub(&elems->sigmoid.ones, layer_output, gradient);
          nnMatrixMulPairs(layer_output, gradient, gradient);
-          nnMatrixMulPairs(&errors[l], gradient, gradient);
-          break;
-        case nnRelu:
-          nnMatrixGt(layer_output, 0, gradient);
-          nnMatrixMulPairs(&errors[l], gradient, gradient);
          break;
        }
-        // Outer product to compute the weight deltas.
+        // Back-propagate the error.
-        const nnMatrix* output_T =
+        //
-            (l == 0) ? &training_inputs_T : &outputs_T[l - 1];
+        // This combines this layer's gradient with the back-propagated error,
-        nnMatrixMul(output_T, gradient, &weight_deltas[l]);
+        // which is the combination of the gradients of subsequent layers down
+        // to the output layer error.
-        // Backpropagate the error before updating weights.
+        //
+        // Note that this step uses the layer's original weights.
        if (l > 0) {
-          // G * W^T == G *^T W.
+          switch (layer->type) {
-          // nnMatrixMul(gradient, &weights_T[l], &errors[l-1]);
+          case nnLinear: {
-          nnMatrixMulRows(gradient, layer_weights, &errors[l - 1]);
+            const nnMatrix* layer_weights = &layer->linear.weights;
+            // E * W^T == E *^T W.
+            // Using nnMatrixMulRows, we avoid having to transpose the weight
+            // matrix.
+            nnMatrixMulRows(&errors[l], layer_weights, &errors[l - 1]);
+            break;
+          }
+          // For activations, the error back-propagates as is but multiplied by
+          // the layer's gradient.
+          case nnRelu:
+          case nnSigmoid:
+            nnMatrixMulPairs(&errors[l], gradient, &errors[l - 1]);
+            break;
+          }
        }
-        // Update weights.
+        // Update layer weights.
-        nnMatrixScale(&weight_deltas[l], params->learning_rate);
+        if (layer->type == nnLinear) {
-        // The gradient has a negative sign from -(t - o), but we have computed
+          nnLinearImpl* linear        = &layer->linear;
-        // e = o - t instead, so we can subtract directly.
+          nnMatrix*     layer_weights = &linear->weights;
-        // nnMatrixAdd(layer_weights, &weight_deltas[l], layer_weights);
+          nnMatrix*     layer_biases  = &linear->biases;
-        nnMatrixSub(layer_weights, &weight_deltas[l], layer_weights);
+          // Outer product to compute the weight deltas.
-        // Update weight transpose matrix for the next training iteration.
+          // This layer's input is the previous layer's output.
-        // nnMatrixTranspose(layer_weights, &weights_T[l]);
+          const nnMatrix* input_T =
+              (l == 0) ? &training_inputs_T : &outputs_T[l - 1];
-        // Update biases.
+          nnMatrixMul(input_T, gradient, &weight_deltas[l]);
-        // This is the same formula as for weights, except that the o_j term is
-        // just 1. We can simply re-use the gradient that we have already
+          // Update weights.
-        // computed for the weight update.
+          nnMatrixScale(&weight_deltas[l], params->learning_rate);
-        // nnMatrixMulAdd(layer_biases, gradient, params->learning_rate,
+          nnMatrixSub(layer_weights, &weight_deltas[l], layer_weights);
-        // layer_biases);
-        nnMatrixMulSub(
+          // Update biases.
-            layer_biases, gradient, params->learning_rate, layer_biases);
+          // This is the same formula as for weights, except that the o_j term
+          // is just 1.
+          nnMatrixMulSub(
+              layer_biases, gradient, params->learning_rate, layer_biases);
+        }
      }
      // TODO: Add this under a verbose debugging mode.
@@ -334,12 +362,11 @@ void nnTrain(
  for (int l = 0; l < net->num_layers; ++l) {
    nnMatrixDel(&errors[l]);
    nnMatrixDel(&outputs_T[l]);
-    // nnMatrixDel(&weights_T[l]);
    nnMatrixDel(&weight_deltas[l]);
    nnGradientElements* elems = &gradient_elems[l];
    switch (elems->type) {
-    case nnIdentity:
+    case nnLinear:
      break; // Gradient vector is borrowed, no need to deallocate.
    case nnSigmoid:
@@ -355,7 +382,6 @@ void nnTrain(
  nnMatrixDel(&training_inputs_T);
  free(errors);
  free(outputs_T);
-  // free(weights_T);
  free(weight_deltas);
  free(gradient_elems);
 }
diff --git a/src/lib/test/neuralnet_test.c b/src/lib/test/neuralnet_test.c
index 14d9438..0f8d7b8 100644
--- a/src/lib/test/neuralnet_test.c
+++ b/src/lib/test/neuralnet_test.c
@@ -1,8 +1,8 @@
 #include <neuralnet/neuralnet.h>
-#include <neuralnet/matrix.h>
 #include "activation.h"
 #include "neuralnet_impl.h"
+#include <neuralnet/matrix.h>
 #include "test.h"
 #include "test_util.h"
@@ -10,23 +10,31 @@
 #include <assert.h>
 TEST_CASE(neuralnet_perceptron_test) {
-  const int num_layers = 1;
+  const int     num_layers = 2;
-  const int layer_sizes[] = { 1, 1 };
+  const int     input_size = 1;
-  const nnActivation layer_activations[] = { nnSigmoid };
+  const R       weights[]  = {0.3};
-  const R weights[] = { 0.3 };
+  const R       biases[]   = {0.0};
+  const nnLayer layers[]   = {
+      {.type = nnLinear,
+       .linear =
+             {.weights = nnMatrixFromArray(1, 1, weights),
+              .biases  = nnMatrixFromArray(1, 1, biases)}},
+      {.type = nnSigmoid},
+  };
-  nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations);
+  nnNeuralNetwork* net = nnMakeNet(layers, num_layers, input_size);
  assert(net);
-  nnSetWeights(net, weights);
-  nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/1);
+  nnQueryObject* query = nnMakeQueryObject(net, 1);
-  const R input[] = { 0.9 };
+  const R input[] = {0.9};
-  R output[1];
+  R       output[1];
  nnQueryArray(net, query, input, output);
  const R expected_output = sigmoid(input[0] * weights[0]);
-  printf("\nOutput: %f, Expected: %f\n", output[0], expected_output);
+  printf(
+      "\n[neuralnet_perceptron_test] Output: %f, Expected: %f\n", output[0],
+      expected_output);
  TEST_TRUE(double_eq(output[0], expected_output, EPS));
  nnDeleteQueryObject(&query);
@@ -34,53 +42,66 @@ TEST_CASE(neuralnet_perceptron_test) {
 }
 TEST_CASE(neuralnet_xor_test) {
-  const int num_layers = 2;
+  // First (hidden) layer.
-  const int layer_sizes[] = { 2, 2, 1 };
+  const R weights0[] = {1, 1, 1, 1};
-  const nnActivation layer_activations[] = { nnRelu, nnIdentity };
+  const R biases0[]  = {0, -1};
-  const R weights[] = {
+  // Second (output) layer.
-    1, 1, 1, 1,  // First (hidden) layer.
+  const R weights1[] = {1, -2};
-    1, -2        // Second (output) layer.
+  const R biases1[]  = {0};
-  };
+  // Network.
-  const R biases[] = {
+  const int     num_layers = 3;
-    0, -1,  // First (hidden) layer.
+  const int     input_size = 2;
-    0       // Second (output) layer.
+  const nnLayer layers[]   = {
+      {.type = nnLinear,
+       .linear =
+             {.weights = nnMatrixFromArray(2, 2, weights0),
+              .biases  = nnMatrixFromArray(1, 2, biases0)}},
+      {.type = nnRelu},
+      {.type = nnLinear,
+       .linear =
+             {.weights = nnMatrixFromArray(2, 1, weights1),
+              .biases  = nnMatrixFromArray(1, 1, biases1)}},
  };
-  nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations);
+  nnNeuralNetwork* net = nnMakeNet(layers, num_layers, input_size);
  assert(net);
-  nnSetWeights(net, weights);
-  nnSetBiases(net, biases);
  // First layer weights.
-  TEST_EQUAL(nnMatrixAt(&net->weights[0], 0, 0), 1);
+  TEST_EQUAL(nnMatrixAt(&net->layers[0].linear.weights, 0, 0), 1);
-  TEST_EQUAL(nnMatrixAt(&net->weights[0], 0, 1), 1);
+  TEST_EQUAL(nnMatrixAt(&net->layers[0].linear.weights, 0, 1), 1);
-  TEST_EQUAL(nnMatrixAt(&net->weights[0], 0, 2), 1);
+  TEST_EQUAL(nnMatrixAt(&net->layers[0].linear.weights, 0, 2), 1);
-  TEST_EQUAL(nnMatrixAt(&net->weights[0], 0, 3), 1);
+  TEST_EQUAL(nnMatrixAt(&net->layers[0].linear.weights, 0, 3), 1);
-  // Second layer weights.
+  // Second linear layer (third layer) weights.
-  TEST_EQUAL(nnMatrixAt(&net->weights[1], 0, 0), 1);
+  TEST_EQUAL(nnMatrixAt(&net->layers[2].linear.weights, 0, 0), 1);
-  TEST_EQUAL(nnMatrixAt(&net->weights[1], 0, 1), -2);
+  TEST_EQUAL(nnMatrixAt(&net->layers[2].linear.weights, 0, 1), -2);
  // First layer biases.
-  TEST_EQUAL(nnMatrixAt(&net->biases[0], 0, 0), 0);
+  TEST_EQUAL(nnMatrixAt(&net->layers[0].linear.biases, 0, 0), 0);
-  TEST_EQUAL(nnMatrixAt(&net->biases[0], 0, 1), -1);
+  TEST_EQUAL(nnMatrixAt(&net->layers[0].linear.biases, 0, 1), -1);
-  // Second layer biases.
+  // Second linear layer (third layer) biases.
-  TEST_EQUAL(nnMatrixAt(&net->biases[1], 0, 0), 0);
+  TEST_EQUAL(nnMatrixAt(&net->layers[2].linear.biases, 0, 0), 0);
  // Test.
-  #define M 4
+#define M 4
-  nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/M);
+  nnQueryObject* query = nnMakeQueryObject(net, M);
-  const R test_inputs[M][2] = { { 0., 0. }, { 1., 0. }, { 0., 1. }, { 1., 1. } };
+  const R test_inputs[M][2] = {
+      {0., 0.},
+      {1., 0.},
+      {0., 1.},
+      {1., 1.}
+  };
  nnMatrix test_inputs_matrix = nnMatrixMake(M, 2);
  nnMatrixInit(&test_inputs_matrix, (const R*)test_inputs);
  nnQuery(net, query, &test_inputs_matrix);
-  const R expected_outputs[M] = { 0., 1., 1., 0. };
+  const R expected_outputs[M] = {0., 1., 1., 0.};
  for (int i = 0; i < M; ++i) {
    const R test_output = nnMatrixAt(nnNetOutputs(query), i, 0);
-    printf("\nInput: (%f, %f), Output: %f, Expected: %f\n",
+    printf(
-      test_inputs[i][0], test_inputs[i][1], test_output, expected_outputs[i]);
+        "\nInput: (%f, %f), Output: %f, Expected: %f\n", test_inputs[i][0],
+        test_inputs[i][1], test_output, expected_outputs[i]);
  }
  for (int i = 0; i < M; ++i) {
    const R test_output = nnMatrixAt(nnNetOutputs(query), i, 0);
diff --git a/src/lib/test/train_linear_perceptron_non_origin_test.c b/src/lib/test/train_linear_perceptron_non_origin_test.c
index 5a320ac..40a42e0 100644
--- a/src/lib/test/train_linear_perceptron_non_origin_test.c
+++ b/src/lib/test/train_linear_perceptron_non_origin_test.c
@@ -1,9 +1,8 @@
 #include <neuralnet/train.h>
+#include "neuralnet_impl.h"
 #include <neuralnet/matrix.h>
 #include <neuralnet/neuralnet.h>
-#include "activation.h"
-#include "neuralnet_impl.h"
 #include "test.h"
 #include "test_util.h"
@@ -11,19 +10,21 @@
 #include <assert.h>
 TEST_CASE(neuralnet_train_linear_perceptron_non_origin_test) {
-  const int num_layers = 1;
+  const int     num_layers = 1;
-  const int layer_sizes[] = { 1, 1 };
+  const int     input_size = 1;
-  const nnActivation layer_activations[] = { nnIdentity };
+  const nnLayer layers[]   = {
+      {.type = nnLinear, .linear = {.input_size = 1, .output_size = 1}}
+  };
-  nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations);
+  nnNeuralNetwork* net = nnMakeNet(layers, num_layers, input_size);
  assert(net);
-  // Train.
+// Train.
-  // Try to learn the Y = 2X + 1 line.
+// Try to learn the Y = 2X + 1 line.
-  #define N 2
+#define N 2
-  const R inputs[N]  = { 0., 1. };
+  const R inputs[N]  = {0., 1.};
-  const R targets[N] = { 1., 3. };
+  const R targets[N] = {1., 3.};
  nnMatrix inputs_matrix  = nnMatrixMake(N, 1);
  nnMatrix targets_matrix = nnMatrixMake(N, 1);
@@ -31,31 +32,32 @@ TEST_CASE(neuralnet_train_linear_perceptron_non_origin_test) {
  nnMatrixInit(&targets_matrix, targets);
  nnTrainingParams params = {
-    .learning_rate = 0.7,
+      .learning_rate  = 0.7,
-    .max_iterations = 20,
+      .max_iterations = 20,
-    .seed = 0,
+      .seed           = 0,
-    .weight_init = nnWeightInit01,
+      .weight_init    = nnWeightInit01,
-    .debug = false,
+      .debug          = false,
  };
  nnTrain(net, &inputs_matrix, &targets_matrix, &params);
-  const R weight = nnMatrixAt(&net->weights[0], 0, 0);
+  const R weight          = nnMatrixAt(&net->layers[0].linear.weights, 0, 0);
  const R expected_weight = 2.0;
-  printf("\nTrained network weight: %f, Expected: %f\n", weight, expected_weight);
+  printf(
+      "\nTrained network weight: %f, Expected: %f\n", weight, expected_weight);
  TEST_TRUE(double_eq(weight, expected_weight, WEIGHT_EPS));
-  const R bias = nnMatrixAt(&net->biases[0], 0, 0);
+  const R bias          = nnMatrixAt(&net->layers[0].linear.biases, 0, 0);
  const R expected_bias = 1.0;
  printf("Trained network bias: %f, Expected: %f\n", bias, expected_bias);
  TEST_TRUE(double_eq(bias, expected_bias, WEIGHT_EPS));
  // Test.
-  nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/1);
+  nnQueryObject* query = nnMakeQueryObject(net, 1);
-  const R test_input[] = { 2.3 };
+  const R test_input[] = {2.3};
-  R test_output[1];
+  R       test_output[1];
  nnQueryArray(net, query, test_input, test_output);
  const R expected_output = test_input[0] * expected_weight + expected_bias;
diff --git a/src/lib/test/train_linear_perceptron_test.c b/src/lib/test/train_linear_perceptron_test.c
index 2b1336d..667643b 100644
--- a/src/lib/test/train_linear_perceptron_test.c
+++ b/src/lib/test/train_linear_perceptron_test.c
@@ -1,9 +1,8 @@
 #include <neuralnet/train.h>
+#include "neuralnet_impl.h"
 #include <neuralnet/matrix.h>
 #include <neuralnet/neuralnet.h>
-#include "activation.h"
-#include "neuralnet_impl.h"
 #include "test.h"
 #include "test_util.h"
@@ -11,19 +10,21 @@
 #include <assert.h>
 TEST_CASE(neuralnet_train_linear_perceptron_test) {
-  const int num_layers = 1;
+  const int     num_layers = 1;
-  const int layer_sizes[] = { 1, 1 };
+  const int     input_size = 1;
-  const nnActivation layer_activations[] = { nnIdentity };
+  const nnLayer layers[]   = {
+      {.type = nnLinear, .linear = {.input_size = 1, .output_size = 1}}
+  };
-  nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations);
+  nnNeuralNetwork* net = nnMakeNet(layers, num_layers, input_size);
  assert(net);
-  // Train.
+// Train.
-  // Try to learn the Y=X line.
+// Try to learn the Y=X line.
-  #define N 2
+#define N 2
-  const R inputs[N]  = { 0., 1. };
+  const R inputs[N]  = {0., 1.};
-  const R targets[N] = { 0., 1. };
+  const R targets[N] = {0., 1.};
  nnMatrix inputs_matrix  = nnMatrixMake(N, 1);
  nnMatrix targets_matrix = nnMatrixMake(N, 1);
@@ -31,26 +32,27 @@ TEST_CASE(neuralnet_train_linear_perceptron_test) {
  nnMatrixInit(&targets_matrix, targets);
  nnTrainingParams params = {
-    .learning_rate = 0.7,
+      .learning_rate  = 0.7,
-    .max_iterations = 10,
+      .max_iterations = 10,
-    .seed = 0,
+      .seed           = 0,
-    .weight_init = nnWeightInit01,
+      .weight_init    = nnWeightInit01,
-    .debug = false,
+      .debug          = false,
  };
  nnTrain(net, &inputs_matrix, &targets_matrix, &params);
-  const R weight = nnMatrixAt(&net->weights[0], 0, 0);
+  const R weight          = nnMatrixAt(&net->layers[0].linear.weights, 0, 0);
  const R expected_weight = 1.0;
-  printf("\nTrained network weight: %f, Expected: %f\n", weight, expected_weight);
+  printf(
+      "\nTrained network weight: %f, Expected: %f\n", weight, expected_weight);
  TEST_TRUE(double_eq(weight, expected_weight, WEIGHT_EPS));
  // Test.
-  nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/1);
+  nnQueryObject* query = nnMakeQueryObject(net, 1);
-  const R test_input[] = { 2.3 };
+  const R test_input[] = {2.3};
-  R test_output[1];
+  R       test_output[1];
  nnQueryArray(net, query, test_input, test_output);
  const R expected_output = test_input[0];
diff --git a/src/lib/test/train_sigmoid_test.c b/src/lib/test/train_sigmoid_test.c
index 588e7ca..39a84b0 100644
--- a/src/lib/test/train_sigmoid_test.c
+++ b/src/lib/test/train_sigmoid_test.c
@@ -1,9 +1,9 @@
 #include <neuralnet/train.h>
-#include <neuralnet/matrix.h>
-#include <neuralnet/neuralnet.h>
 #include "activation.h"
 #include "neuralnet_impl.h"
+#include <neuralnet/matrix.h>
+#include <neuralnet/neuralnet.h>
 #include "test.h"
 #include "test_util.h"
@@ -11,21 +11,24 @@
 #include <assert.h>
 TEST_CASE(neuralnet_train_sigmoid_test) {
-  const int num_layers = 1;
+  const int     num_layers = 2;
-  const int layer_sizes[] = { 1, 1 };
+  const int     input_size = 1;
-  const nnActivation layer_activations[] = { nnSigmoid };
+  const nnLayer layers[]   = {
+      {.type = nnLinear, .linear = {.input_size = 1, .output_size = 1}},
+      {.type = nnSigmoid},
+  };
-  nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations);
+  nnNeuralNetwork* net = nnMakeNet(layers, num_layers, input_size);
  assert(net);
-  // Train.
+// Train.
-  // Try to learn the sigmoid function.
+// Try to learn the sigmoid function.
-  #define N 3
+#define N 3
  R inputs[N];
  R targets[N];
  for (int i = 0; i < N; ++i) {
-    inputs[i] = lerp(-1, +1, (R)i / (R)(N-1));
+    inputs[i]  = lerp(-1, +1, (R)i / (R)(N - 1));
    targets[i] = sigmoid(inputs[i]);
  }
@@ -35,29 +38,30 @@ TEST_CASE(neuralnet_train_sigmoid_test) {
  nnMatrixInit(&targets_matrix, targets);
  nnTrainingParams params = {
-    .learning_rate = 0.9,
+      .learning_rate  = 0.9,
-    .max_iterations = 100,
+      .max_iterations = 100,
-    .seed = 0,
+      .seed           = 0,
-    .weight_init = nnWeightInit01,
+      .weight_init    = nnWeightInit01,
-    .debug = false,
+      .debug          = false,
  };
  nnTrain(net, &inputs_matrix, &targets_matrix, &params);
-  const R weight = nnMatrixAt(&net->weights[0], 0, 0);
+  const R weight          = nnMatrixAt(&net->layers[0].linear.weights, 0, 0);
  const R expected_weight = 1.0;
-  printf("\nTrained network weight: %f, Expected: %f\n", weight, expected_weight);
+  printf(
+      "\nTrained network weight: %f, Expected: %f\n", weight, expected_weight);
  TEST_TRUE(double_eq(weight, expected_weight, WEIGHT_EPS));
  // Test.
-  nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/1);
+  nnQueryObject* query = nnMakeQueryObject(net, 1);
-  const R test_input[] = { 0.3 };
+  const R test_input[] = {0.3};
-  R test_output[1];
+  R       test_output[1];
  nnQueryArray(net, query, test_input, test_output);
-  const R expected_output = 0.574442516811659;  // sigmoid(0.3)
+  const R expected_output = 0.574442516811659; // sigmoid(0.3)
  printf("Output: %f, Expected: %f\n", test_output[0], expected_output);
  TEST_TRUE(double_eq(test_output[0], expected_output, OUTPUT_EPS));
diff --git a/src/lib/test/train_xor_test.c b/src/lib/test/train_xor_test.c
index 6ddc6e0..78695a3 100644
--- a/src/lib/test/train_xor_test.c
+++ b/src/lib/test/train_xor_test.c
@@ -1,9 +1,9 @@
 #include <neuralnet/train.h>
-#include <neuralnet/matrix.h>
-#include <neuralnet/neuralnet.h>
 #include "activation.h"
 #include "neuralnet_impl.h"
+#include <neuralnet/matrix.h>
+#include <neuralnet/neuralnet.h>
 #include "test.h"
 #include "test_util.h"
@@ -11,18 +11,27 @@
 #include <assert.h>
 TEST_CASE(neuralnet_train_xor_test) {
-  const int num_layers = 2;
+  const int     num_layers = 3;
-  const int layer_sizes[] = { 2, 2, 1 };
+  const int     input_size = 2;
-  const nnActivation layer_activations[] = { nnRelu, nnIdentity };
+  const nnLayer layers[]   = {
+      {.type = nnLinear, .linear = {.input_size = 2, .output_size = 2}},
+      {.type = nnRelu},
+      {.type = nnLinear, .linear = {.input_size = 2, .output_size = 1}}
+  };
-  nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations);
+  nnNeuralNetwork* net = nnMakeNet(layers, num_layers, input_size);
  assert(net);
  // Train.
-  #define N 4
+#define N 4
-  const R inputs[N][2]  = { { 0., 0. }, { 0., 1. }, { 1., 0. }, { 1., 1. } };
+  const R inputs[N][2] = {
-  const R targets[N] = { 0., 1., 1., 0. };
+      {0., 0.},
+      {0., 1.},
+      {1., 0.},
+      {1., 1.}
+  };
+  const R targets[N] = {0., 1., 1., 0.};
  nnMatrix inputs_matrix  = nnMatrixMake(N, 2);
  nnMatrix targets_matrix = nnMatrixMake(N, 1);
@@ -30,31 +39,37 @@ TEST_CASE(neuralnet_train_xor_test) {
  nnMatrixInit(&targets_matrix, targets);
  nnTrainingParams params = {
-    .learning_rate = 0.1,
+      .learning_rate  = 0.1,
-    .max_iterations = 500,
+      .max_iterations = 500,
-    .seed = 0,
+      .seed           = 0,
-    .weight_init = nnWeightInit01,
+      .weight_init    = nnWeightInit01,
-    .debug = false,
+      .debug          = false,
  };
  nnTrain(net, &inputs_matrix, &targets_matrix, &params);
  // Test.
-  #define M 4
+#define M 4
-  nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/M);
+  nnQueryObject* query = nnMakeQueryObject(net, M);
-  const R test_inputs[M][2] = { { 0., 0. }, { 1., 0. }, { 0., 1. }, { 1., 1. } };
+  const R test_inputs[M][2] = {
+      {0., 0.},
+      {1., 0.},
+      {0., 1.},
+      {1., 1.}
+  };
  nnMatrix test_inputs_matrix = nnMatrixMake(M, 2);
  nnMatrixInit(&test_inputs_matrix, (const R*)test_inputs);
  nnQuery(net, query, &test_inputs_matrix);
-  const R expected_outputs[M] = { 0., 1., 1., 0. };
+  const R expected_outputs[M] = {0., 1., 1., 0.};
  for (int i = 0; i < M; ++i) {
    const R test_output = nnMatrixAt(nnNetOutputs(query), i, 0);
-    printf("\nInput: (%f, %f), Output: %f, Expected: %f\n",
+    printf(
-      test_inputs[i][0], test_inputs[i][1], test_output, expected_outputs[i]);
+        "\nInput: (%f, %f), Output: %f, Expected: %f\n", test_inputs[i][0],
+        test_inputs[i][1], test_output, expected_outputs[i]);
  }
  for (int i = 0; i < M; ++i) {
    const R test_output = nnMatrixAt(nnNetOutputs(query), i, 0);