From 3df7b6fb0c65295eed4590e6f166d60e89b3c68e Mon Sep 17 00:00:00 2001
From: 3gg <3gg@shellblade.net>
Date: Thu, 23 Nov 2023 10:02:33 -0800
Subject: Documentation.

---
 src/lib/src/matrix.c         |  2 +-
 src/lib/src/neuralnet_impl.h |  2 +-
 src/lib/src/train.c          | 15 +++++++++------
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/src/lib/src/matrix.c b/src/lib/src/matrix.c
index 174504f..d98c8bb 100644
--- a/src/lib/src/matrix.c
+++ b/src/lib/src/matrix.c
@@ -6,7 +6,7 @@
 
 nnMatrix nnMatrixMake(int rows, int cols) {
   R* values = calloc(rows * cols, sizeof(R));
-  assert(values != 0);
+  assert(values != 0); // TODO: Make it a hard assert.
 
   return (nnMatrix){
       .rows   = rows,
diff --git a/src/lib/src/neuralnet_impl.h b/src/lib/src/neuralnet_impl.h
index 18694f4..f5a9c63 100644
--- a/src/lib/src/neuralnet_impl.h
+++ b/src/lib/src/neuralnet_impl.h
@@ -30,7 +30,7 @@ typedef struct nnNeuralNetwork {
 /// |network_outputs| points to the last output matrix in |layer_outputs| for
 /// convenience.
 typedef struct nnQueryObject {
-  int       num_layers;
+  int       num_layers;      // Same as nnNeuralNetwork::num_layers.
   nnMatrix* layer_outputs;   // Output matrices, one output per layer.
   nnMatrix* network_outputs; // Points to the last output matrix.
 } nnTrainingQueryObject;
diff --git a/src/lib/src/train.c b/src/lib/src/train.c
index 9244907..dc93f0f 100644
--- a/src/lib/src/train.c
+++ b/src/lib/src/train.c
@@ -219,13 +219,15 @@ void nnTrain(
       // Assuming one training input per iteration for now.
       nnMatrixTranspose(&training_inputs, &training_inputs_T);
 
-      // Run a forward pass and compute the output layer error.
-      // We don't square the error here; instead, we just compute t-o, which is
-      // part of the derivative, -2(t-o). Also, we compute o-t instead to
-      // remove that outer negative sign.
+      // Run a forward pass and compute the output layer error relevant to the
+      // derivative: o-t.
+      //   Error: (t-o)^2
+      //   dE/do = -2(t-o)
+      //         = +2(o-t)
+      // Note that we compute o-t, not t-o, to remove the outer negative sign.
+      // The 2 is dropped because we are only interested in the direction of the
+      // gradient. The learning rate controls the magnitude.
       nnQuery(net, query, &training_inputs);
-      // nnMatrixSub(&training_targets, training_outputs,
-      // &errors[net->num_layers - 1]);
       nnMatrixSub(
           training_outputs, &training_targets, &errors[net->num_layers - 1]);
 
@@ -328,6 +330,7 @@ void nnTrain(
         params->max_iterations, ComputeMSE(&errors[net->num_layers - 1]));
   }
 
+  // Clean up.
   for (int l = 0; l < net->num_layers; ++l) {
     nnMatrixDel(&errors[l]);
     nnMatrixDel(&outputs_T[l]);
-- 
cgit v1.2.3