1 files changed, 9 insertions, 6 deletions
diff --git a/src/lib/src/train.c b/src/lib/src/train.c
index 9244907..dc93f0f 100644
--- a/src/lib/src/train.c
+++ b/src/lib/src/train.c
@@ -219,13 +219,15 @@ void nnTrain(
      // Assuming one training input per iteration for now.
      nnMatrixTranspose(&training_inputs, &training_inputs_T);
-      // Run a forward pass and compute the output layer error.
+      // Run a forward pass and compute the output layer error relevant to the
-      // We don't square the error here; instead, we just compute t-o, which is
+      // derivative: o-t.
-      // part of the derivative, -2(t-o). Also, we compute o-t instead to
+      //   Error: (t-o)^2
-      // remove that outer negative sign.
+      //   dE/do = -2(t-o)
+      //         = +2(o-t)
+      // Note that we compute o-t rather than t-o to absorb that outer negative
+      // sign. The factor of 2 is dropped because only the direction of the
+      // gradient matters here; the learning rate controls the magnitude.
      nnQuery(net, query, &training_inputs);
-      // nnMatrixSub(&training_targets, training_outputs,
-      // &errors[net->num_layers - 1]);
      nnMatrixSub(
          training_outputs, &training_targets, &errors[net->num_layers - 1]);
@@ -328,6 +330,7 @@ void nnTrain(
        params->max_iterations, ComputeMSE(&errors[net->num_layers - 1]));
  }
+  // Clean up.
  for (int l = 0; l < net->num_layers; ++l) {
    nnMatrixDel(&errors[l]);
    nnMatrixDel(&outputs_T[l]);