From 57bf2b46b4b277952d722f6439b72f9e40db129c Mon Sep 17 00:00:00 2001
From: 3gg <3gg@shellblade.net>
Date: Sat, 16 Dec 2023 11:29:33 -0800
Subject: Clarify some terminology.

---
 src/lib/src/train.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/src/lib/src/train.c b/src/lib/src/train.c
index fe9f598..7559ece 100644
--- a/src/lib/src/train.c
+++ b/src/lib/src/train.c
@@ -239,17 +239,16 @@ void nnTrain(
 
       // Compute this layer's gradient.
       //
-      // By "gradient" we mean the expression common to the weights and bias
-      // gradients. This is the part of the expression that does not contain
-      // this layer's input.
+      // By 'gradient' we mean the subexpression common to all the gradients
+      // for this layer.
+      // For linear layers, this is the subexpression common to both the
+      // weights and bias gradients.
       //
       //   Linear:  G = id
       //   Relu:    G = (output_k > 0 ? 1 : 0)
       //   Sigmoid: G = output_k * (1 - output_k)
       switch (layer->type) {
         case nnLinear: {
-          // TODO: Just copy the pointer?
-          *gradient = nnMatrixBorrow(&errors[l]);
           break;
         }
         case nnRelu:
@@ -294,7 +293,7 @@ void nnTrain(
       nnMatrix* layer_biases = &linear->biases;
 
       // Outer product to compute the weight deltas.
-      nnMatrixMulOuter(layer_input, gradient, &weight_deltas[l]);
+      nnMatrixMulOuter(layer_input, &errors[l], &weight_deltas[l]);
 
       // Update weights.
       nnMatrixScale(&weight_deltas[l], params->learning_rate);
@@ -304,7 +303,7 @@ void nnTrain(
       // This is the same formula as for weights, except that the o_j term
       // is just 1.
       nnMatrixMulSub(
-          layer_biases, gradient, params->learning_rate, layer_biases);
+          layer_biases, &errors[l], params->learning_rate, layer_biases);
 
     }
   }
-- 
cgit v1.2.3
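
For context, here is a minimal, self-contained sketch of the update the patched code
performs: the activation-local gradient G is folded into errors[l], which then drives
both the weight update (an outer product with the layer input, cf. nnMatrixMulOuter)
and the bias update (the same formula with o_j = 1, cf. nnMatrixMulSub). Plain arrays
stand in for nnMatrix, and the helper names gradient_sigmoid and sgd_step are
illustrative only; they do not exist in the library.

#include <math.h>
#include <stddef.h>
#include <stdio.h>

/* The activation-local gradient G for sigmoid, per the comment block in the
 * patch: the subexpression common to all the gradients for this layer.
 * (Linear: G = id; Relu: G = output_k > 0 ? 1 : 0.) */
static double gradient_sigmoid(double output) {
  return output * (1.0 - output);
}

/* One SGD step for a linear layer with `rows` inputs and `cols` outputs.
 * `error` plays the role of errors[l] in the patch:
 *
 *   W_jk -= learning_rate * o_j * error_k  (outer product, cf. nnMatrixMulOuter)
 *   b_k  -= learning_rate * 1   * error_k  (same formula with o_j = 1)
 */
static void sgd_step(double* weights, double* biases, const double* input,
                     const double* error, size_t rows, size_t cols,
                     double learning_rate) {
  for (size_t j = 0; j < rows; ++j) {
    for (size_t k = 0; k < cols; ++k) {
      weights[j * cols + k] -= learning_rate * input[j] * error[k];
    }
  }
  for (size_t k = 0; k < cols; ++k) {
    biases[k] -= learning_rate * error[k];
  }
}

int main(void) {
  /* 2 inputs -> 1 sigmoid output, squared-error loss. */
  double weights[2]     = {0.5, -0.3};
  double bias[1]        = {0.1};
  const double input[2] = {1.0, 2.0};
  const double target   = 1.0;

  /* Forward pass. */
  const double z = weights[0] * input[0] + weights[1] * input[1] + bias[0];
  const double output = 1.0 / (1.0 + exp(-z));

  /* Output-layer error term: dLoss/dOutput scaled by G. */
  const double error[1] = {(output - target) * gradient_sigmoid(output)};

  sgd_step(weights, bias, input, error, 2, 1, 0.1);
  printf("w = (%g, %g), b = %g\n", weights[0], weights[1], bias[0]);
  return 0;
}

Compile with `cc sketch.c -lm`. This also shows why the patch can drop the borrowed
gradient matrix in the nnLinear case: there G = id, so errors[l] already is the common
subexpression and can be passed straight to the weight and bias updates.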