From dc538733da8d49e7240d00fb05517053076fe261 Mon Sep 17 00:00:00 2001
From: 3gg <3gg@shellblade.net>
Date: Sat, 16 Dec 2023 11:06:03 -0800
Subject: Define vector outer product (nnMatrixMulOuter), which removes the
 need to transpose layer inputs during training.

---
 src/lib/include/neuralnet/matrix.h | 13 ++++++++++---
 src/lib/src/matrix.c               | 26 ++++++++++++++++++++++++++
 src/lib/src/train.c                | 28 +++-------------------------
 3 files changed, 39 insertions(+), 28 deletions(-)

diff --git a/src/lib/include/neuralnet/matrix.h b/src/lib/include/neuralnet/matrix.h
index f80b985..4cb0d25 100644
--- a/src/lib/include/neuralnet/matrix.h
+++ b/src/lib/include/neuralnet/matrix.h
@@ -56,13 +56,20 @@ void nnMatrixInitConstant(nnMatrix*, R value);
 /// Multiply two matrices.
 void nnMatrixMul(const nnMatrix* left, const nnMatrix* right, nnMatrix* out);
 
-/// Multiply two matrices, row variant.
+/// Multiply two matrices, row-by-row variant.
 ///
-/// This function multiples two matrices row-by-row instead of row-by-column.
-/// nnMatrixMul(A, B, O) == nnMatrixMulRows(A, B^T, O).
+/// This function multiplies two matrices row-by-row instead of row-by-column,
+/// which is equivalent to regular multiplication after transposing the right
+/// hand matrix.
+///
+/// nnMatrixMul(A, B, O) == nnMatrixMulRows(A, B^T, O).
 void nnMatrixMulRows(
     const nnMatrix* left, const nnMatrix* right, nnMatrix* out);
 
+/// Compute the outer product of two vectors.
+void nnMatrixMulOuter(
+    const nnMatrix* left, const nnMatrix* right, nnMatrix* out);
+
 /// Matrix multiply-add.
 ///
 /// out = left + (right * scale)
diff --git a/src/lib/src/matrix.c b/src/lib/src/matrix.c
index d5c3fcc..29511eb 100644
--- a/src/lib/src/matrix.c
+++ b/src/lib/src/matrix.c
@@ -189,6 +189,32 @@ void nnMatrixMulRows(
   }
 }
 
+void nnMatrixMulOuter(
+    const nnMatrix* left, const nnMatrix* right, nnMatrix* out) {
+  assert(left != 0);
+  assert(right != 0);
+  assert(out != 0);
+  assert(out != left);
+  assert(out != right);
+  assert((left->rows == 1) || (left->cols == 1));   // Vector.
+  assert((right->rows == 1) || (right->cols == 1)); // Vector.
+  const int N = left->rows * left->cols;
+  const int M = right->rows * right->cols;
+  assert((out->rows == N) && (out->cols == M));
+
+  const R* left_value = left->values;
+  R* out_value = out->values;
+
+  for (int i = 0; i < N; ++i) {
+    const R* right_value = right->values;
+
+    for (int j = 0; j < M; ++j) {
+      *out_value++ = *left_value * *right_value++;
+    }
+    left_value++;
+  }
+}
+
 void nnMatrixMulAdd(
     const nnMatrix* left, const nnMatrix* right, R scale, nnMatrix* out) {
   assert(left);
diff --git a/src/lib/src/train.c b/src/lib/src/train.c
index ccff553..fe9f598 100644
--- a/src/lib/src/train.c
+++ b/src/lib/src/train.c
@@ -153,14 +153,9 @@ void nnTrain(
   nnGradientElements* gradient_elems =
       calloc(net->num_layers, sizeof(nnGradientElements));
 
-  // Allocate the output transpose vectors for weight delta calculation.
-  // This is one column vector per layer.
-  nnMatrix* outputs_T = calloc(net->num_layers, sizeof(nnMatrix));
-
   assert(errors != 0);
   assert(weight_deltas != 0);
   assert(gradient_elems);
-  assert(outputs_T);
 
   for (int l = 0; l < net->num_layers; ++l) {
     const int layer_input_size = nnLayerInputSize(net, l);
@@ -169,7 +164,6 @@ void nnTrain(
 
     errors[l] = nnMatrixMake(1, layer_output_size);
     weight_deltas[l] = nnMatrixMake(layer_input_size, layer_output_size);
-    outputs_T[l] = nnMatrixMake(layer_output_size, 1);
 
     // Allocate the gradient elements and vectors for weight delta calculation.
     nnGradientElements* elems = &gradient_elems[l];
@@ -199,9 +193,6 @@ void nnTrain(
   // the outputs.
   const nnMatrix* const training_outputs = query->network_outputs;
 
-  // A vector to store the training input transposed.
-  nnMatrix training_inputs_T = nnMatrixMake(inputs->cols, 1);
-
   // If debug mode is requested, we will show progress every Nth iteration.
   const int progress_frame =
       (params->max_iterations < PROGRESS_THRESHOLD)
@@ -223,10 +214,6 @@ void nnTrain(
     const nnMatrix training_targets =
        nnMatrixBorrowRows((nnMatrix*)targets, sample, 1);
 
-    // Will need the input transposed for backpropagation.
-    // Assuming one training input per iteration for now.
-    nnMatrixTranspose(&training_inputs, &training_inputs_T);
-
     // Forward pass.
     nnQuery(net, query, &training_inputs);
 
@@ -240,14 +227,11 @@ void nnTrain(
     nnMatrixSub(
         training_outputs, &training_targets, &errors[net->num_layers - 1]);
 
-    // Update outputs_T, which we need during weight updates.
-    for (int l = 0; l < net->num_layers; ++l) {
-      nnMatrixTranspose(&query->layer_outputs[l], &outputs_T[l]);
-    }
-
     // Update weights and biases for each internal layer, back-propagating
     // errors along the way.
     for (int l = net->num_layers - 1; l >= 0; --l) {
+      const nnMatrix* layer_input =
+          (l == 0) ? &training_inputs : &query->layer_outputs[l - 1];
       const nnMatrix* layer_output = &query->layer_outputs[l];
       nnGradientElements* elems = &gradient_elems[l];
       nnMatrix* gradient = &elems->gradient;
@@ -310,10 +294,7 @@ void nnTrain(
         nnMatrix* layer_biases = &linear->biases;
 
         // Outer product to compute the weight deltas.
-        // This layer's input is the previous layer's output.
-        const nnMatrix* input_T =
-            (l == 0) ? &training_inputs_T : &outputs_T[l - 1];
-        nnMatrixMul(input_T, gradient, &weight_deltas[l]);
+        nnMatrixMulOuter(layer_input, gradient, &weight_deltas[l]);
 
         // Update weights.
         nnMatrixScale(&weight_deltas[l], params->learning_rate);
@@ -360,7 +341,6 @@ void nnTrain(
     // Clean up.
     for (int l = 0; l < net->num_layers; ++l) {
      nnMatrixDel(&errors[l]);
-      nnMatrixDel(&outputs_T[l]);
       nnMatrixDel(&weight_deltas[l]);
 
       nnGradientElements* elems = &gradient_elems[l];
@@ -378,9 +358,7 @@ void nnTrain(
       break;
     }
   }
-  nnMatrixDel(&training_inputs_T);
  free(errors);
-  free(outputs_T);
   free(weight_deltas);
   free(gradient_elems);
 }
--
cgit v1.2.3
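
For context, below is a small usage sketch (not part of the patch) that exercises the new nnMatrixMulOuter in isolation, mirroring the weight-delta computation in train.c: the layer input is the left vector and the gradient is the right vector. It assumes the nnMatrixMake/nnMatrixDel helpers used above, a row-major and directly writable `values` array, and an include path of neuralnet/matrix.h; adjust these to the actual build setup.

// Sketch only, not part of the patch. Assumed: R is a float-like scalar,
// nnMatrix exposes rows/cols/values as used in matrix.c above, and the
// header is reachable as <neuralnet/matrix.h>.
#include <neuralnet/matrix.h>

#include <stdio.h>

int main(void) {
  nnMatrix a   = nnMatrixMake(3, 1);  // Column vector (e.g. layer input).
  nnMatrix b   = nnMatrixMake(1, 2);  // Row vector (e.g. gradient).
  nnMatrix out = nnMatrixMake(3, 2);  // Outer product, 3x2.

  for (int i = 0; i < 3; ++i) a.values[i] = (R)(i + 1);  // a = [1 2 3]^T
  for (int j = 0; j < 2; ++j) b.values[j] = (R)(j + 1);  // b = [1 2]

  nnMatrixMulOuter(&a, &b, &out);

  // Expect out[i][j] == a[i] * b[j]:
  //   1 2
  //   2 4
  //   3 6
  for (int i = 0; i < 3; ++i) {
    for (int j = 0; j < 2; ++j) {
      printf("%f ", (double)out.values[i * 2 + j]);
    }
    printf("\n");
  }

  nnMatrixDel(&a);
  nnMatrixDel(&b);
  nnMatrixDel(&out);
  return 0;
}

In the training loop itself, this same call replaces the old transpose-then-multiply sequence, as shown in the hunk above: nnMatrixMulOuter(layer_input, gradient, &weight_deltas[l]).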