From 17cdabf289aad7ceaa3d8614d7cbc5de6d2adc80 Mon Sep 17 00:00:00 2001 From: vik <> Date: Fri, 29 Nov 2024 11:43:24 -0600 Subject: [PATCH] full impl done - backprop finished --- snn.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 55 insertions(+), 9 deletions(-) diff --git a/snn.c b/snn.c index 89f0a75..4e40ce6 100644 --- a/snn.c +++ b/snn.c @@ -18,6 +18,7 @@ typedef struct Layer { // next layer while n is the number of neurons in the current layer // -> exploit BLAS to matmul and get the results of the next layer gsl_matrix* values; // the layer's values + gsl_matrix* biases; // weights for the bias. bias will always be 1 } Layer; double uniformrandom(double low, double high) { @@ -25,10 +26,16 @@ double uniformrandom(double low, double high) { return low + ((double)rand() / (RAND_MAX / (high - low))); } +// activation function and it's derivative + double relu(double input, double alpha) { return (input >= 0) ? input : (alpha * input); } +double drelu(double input, double alpha) { + return (input >= 0) ? 1 : alpha; +} + Layer* createlayer(Layer* lprev, Layer* lnext, int neurons, gsl_matrix* nvalues) { Layer* self = (Layer*) calloc(1, sizeof(Layer)); if (self == NULL) return NULL; @@ -38,11 +45,12 @@ Layer* createlayer(Layer* lprev, Layer* lnext, int neurons, gsl_matrix* nvalues) self->neurons = neurons; assert(neurons == nvalues->size1); - self->values = nvaules; + self->values = nvalues; // setup the weights matrix assert(lnext != NULL); self->weights = gsl_matrix_calloc(lnext->neurons, neurons); + self->biases = gsl_matrix_calloc(lnext->neurons, 1); // make the matrix have uniform random values from -0.5 to 0.5 gsl_matrix_set_all(self->weights, uniformrandom(-0.5, 0,5)); return self; @@ -52,13 +60,15 @@ void freelayer(Layer* layer) { assert(layer != NULL); if (layer->weights != NULL) gsl_matrix_free(layer->weights); if (layer->values != NULL) gsl_matrix_free(layer->values); + if (layer->biases != NULL) gsl_matrix_free(layer->biases); free(layer); } void forwardprop(Layer* layer) { assert(layer->next != NULL); gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, layer->weights, layer->values, 0, layer->next->values); - // layer->next->values will only ever have a single row + gsl_matrix_add(layer->next->values, layer->biases); + // layer->next->values will only ever have a single column for(unsigned int i = 0; i < layer->next->values->size1; i++) { double davalue = gsl_matrix_get(layer->next->values, i, 0); gsl_matrix_set(layer->next->values, i, 0, relu(davalue, ALPHA)); @@ -91,17 +101,53 @@ double msecost(Layer* layer, gsl_matrix* expected) { void backprop(Layer* layer, gsl_matrix* expected) { // b/c you use mse, you can just do like ouput layer - expected output (matrix subtraction) - assert(layer->previous != NULL); - // signifies this is the output layer - previous layer would be hidden layer (ideally) - gsl_matrix* deltao = gsl_matrix_alloc(layer->neurons, 1); - gsl_matrix_memcpy(deltao, layer->values); - gsl_matrix_sub(deltao, expected); - gsl_matrix* prevlayertranposed = gsl_matrix_alloc(1, layer->previous->values->size2); + assert(layer->previous != NULL); + gsl_matrix* deltao = gsl_matrix_alloc(layer->neurons, 1); + gsl_matrix_memcpy(deltao, layer->values); + gsl_matrix_sub(deltao, expected); + if (layer->next == NULL) { + // signified this is the output layer + gsl_matrix* prevlayertranposed = gsl_matrix_alloc(layer->previous->values->size2, layer->previous->values->size1); gsl_matrix_transpose_memcpy(prevlayertranposed, layer->previous->values); gsl_matrix* updatedweights = gsl_matrix_alloc(layer->neurons, layer->previous->neurons); gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, deltao, prevlayertranposed, 0.0, updatedweights); gsl_matrix_scale(updatedweights, (double)(LEARNING_RATE * -1.00)); gsl_matrix_memcpy(layer->previous->weights, updatedweights); - // finish impl + gsl_matrix* updatedbiases = gsl_matrix_alloc(layer->neurons, 1); + gsl_matrix_memcpy(updatedbiases, deltao); + gsl_matrix_scale(updatedbiases, (double)(LEARNING_RATE * -1.00)); + gsl_matrix_memcpy(layer->previous->biases, updatedbiases); + gsl_matrix_free(prevlayertranposed); + gsl_matrix_free(updatedweights); + gsl_matrix_free(updatedbiases); + } else { + // weights connecting the input layer to the hidden layer - cant do MSE trick + gsl_matrix* deltah = gsl_matrix_alloc(layer->neurons, 1); + for (unsigned int i = 0; i < layer->neurons; i++) { + gsl_matrix_set(deltah, i, 0, drelu(gsl_matrix_get(layer->values, i, 0), ALPHA)); + // derivative values are set in deltah + } + gsl_matrix* weightstransposed = gsl_matrix_alloc(layer->weights->size2, layer->weights->size1); + gsl_matrix_transpose_memcpy(weightstransposed, layer->weights); + gsl_matrix* transposedweightsmultipliedbydelta = gsl_matrix_alloc(weightstransposed->size1, 1); + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, weightstransposed, deltao, 0.0, transposedweightsmultipliedbydelta); + gsl_matrix_mul_elements(deltah, transposedweightsmultipliedbydelta); + gsl_matrix* previnputtransposed = gsl_matrix_alloc(layer->previous->values->size2, layer->previous->values->size1); + gsl_matrix_transpose_memcpy(previnputtransposed, layer->previous->values); + gsl_matrix* updatedweights = gsl_matrix_alloc(layer->neurons, layer->previous->neurons); + gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, deltah, previnputtransposed, 0.0, updatedweights); + gsl_matrix_scale(updatedweights, (double)(LEARNING_RATE * -1.00)); + gsl_matrix_memcpy(layer->previous->weights, updatedweights); + gsl_matrix* updatedbiases = gsl_matrix_alloc(layer->neurons, 1); + gsl_matrix_memcpy(updatedbiases, deltah); + gsl_matrix_scale(updatedbiases, (double)(LEARNING_RATE * -1.00)); + gsl_matrix_memcpy(layer->previous->biases, updatedbiases); + gsl_matrix_free(deltah); + gsl_matrix_free(weightstransposed); + gsl_matrix_free(transposedweightsmultipliedbydelta); + gsl_matrix_free(previnputtransposed); + gsl_matrix_free(updatedweights); + gsl_matrix_free(updatedbiases); } + gsl_matrix_free(deltao); }