push to git

This commit is contained in:
vikshar 2025-01-16 22:03:28 -06:00
parent 40a2b072b8
commit cc944d8cc3
4 changed files with 535 additions and 199 deletions

531
cnn.c
View File

@ -4,6 +4,7 @@
#include <stdlib.h>
#include <math.h>
#include <string.h>
typedef enum {
input,
@ -35,6 +36,9 @@ typedef struct {
int filter_size; // single integer b/c filter will usually be square shaped
int stride;
int zero_padding; // single integer for how many layers of zero padding
int input_height;
int input_width;
int input_channels;
float (*weights);
float (*biases);
} conv_params;
@ -42,6 +46,8 @@ typedef struct {
struct {
int pool_size; // single integer again
int stride;
int input_height;
int input_width;
} pool_params;
struct {
@ -62,6 +68,13 @@ typedef struct {
int num_layers;
} Network;
// Allocates a Network with room for `capacity` layer pointers.
//
// Every slot starts out NULL so that destroy_network(), which tests each
// slot before freeing it, is safe to call on a partially filled network.
// num_layers is set to `capacity`; the caller fills network->layers[i].
//
// Returns NULL if `capacity` is negative or an allocation fails.
Network* create_network(int capacity) {
    if (capacity < 0) {
        return NULL;
    }

    Network* network = malloc(sizeof *network);
    if (!network) {
        return NULL;
    }

    // calloc (not malloc) so unused slots are NULL instead of garbage.
    network->layers = calloc((size_t)capacity, sizeof *network->layers);
    if (!network->layers && capacity > 0) {
        free(network);
        return NULL;
    }

    network->num_layers = capacity;
    return network;
}
float he_init(int fan_in) {
float scale = sqrt(2.0f / fan_in);
float random = (float)rand() / RAND_MAX * 2 - 1;
@ -92,20 +105,20 @@ float sigmoid_g(float x) {
}
void softmax(float* input, float* output, int size) {
float max = input[0];
for(int i = 1; i < size; i++) {
if(input[i] > max) {
max = input[i];
}
}
float sum = 0;
for(int i = 0; i < size; i++) {
output[i] = exp(input[i] - max);
sum += output[i];
}
for(int i = 0; i < size; i++) {
output[i] /= sum;
}
float max = input[0];
for(int i = 1; i < size; i++) {
if(input[i] > max) {
max = input[i];
}
}
float sum = 0;
for(int i = 0; i < size; i++) {
output[i] = exp(input[i] - max);
sum += output[i];
}
for(int i = 0; i < size; i++) {
output[i] /= sum;
}
}
Layer* create_input(int height, int width, int channels) {
@ -125,6 +138,9 @@ Layer* create_conv(int input_height, int input_width, int input_channels, int nu
layer->params.conv_params.filter_size = filter_size;
layer->params.conv_params.stride = stride;
layer->params.conv_params.zero_padding = padding;
layer->params.conv_params.input_height = input_height;
layer->params.conv_params.input_width = input_width;
layer->params.conv_params.input_channels = input_channels;
// output dimensions
// https://cs231n.github.io/convolutional-networks/
@ -133,6 +149,7 @@ Layer* create_conv(int input_height, int input_width, int input_channels, int nu
layer->height = output_h;
layer->width = output_w;
layer->channels = num_filters;
layer->activation_g = relu_g;
// conv layer uses relu, use HE init
int weights_size = num_filters * input_channels * filter_size * filter_size;
@ -145,7 +162,8 @@ Layer* create_conv(int input_height, int input_width, int input_channels, int nu
layer->params.conv_params.biases = (float*)calloc(num_filters, sizeof(float));
layer->output = (float*) calloc(output_h * output_w * num_filters, sizeof(float));
layer->delta = (float*) calloc(output_h * output_w * num_filters, sizeof(float));
layer->delta = (float*) calloc(output_h * output_w * num_filters, sizeof(float));
layer->pre_activation = (float*)calloc(output_h * output_w * num_filters, sizeof(float));
return layer;
}
@ -155,6 +173,9 @@ Layer* create_maxpool(int input_height, int input_width, int input_channels, int
layer->type = max_pool;
layer->params.pool_params.pool_size = pool_size;
layer->params.pool_params.stride = stride;
layer->params.pool_params.input_height = input_height;
layer->params.pool_params.input_width = input_width;
// output dimensions
// https://cs231n.github.io/convolutional-networks/
@ -165,7 +186,7 @@ Layer* create_maxpool(int input_height, int input_width, int input_channels, int
layer->channels = input_channels;
layer->output = (float*) calloc(output_h * output_w * input_channels, sizeof(float));
layer->delta = (float*) calloc(output_h * output_w * input_channels, sizeof(float));
layer->delta = (float*) calloc(output_h * output_w * input_channels, sizeof(float));
return layer;
}
@ -175,6 +196,7 @@ Layer* create_fc(int output_size, int input_size, activation type) {
layer->type = fully_connected;
layer->params.fc_params.output_size = output_size;
layer->params.fc_params.type = type; // activation type can either be sigmoid or softmax (output layer)
layer->activation_g = (type == a_sigmoid) ? sigmoid_g : NULL; // null is softmax (doesnt have a gradient)
// use glorot initalization
layer->params.fc_params.weights = (float*)calloc(output_size * input_size, sizeof(float));
@ -189,6 +211,7 @@ Layer* create_fc(int output_size, int input_size, activation type) {
layer->channels = output_size;
layer->output = (float*) calloc(output_size, sizeof(float));
layer->delta = (float*) calloc(output_size, sizeof(float));
layer->pre_activation = (float*) calloc(output_size, sizeof(float));
return layer;
}
@ -197,110 +220,132 @@ void free_layer(Layer* layer) {
switch (layer->type) {
case input:
free(layer->output);
free(layer);
free(layer);
break;
case conv:
free(layer->params.conv_params.weights);
free(layer->params.conv_params.biases);
free(layer->output);
free(layer->delta);
free(layer);
free(layer->params.conv_params.biases);
free(layer->output);
free(layer->delta);
free(layer->pre_activation);
free(layer);
break;
case max_pool:
free(layer->output);
free(layer->delta);
free(layer);
free(layer->delta);
free(layer);
break;
case fully_connected:
free(layer->params.fc_params.weights);
free(layer->params.fc_params.biases);
free(layer->output);
free(layer->delta);
free(layer);
free(layer->params.fc_params.biases);
free(layer->output);
free(layer->delta);
free(layer->pre_activation);
free(layer);
break;
}
}
// Releases every non-NULL layer held by `network`, then the layer table
// and the network object itself. A NULL `network` is ignored.
void destroy_network(Network* network) {
    if (network == NULL) {
        return;
    }

    int slot = 0;
    while (slot < network->num_layers) {
        Layer* current = network->layers[slot];
        if (current != NULL) {
            free_layer(current);
        }
        slot++;
    }

    free(network->layers);
    free(network);
}
// Forward pass of a convolution layer.
//
// layer: conv layer; geometry (filter size, stride, zero padding, number of
//        filters, input height/width/channels) is read from conv_params.
// input: activations indexed as [channel][row][col] with the input extents
//        stored in conv_params.
//
// The input is copied into a zero-padded scratch buffer, each filter is
// slid over it, the per-filter bias is added, the raw sum is stored in
// layer->pre_activation and relu(sum) in layer->output. Weights are indexed
// as [filter][channel][fh][fw].
//
// If the scratch buffer cannot be allocated the function returns without
// writing any output.
void conv_forward(Layer* layer, float* input) {
    const int padding = layer->params.conv_params.zero_padding;
    const int stride = layer->params.conv_params.stride;
    const int filter_size = layer->params.conv_params.filter_size;
    const int num_filters = layer->params.conv_params.num_filters;
    const int input_height = layer->params.conv_params.input_height;
    const int input_width = layer->params.conv_params.input_width;
    const int input_channels = layer->params.conv_params.input_channels;
    const float* weights = layer->params.conv_params.weights;
    const float* biases = layer->params.conv_params.biases;

    const int padded_height = input_height + 2 * padding;
    const int padded_width = input_width + 2 * padding;
    const int padded_plane = padded_height * padded_width;
    const int input_plane = input_height * input_width;
    const size_t padded_count = (size_t)padded_plane * (size_t)input_channels;

    float* padded_input = calloc(padded_count, sizeof *padded_input);
    if (!padded_input && padded_count > 0) {
        return;
    }

    // Copy each input row into the interior of the padded plane; the border
    // stays zero from calloc.
    for (int c = 0; c < input_channels; c++) {
        for (int h = 0; h < input_height; h++) {
            memcpy(&padded_input[c * padded_plane + (h + padding) * padded_width + padding],
                   &input[c * input_plane + h * input_width],
                   (size_t)input_width * sizeof *input);
        }
    }

    const int output_height = (padded_height - filter_size) / stride + 1;
    const int output_width = (padded_width - filter_size) / stride + 1;
    const int filter_area = filter_size * filter_size;

    for (int f = 0; f < num_filters; f++) {
        for (int oh = 0; oh < output_height; oh++) {
            for (int ow = 0; ow < output_width; ow++) {
                float sum = 0;
                for (int c = 0; c < input_channels; c++) {
                    for (int fh = 0; fh < filter_size; fh++) {
                        for (int fw = 0; fw < filter_size; fw++) {
                            // Always inside the padded plane: the output
                            // extents above are derived from it.
                            int ih = oh * stride + fh;
                            int iw = ow * stride + fw;
                            int input_idx = c * padded_plane + ih * padded_width + iw;
                            int weight_idx = f * input_channels * filter_area +
                                             c * filter_area +
                                             fh * filter_size + fw;
                            sum += padded_input[input_idx] * weights[weight_idx];
                        }
                    }
                }
                sum += biases[f];
                int output_idx = f * output_height * output_width + oh * output_width + ow;
                layer->pre_activation[output_idx] = sum;
                layer->output[output_idx] = relu(sum);
            }
        }
    }

    // Released exactly once, after the last read.
    free(padded_input);
}
// Forward pass of a max-pooling layer.
//
// layer: pooling layer; window size and stride come from pool_params.
// input: activations indexed as [channel][row][col] using the input extents
//        stored in pool_params (the same fields maxpool_backward reads).
//
// For every channel and output position, writes the largest value inside
// the corresponding pool_size x pool_size window to layer->output.
void maxpool_forward(Layer* layer, float* input) {
    const int pool_size = layer->params.pool_params.pool_size;
    const int stride = layer->params.pool_params.stride;

    // Use the stored input extents; layer->height/width are what
    // maxpool_backward treats as this layer's output extents.
    const int input_height = layer->params.pool_params.input_height;
    const int input_width = layer->params.pool_params.input_width;
    const int input_channels = layer->channels;

    const int output_height = (input_height - pool_size) / stride + 1;
    const int output_width = (input_width - pool_size) / stride + 1;

    for (int c = 0; c < input_channels; c++) {
        for (int oh = 0; oh < output_height; oh++) {
            for (int ow = 0; ow < output_width; ow++) {
                float max_val = -INFINITY;
                for (int ph = 0; ph < pool_size; ph++) {
                    for (int pw = 0; pw < pool_size; pw++) {
                        int ih = oh * stride + ph;
                        int iw = ow * stride + pw;
                        float val = input[c * input_height * input_width + ih * input_width + iw];
                        if (val > max_val) {
                            max_val = val;
                        }
                    }
                }
                layer->output[c * output_height * output_width + oh * output_width + ow] = max_val;
            }
        }
    }
}
void fc_forward(Layer* layer, float* input) {
@ -309,53 +354,55 @@ void fc_forward(Layer* layer, float* input) {
// flatten
float* flattened_input = (float*) calloc(input_size, sizeof(float));
for(int i = 0; i < input_size; i++) {
flattened_input[i] = input[i];
}
for(int i = 0; i < input_size; i++) {
flattened_input[i] = input[i];
}
// matmul (output = bias + (input * weight))
float* temp_output = (float*) calloc(output_size, sizeof(float));
for(int o = 0; o < output_size; o++) {
float sum = 0;
for(int i = 0; i < input_size; i++) {
sum += flattened_input[i] * layer->params.fc_params.weights[o * input_size + i];
}
sum += layer->params.fc_params.biases[o];
temp_output[o] = sum;
}
for(int o = 0; o < output_size; o++) {
float sum = 0;
for(int i = 0; i < input_size; i++) {
sum += flattened_input[i] * layer->params.fc_params.weights[o * input_size + i];
}
sum += layer->params.fc_params.biases[o];
temp_output[o] = sum;
}
// apply the correct activation (sigmoid for non output layers, softmax for output)
if(layer->params.fc_params.type == a_sigmoid) {
for(int o = 0; o < output_size; o++) {
layer->output[o] = sigmoid(temp_output[o]);
}
} else if(layer->params.fc_params.type == a_softmax) {
softmax(temp_output, layer->output, output_size);
}
for(int o = 0; o < output_size; o++) {
layer->pre_activation[o] = temp_output[o];
layer->output[o] = sigmoid(temp_output[o]);
}
} else if(layer->params.fc_params.type == a_softmax) {
softmax(temp_output, layer->output, output_size);
}
free(temp_output);
free(flattened_input);
free(temp_output);
free(flattened_input);
}
// Runs the forward pass of a single layer, dispatching on layer->type.
//
// layer:    the layer to evaluate; its output buffer is overwritten.
// input_fc: activations fed into the layer. For an input layer these are
//           copied verbatim (height * width * channels floats) into
//           layer->output; for the other layer types they are handed to the
//           matching *_forward routine.
void forward_propagation(Layer* layer, float* input_fc) {
    switch (layer->type) {
        case input: {
            int input_size = layer->height * layer->width * layer->channels;
            if (input_size > 0) {
                memcpy(layer->output, input_fc, (size_t)input_size * sizeof *input_fc);
            }
            break;
        }
        case conv:
            conv_forward(layer, input_fc);
            break;
        case max_pool:
            maxpool_forward(layer, input_fc);
            break;
        case fully_connected:
            fc_forward(layer, input_fc);
            break;
        default:
            break;
    }
}
void network_forward(Network* network, float* input) {
@ -368,91 +415,191 @@ void network_forward(Network* network, float* input) {
// Backward pass / SGD step for a fully connected layer.
//
// layer:         the fully connected layer whose weights and biases are updated.
// prev_delta:    for sigmoid layers, used directly as the per-output
//                gradient; for softmax layers, read as a one-hot target
//                (entries > 0.5 mark the true class).
// input:         values multiplied with the gradient to form the weight update.
// learning_rate: SGD step size.
//
// Steps:
//   1. build the per-output gradient (softmax: output - one_hot);
//   2. weights[o][i] -= lr * gradient[o] * input[i], biases[o] -= lr * gradient[o];
//   3. if the layer has an activation derivative, write
//      layer->delta[i] = (sum_o weights[o][i] * gradient[o]) * g'(pre_activation[i]).
//
// NOTE(review): input_size is derived from layer->height/width/channels, and
// create_fc sets layer->channels to output_size; confirm that this really is
// the number of inputs the weight rows were allocated with.
// NOTE(review): step 3 reads the weights after step 2 has already updated
// them; confirm whether the pre-update weights were intended.
void fc_backward(Layer* layer, float* prev_delta, float* input, float learning_rate) {
    int output_size = layer->params.fc_params.output_size;
    int input_size = layer->height * layer->width * layer->channels;
    int is_softmax = (layer->params.fc_params.type == a_softmax);

    // Per-output gradient. Only the softmax branch owns a heap buffer.
    float* gradient = prev_delta;
    if (is_softmax) {
        gradient = malloc((size_t)output_size * sizeof *gradient);
        if (!gradient) {
            return;
        }
        for (int i = 0; i < output_size; i++) {
            gradient[i] = layer->output[i];
            if (prev_delta[i] > 0.5) { // one hot encoded
                gradient[i] -= 1.0;
            }
        }
    }

    // update weights and biases
    for (int o = 0; o < output_size; o++) {
        for (int i = 0; i < input_size; i++) {
            layer->params.fc_params.weights[o * input_size + i] -=
                learning_rate * gradient[o] * input[i];
        }
        layer->params.fc_params.biases[o] -= learning_rate * gradient[o];
    }

    // gradient w/respect to inputs; skipped when there is no activation
    // derivative (create_fc stores NULL for softmax layers)
    if (layer->activation_g) {
        for (int i = 0; i < input_size; i++) {
            float sum = 0;
            for (int o = 0; o < output_size; o++) {
                sum += layer->params.fc_params.weights[o * input_size + i] * gradient[o];
            }
            layer->delta[i] = sum * layer->activation_g(layer->pre_activation[i]);
        }
    }

    if (is_softmax) {
        free(gradient);
    }
}
// Backward pass / SGD step for a convolution layer.
//
// layer:         conv layer whose filters and biases are updated in place.
// prev_delta:    gradient indexed as [filter][oh][ow].
// input:         activations indexed as [channel][ih][iw].
// learning_rate: SGD step size.
//
// Three passes, in this order:
//   1. filter gradients  -> weights -= lr * grad
//   2. bias gradients    -> biases  -= lr * grad
//   3. input gradients   -> layer->delta = grad * activation_g(pre_activation)
//
// NOTE(review): the extents used here as "input" (height, width, channels)
// come from layer->height/width/channels, which create_conv sets to the
// layer's output extents and num_filters, whereas conv_forward uses
// conv_params.input_height/input_width/input_channels. The weight index
// below therefore uses a different channel count than conv_forward does.
// Left unchanged because layer->delta and layer->pre_activation are sized
// from the output extents; confirm the intended geometry before changing it.
// NOTE(review): pass 3 reads weights that pass 1 has already updated.
void conv_backward(Layer* layer, float* prev_delta, float* input, float learning_rate) {
    int num_filters = layer->params.conv_params.num_filters;
    int channels = layer->channels;
    int filter_size = layer->params.conv_params.filter_size;
    int input_height = layer->height;
    int input_width = layer->width;
    int padding = layer->params.conv_params.zero_padding;
    int stride = layer->params.conv_params.stride;
    int output_height = (input_height + 2 * padding - filter_size) / stride + 1;
    int output_width = (input_width + 2 * padding - filter_size) / stride + 1;

    int input_plane = input_height * input_width;
    int output_plane = output_height * output_width;
    int filter_area = filter_size * filter_size;

    // gradient w/respect to filters
    for (int f = 0; f < num_filters; f++) {
        for (int c = 0; c < channels; c++) {
            for (int fh = 0; fh < filter_size; fh++) {
                for (int fw = 0; fw < filter_size; fw++) {
                    float grad = 0;
                    for (int oh = 0; oh < output_height; oh++) {
                        for (int ow = 0; ow < output_width; ow++) {
                            int ih = oh * stride + fh - padding;
                            int iw = ow * stride + fw - padding;
                            // positions that fall in the zero padding contribute nothing
                            if (ih >= 0 && ih < input_height && iw >= 0 && iw < input_width) {
                                grad += input[c * input_plane + ih * input_width + iw] *
                                        prev_delta[f * output_plane + oh * output_width + ow];
                            }
                        }
                    }
                    int index = f * channels * filter_area + c * filter_area + fh * filter_size + fw;
                    layer->params.conv_params.weights[index] -= learning_rate * grad;
                }
            }
        }
    }

    // gradient w/respect to biases
    for (int f = 0; f < num_filters; f++) {
        float grad = 0;
        for (int oh = 0; oh < output_height; oh++) {
            for (int ow = 0; ow < output_width; ow++) {
                grad += prev_delta[f * output_plane + oh * output_width + ow];
            }
        }
        layer->params.conv_params.biases[f] -= learning_rate * grad;
    }

    // gradient with respect to inputs
    for (int c = 0; c < channels; c++) {
        for (int ih = 0; ih < input_height; ih++) {
            for (int iw = 0; iw < input_width; iw++) {
                float grad = 0;
                for (int f = 0; f < num_filters; f++) {
                    for (int fh = 0; fh < filter_size; fh++) {
                        for (int fw = 0; fw < filter_size; fw++) {
                            int nh = ih - fh + padding;
                            int nw = iw - fw + padding;
                            // A negative offset has no matching output
                            // position; without this check a negative
                            // oh/ow would index before prev_delta.
                            if (nh < 0 || nw < 0) {
                                continue;
                            }
                            if (nh % stride != 0 || nw % stride != 0) {
                                continue;
                            }
                            int oh = nh / stride;
                            int ow = nw / stride;
                            if (oh < output_height && ow < output_width) {
                                int w_index = f * channels * filter_area + c * filter_area + fh * filter_size + fw;
                                grad += layer->params.conv_params.weights[w_index] *
                                        prev_delta[f * output_plane + oh * output_width + ow];
                            }
                        }
                    }
                }
                int idx = c * input_plane + ih * input_width + iw;
                layer->delta[idx] = grad * layer->activation_g(layer->pre_activation[idx]);
            }
        }
    }
}
// Backward pass of a max-pooling layer.
//
// layer:         pooling layer; layer->delta is cleared and then receives
//                the routed gradient, indexed with the input extents stored
//                in pool_params.
// prev_delta:    gradient indexed as [channel][oh][ow] with
//                layer->height x layer->width as the output extents.
// input:         values searched for the maximum of each window.
// learning_rate: unused (pooling has no trainable parameters).
//
// For every output position the gradient is routed to the input position
// that holds the window maximum (first occurrence on ties).
//
// NOTE(review): create_maxpool allocates layer->delta with the output
// extents, while the memset and writes below span
// input_height * input_width * channels floats; confirm that buffer size.
void maxpool_backward(Layer* layer, float* prev_delta, float* input, float learning_rate) {
    int pool_size = layer->params.pool_params.pool_size;
    int stride = layer->params.pool_params.stride;
    int input_height = layer->params.pool_params.input_height;
    int input_width = layer->params.pool_params.input_width;
    int channels = layer->channels;
    (void)learning_rate;

    // Zero initialize deltas
    memset(layer->delta, 0, (size_t)input_height * input_width * channels * sizeof(float));

    int output_height = layer->height;
    int output_width = layer->width;

    for (int c = 0; c < channels; c++) {
        for (int oh = 0; oh < output_height; oh++) {
            for (int ow = 0; ow < output_width; ow++) {
                // finds max value
                int maxI = -1, maxJ = -1;
                float maxVal = -INFINITY;

                for (int ph = 0; ph < pool_size; ph++) {
                    for (int pw = 0; pw < pool_size; pw++) {
                        int ih = oh * stride + ph;
                        int iw = ow * stride + pw;

                        // checks bounds
                        if (ih < input_height && iw < input_width) {
                            float val = input[c * input_height * input_width + ih * input_width + iw];
                            if (val > maxVal) {
                                maxVal = val;
                                maxI = ih;
                                maxJ = iw;
                            }
                        }
                    }
                }

                // only propagate gradient if a valid max position is found
                if (maxI != -1 && maxJ != -1) {
                    int delta_idx = c * output_height * output_width + oh * output_width + ow;
                    // Accumulate: when stride < pool_size the windows overlap
                    // and one input can be the maximum of several outputs.
                    layer->delta[c * input_height * input_width + maxI * input_width + maxJ] +=
                        prev_delta[delta_idx];
                }
            }
        }
    }
}
// Dispatches the backward pass for one layer according to layer->type.
// Fully connected, convolution and max-pool layers are forwarded to their
// *_backward routine with the arguments unchanged; input layers do nothing.
void backward_propagation(Layer* layer, float* prev_delta, float* input_fc, float learning_rate) {
    const int kind = layer->type;

    if (kind == fully_connected) {
        fc_backward(layer, prev_delta, input_fc, learning_rate);
        return;
    }
    if (kind == conv) {
        conv_backward(layer, prev_delta, input_fc, learning_rate);
        return;
    }
    if (kind == max_pool) {
        maxpool_backward(layer, prev_delta, input_fc, learning_rate);
        return;
    }
    // kind == input: no backpropagation for the input layer
}
// Runs one backward pass over the network.
//
// network:       network whose layers were just evaluated by a forward pass.
// label:         target vector with one entry per channel of the last layer.
// learning_rate: step size forwarded to each layer's backward routine.
//
// The last layer's delta is set to (output - label). Layers
// num_layers-2 .. 0 are then visited in reverse, each receiving the delta
// of the layer after it and its own output buffer.
// Does nothing for a NULL or empty network.
//
// NOTE(review): the last layer itself is never passed to
// backward_propagation, so its parameters are not updated here; confirm
// whether that is intended.
void network_backward(Network* network, float* label, float learning_rate) {
    if (!network || network->num_layers < 1) {
        return;
    }

    // output
    Layer* output_layer = network->layers[network->num_layers - 1];

    // output gradient
    for (int o = 0; o < output_layer->channels; o++) {
        output_layer->delta[o] = output_layer->output[o] - label[o];
    }

    // backprop
    for (int i = network->num_layers - 2; i >= 0; i--) {
        Layer* current_layer = network->layers[i];
        Layer* next_layer = network->layers[i + 1];
        backward_propagation(current_layer, next_layer->delta, current_layer->output, learning_rate);
    }
}

189
mnist.c Normal file
View File

@ -0,0 +1,189 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "cnn.c"
#define IMG_HEIGHT 28
#define IMG_WIDTH 28
#define NUM_CLASSES 10
#define BATCH_SIZE 32
#define LEARNING_RATE 0.01
#define NUM_EPOCHS 10
// Loads an MNIST IDX3 image file.
//
// filename:   path of the image file.
// num_images: receives the number of images read; set to 0 on any failure.
//
// Returns a malloc'd array of num_images * IMG_HEIGHT * IMG_WIDTH floats
// with every pixel scaled to [0, 1] (byte / 255), or NULL on failure.
// The caller frees the array.
//
// The 16-byte header (magic 2051, image count, rows, cols) is stored
// big-endian; it is decoded byte by byte so the result does not depend on
// the host byte order.
float* read_mnist_images(const char* filename, int* num_images) {
    float* images = NULL;
    unsigned char* temp = NULL;
    unsigned char header[16];
    unsigned long fields[4];
    const size_t pixels = (size_t)IMG_HEIGHT * IMG_WIDTH;
    // Largest value an int can hold, computed without <limits.h>.
    const unsigned long int_max = (unsigned long)(~0u >> 1);
    size_t count = 0;

    *num_images = 0;

    FILE* fp = fopen(filename, "rb");
    if (!fp) {
        printf("Error opening file %s\n", filename);
        return NULL;
    }

    if (fread(header, 1, sizeof header, fp) != sizeof header) {
        printf("Invalid MNIST image file format\n");
        goto fail;
    }
    for (int k = 0; k < 4; k++) {
        const unsigned char* b = header + 4 * k;
        fields[k] = ((unsigned long)b[0] << 24) | ((unsigned long)b[1] << 16) |
                    ((unsigned long)b[2] << 8) | (unsigned long)b[3];
    }

    if (fields[0] != 2051UL) {
        printf("Invalid MNIST image file format\n");
        goto fail;
    }
    if (fields[2] != (unsigned long)IMG_HEIGHT || fields[3] != (unsigned long)IMG_WIDTH) {
        printf("Invalid image dimensions\n");
        goto fail;
    }

    // The count must fit in the caller's int and in a size_t byte count.
    if (fields[1] > int_max || fields[1] > (size_t)-1 / (pixels * sizeof *images)) {
        printf("Invalid MNIST image file format\n");
        goto fail;
    }
    count = (size_t)fields[1];

    // Allocate at least one element so an empty file is not mistaken for
    // an allocation failure.
    images = malloc((count ? count : 1) * pixels * sizeof *images);
    temp = malloc(pixels);
    if (!images || !temp) {
        printf("Out of memory reading %s\n", filename);
        goto fail;
    }

    for (size_t i = 0; i < count; i++) {
        if (fread(temp, 1, pixels, fp) != pixels) {
            printf("Unexpected end of file %s\n", filename);
            goto fail;
        }
        for (size_t j = 0; j < pixels; j++) {
            images[i * pixels + j] = temp[j] / 255.0f;
        }
    }

    free(temp);
    fclose(fp);
    *num_images = (int)count;
    return images;

fail:
    free(temp);
    free(images);
    fclose(fp);
    return NULL;
}
// Loads an MNIST IDX1 label file as one-hot vectors.
//
// filename:   path of the label file.
// num_labels: receives the number of labels read; set to 0 on any failure.
//
// Returns a calloc'd array of num_labels * NUM_CLASSES floats in which
// entry [i * NUM_CLASSES + label_i] is 1.0f and all others are 0, or NULL on
// failure. The caller frees the array.
//
// The 8-byte header (magic 2049, label count) is stored big-endian; it is
// decoded byte by byte so the result does not depend on the host byte order.
float* read_mnist_labels(const char* filename, int* num_labels) {
    float* labels = NULL;
    unsigned char* temp = NULL;
    unsigned char header[8];
    unsigned long fields[2];
    // Largest value an int can hold, computed without <limits.h>.
    const unsigned long int_max = (unsigned long)(~0u >> 1);
    size_t count = 0;

    *num_labels = 0;

    FILE* fp = fopen(filename, "rb");
    if (!fp) {
        printf("Error opening file %s\n", filename);
        return NULL;
    }

    if (fread(header, 1, sizeof header, fp) != sizeof header) {
        printf("Invalid MNIST label file format\n");
        goto fail;
    }
    for (int k = 0; k < 2; k++) {
        const unsigned char* b = header + 4 * k;
        fields[k] = ((unsigned long)b[0] << 24) | ((unsigned long)b[1] << 16) |
                    ((unsigned long)b[2] << 8) | (unsigned long)b[3];
    }

    if (fields[0] != 2049UL || fields[1] > int_max) {
        printf("Invalid MNIST label file format\n");
        goto fail;
    }
    count = (size_t)fields[1];

    // calloc checks the count * size product for overflow and zero-fills,
    // so only the "hot" entry of each row has to be written.
    labels = calloc(count ? count : 1, NUM_CLASSES * sizeof *labels);
    temp = malloc(count ? count : 1);
    if (!labels || !temp) {
        printf("Out of memory reading %s\n", filename);
        goto fail;
    }

    if (fread(temp, 1, count, fp) != count) {
        printf("Unexpected end of file %s\n", filename);
        goto fail;
    }

    for (size_t i = 0; i < count; i++) {
        // A class id outside [0, NUM_CLASSES) would write past the row.
        if (temp[i] >= NUM_CLASSES) {
            printf("Invalid MNIST label file format\n");
            goto fail;
        }
        labels[i * NUM_CLASSES + temp[i]] = 1.0f;
    }

    free(temp);
    fclose(fp);
    *num_labels = (int)count;
    return labels;

fail:
    free(temp);
    free(labels);
    fclose(fp);
    return NULL;
}
// Trains a LeNet-5 inspired CNN on the MNIST training set with per-sample
// SGD, printing running loss/accuracy every 100 samples and a summary per
// epoch. Returns 0 on success, 1 if the data or the network could not be
// set up.
int main(void) {
    // load mnist
    // Counts start at 0 so a failed load can never leave them uninitialised.
    int num_train_images = 0, num_train_labels = 0;
    // File names match the data files committed next to this source.
    float* train_images = read_mnist_images("train-images.idx3-ubyte", &num_train_images);
    float* train_labels = read_mnist_labels("train-labels.idx1-ubyte", &num_train_labels);
    if (!train_images || !train_labels || num_train_images <= 0 ||
        num_train_images != num_train_labels) {
        printf("Failed to load MNIST training data\n");
        free(train_images);
        free(train_labels);
        return 1;
    }

    // creating a lenet-5 inspired network
    Network* network = create_network(8);
    if (!network) {
        printf("Failed to allocate network\n");
        free(train_images);
        free(train_labels);
        return 1;
    }
    network->layers[0] = create_input(IMG_HEIGHT, IMG_WIDTH, 1);
    network->layers[1] = create_conv(IMG_HEIGHT, IMG_WIDTH, 1, 6, 5, 1, 2);
    network->layers[2] = create_maxpool(network->layers[1]->height, network->layers[1]->width, network->layers[1]->channels, 2, 2);
    network->layers[3] = create_conv(network->layers[2]->height, network->layers[2]->width, network->layers[2]->channels, 16, 5, 1, 0);
    network->layers[4] = create_maxpool(network->layers[3]->height, network->layers[3]->width, network->layers[3]->channels, 2, 2);
    network->layers[5] = create_fc(120, network->layers[4]->height * network->layers[4]->width * network->layers[4]->channels, a_sigmoid);
    network->layers[6] = create_fc(84, 120, a_sigmoid);
    network->layers[7] = create_fc(NUM_CLASSES, 84, a_softmax);

    // training loop
    for (int epoch = 0; epoch < NUM_EPOCHS; epoch++) {
        float total_loss = 0.0f;
        int correct = 0;

        for (int i = 0; i < num_train_images; i++) {
            const float* target = &train_labels[i * NUM_CLASSES];

            // forward pass
            network_forward(network, &train_images[i * IMG_HEIGHT * IMG_WIDTH]);

            // accuracy: arg-max of the output vs. the index of the hot label
            float* output = network->layers[network->num_layers - 1]->output;
            int predicted = 0;
            float max_prob = output[0];
            for (int j = 1; j < NUM_CLASSES; j++) {
                if (output[j] > max_prob) {
                    max_prob = output[j];
                    predicted = j;
                }
            }

            int true_label = 0;
            for (int j = 0; j < NUM_CLASSES; j++) {
                if (target[j] > 0.5f) {
                    true_label = j;
                    break;
                }
            }

            if (predicted == true_label) correct++;

            // backprop
            network_backward(network, &train_labels[i * NUM_CLASSES], LEARNING_RATE);

            // cross entropy loss (epsilon keeps log() away from 0)
            float loss = 0.0f;
            for (int j = 0; j < NUM_CLASSES; j++) {
                if (target[j] > 0.5f) {
                    loss -= log(output[j] + 1e-10);
                }
            }
            total_loss += loss;

            // progress
            if ((i + 1) % 100 == 0) {
                printf("Epoch %d/%d, Step %d/%d, Loss: %.4f, Accuracy: %.2f%%\n",
                       epoch + 1, NUM_EPOCHS, i + 1, num_train_images,
                       total_loss / (i + 1), 100.0f * correct / (i + 1));
            }
        }

        printf("Epoch %d/%d completed, Average Loss: %.4f, Accuracy: %.2f%%\n",
               epoch + 1, NUM_EPOCHS, total_loss / num_train_images,
               100.0f * correct / num_train_images);
    }

    // Clean up
    free(train_images);
    free(train_labels);
    destroy_network(network);

    return 0;
}

BIN
train-images.idx3-ubyte Normal file

Binary file not shown.

BIN
train-labels.idx1-ubyte Normal file

Binary file not shown.