#ifndef LAYER_H_
#define LAYER_H_

#include "matrices.h"
#include <cmath>   // std::exp
#include <cassert> // assert

#define assertm(exp, msg) assert((void(msg), exp))

class Layer {
public:
    Matrix input;
    Matrix weights;
    Matrix raw_output;
    Matrix activated_output;
    Matrix biases;

    // Planning for back propagation:
    // each layer needs the derivative of Z with respect to W, the derivative
    // of A with respect to Z, and the derivative of the loss with respect
    // to A. Call them dzw, daz and dca.
    Matrix daz;

    static inline float Sigmoid(float);
    static inline float SigmoidPrime(float);

    inline void Forward();                           // Forward pass with sigmoid
    inline void Forward(float (*activation)(float)); // Forward pass with a custom activation function

    // To backpropagate we need dzw, the derivative of the loss with respect
    // to A (dca), and the derivative of the activation function that was used.
    inline void BackPropagate(Matrix dca); // Uses the input as dzw and the sigmoid derivative
    inline void BackPropagate(Matrix dzw, Matrix dca, float (*derivative)(float));

    inline void Feed(Matrix);

    // Constructors
    Layer(int input_size, int size);
    Layer();
};

void Layer::BackPropagate(Matrix dzw, Matrix dca, float (*derivative)(float)) {
    // Calculate daz, the derivative of the activation function. Since
    // dA/dZ = F'(Z), the derivative is evaluated on the raw (pre-activation)
    // output, not on the activated output.
    this->daz = this->raw_output.Function(derivative);

    // Transpose dzw and extend it down so its shape matches the weights.
    dzw = dzw.Transpose().ExtendDown(dca.values.size());

    // Chain rule: dC/dW = (daz Hadamard dca), extended right, then Hadamard dzw.
    Matrix dcw = this->daz.Hadamard(&dca).ExtendRight(this->input.values.size());
    dcw = dcw.Hadamard(&dzw);

    // Apply dcw to the weights, scaled by the learning rate.
    float learning_rate = 0.1F;
    Matrix reduced_dcw = dcw.Multiply(learning_rate);

    // Gradient descent: we SUBTRACT the derivative of the loss with respect
    // to the weights. (Note that the biases are left unchanged here.)
    this->weights = this->weights.Substract(&reduced_dcw);
}

void Layer::BackPropagate(Matrix dca) {
    // Since Z = W x I + B, the derivative of Z with respect to W is the
    // input, so the input serves as dzw here.
    this->BackPropagate(this->input, dca, &Layer::SigmoidPrime);
}

Layer::Layer() {
}

Layer::Layer(int input_size, int size) {
    this->input = Matrix(input_size, 1);

    // Every neuron has a weight for every input.
    this->weights = Matrix(size, input_size);
    this->weights.Randomize(-1.0F, 1.0F);

    this->raw_output = Matrix(size, 1);
    this->activated_output = this->raw_output;

    // One bias per neuron.
    this->biases = Matrix(size, 1);
    this->biases.Randomize(-1.0F, 1.0F);
}

void Layer::Feed(Matrix a) {
    this->input = a;
}

float Layer::Sigmoid(float x) {
    return 1 / (1 + std::exp(-x));
}

float Layer::SigmoidPrime(float x) {
    float buffer = Layer::Sigmoid(x);
    return buffer * (1 - buffer);
}

void Layer::Forward(float (*activation)(float)) {
    // Multiply the weight matrix by the input matrix and add the biases:
    // Z = W x I + B
    this->raw_output = this->weights.Multiply(&this->input).Add(&this->biases);

    // Pass the result through the activation function:
    // A = F(Z)
    this->activated_output = this->raw_output.Function(activation);
}

void Layer::Forward() {
    this->Forward(&Layer::Sigmoid);
}

#endif // LAYER_H_
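
// --------------------------------------------------------------------------
// BackPropagate above only adjusts the weights. Since Z = W x I + B, the
// derivative of Z with respect to B is 1, so dC/dB = daz Hadamard dca. The
// helper below is a minimal sketch of the matching bias update, assuming
// only the Matrix API already used in this header (Hadamard, Multiply,
// Substract); it is illustrative, not part of the class. Compile with
// -DLAYER_DEMO to include it.
#ifdef LAYER_DEMO
inline void UpdateBiases(Layer &layer, Matrix dca, float learning_rate) {
    // layer.daz holds F'(Z) from the most recent BackPropagate call.
    Matrix dcb = layer.daz.Hadamard(&dca).Multiply(learning_rate);
    layer.biases = layer.biases.Substract(&dcb);
}
#endif // LAYER_DEMO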
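
// --------------------------------------------------------------------------
// Usage sketch: a single layer trained one step against a random target with
// a squared-error loss, for which dC/dA = 2 * (A - T). This is a minimal
// sketch assuming the Matrix API used above (Randomize, Substract, Multiply,
// Print); it is a test driver, not part of the header. Compile with
// -DLAYER_DEMO to build it.
#ifdef LAYER_DEMO
int main() {
    // A layer with 3 inputs and 2 neurons.
    Layer layer(3, 2);

    // Feed a random input column vector and run the forward pass.
    Matrix x(3, 1);
    x.Randomize(-1.0F, 1.0F);
    layer.Feed(x);
    layer.Forward();
    layer.activated_output.Print("activated output");

    // Derivative of the squared-error loss with respect to A: 2 * (A - T).
    Matrix target(2, 1);
    target.Randomize(0.0F, 1.0F);
    Matrix dca = layer.activated_output.Substract(&target).Multiply(2.0F);

    // One backpropagation step: weights first, then the bias sketch above.
    layer.BackPropagate(dca);
    UpdateBiases(layer, dca, 0.1F);

    layer.weights.Print("weights after one step");
    return 0;
}
#endif // LAYER_DEMO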