Tyche Trading

Neural Network (AI)

Neural Network (AI)

Our Open Source Neural Network for C#

See our documentation for how to set up and use the code: https://tychetrading.io/neural-network-docs/

Download as Open Source Indicator for NinjaTrader:

				
					[System.Runtime.Serialization.DataContract]
class NeuralNetwork
{
    /// <summary>
    /// The layers of the network in order. Train and Predict feed the raw input to Layers[0]
    /// and read the network output from the last element.
    /// </summary>
    [System.Runtime.Serialization.DataMember]
    public Layer[] Layers;

    /// <summary>
    /// Loss values of the most recent loss calculation; Train averages all elements to get the batch loss.
    /// NOTE(review): presumably written by LossForward, which is declared outside this view - confirm.
    /// </summary>
    [System.Runtime.Serialization.DataMember]
    public double[] Loss;

    /// <summary>
    /// Accuracy of the most recent accuracy calculation, read by Train for reporting.
    /// NOTE(review): presumably written by CalculateAccuracy, which is declared outside this view - confirm.
    /// </summary>
    [System.Runtime.Serialization.DataMember]
    public double Accuracy;

    /// <summary>
    /// Empty marker type used as the type of Init's optimizerFunction parameter.
    /// Init assigns OptimizerFunction.* instances to it and casts back, so those types
    /// presumably derive from this class (their declarations are outside this view).
    /// </summary>
    [System.Runtime.Serialization.DataContract]
    public class OptimizerFunctionHolder { }

    /// <summary>
    /// Empty marker type used as the type of Init's lossFunction parameter.
    /// Init assigns LossFunction.* instances to it and casts back, so those types
    /// presumably derive from this class (their declarations are outside this view).
    /// </summary>
    [System.Runtime.Serialization.DataContract]
    public class LossFunctionHolder { }

    /// <summary>
    /// Empty marker type used as the type of Init's accuracyFunction parameter.
    /// Init assigns AccuracyFunction.* instances to it and casts back, so those types
    /// presumably derive from this class (their declarations are outside this view).
    /// </summary>
    [System.Runtime.Serialization.DataContract]
    public class AccuracyFunctionHolder { }

    // The fields below hold the concrete loss/accuracy function objects.
    // Init assigns exactly one loss field (the one matching the supplied type) per call.

    // Assigned by Init when a categorical cross-entropy loss is selected (also the default).
    [System.Runtime.Serialization.DataMember]
    LossFunction.Categorical_Crossentropy LossCCE;

    // Assigned by Init when a binary categorical cross-entropy loss is selected.
    [System.Runtime.Serialization.DataMember]
    LossFunction.Binary_Categorical_Crossentropy LossBCCE;

    // Assigned by Init when a squared error loss is selected.
    [System.Runtime.Serialization.DataMember]
    LossFunction.Squared_Error LossSE;

    // Assigned by Init when a mean squared error loss is selected.
    [System.Runtime.Serialization.DataMember]
    LossFunction.Mean_Squared_Error LossMSE;

    // Assigned by Init when a mean absolute error loss is selected.
    [System.Runtime.Serialization.DataMember]
    LossFunction.Mean_Absolute_Error LossMAE;
    
    // Assigned by Init when the true-class-mean accuracy function is selected (also the default).
    [System.Runtime.Serialization.DataMember]
    AccuracyFunction.True_Class_Mean AccuracyTCM;

    /// <summary>
    /// Static settings shared by the network and all of its layers.
    /// </summary>
    /// <remarks>
    /// All members are static, so the values are shared by every NeuralNetwork instance in the process.
    /// NOTE(review): the data contract serializer ignores DataMember on static members, so these
    /// values are presumably not persisted with the network - confirm before relying on them after loading.
    /// </remarks>
    [System.Runtime.Serialization.DataContract]
    private class Config
    {
        /// <summary>
        /// Scale factor applied to randomly initialized weights and biases (read by Layer.Init).
        /// </summary>
        [System.Runtime.Serialization.DataMember]
        public static double InitConst { get; set; }

        /// <summary>
        /// Name of the active loss function. Useable values:
        /// squared_error, categorical_crossentropy, binary_categorical_crossentropy,
        /// mean_squared_error, mean_absolute_error.
        /// </summary>
        [System.Runtime.Serialization.DataMember]
        public static string LossFunction { get; set; }

        /// <summary>
        /// Name of the active accuracy function. Useable values: true_class_mean.
        /// </summary>
        [System.Runtime.Serialization.DataMember]
        public static string AccuracyFunction { get; set; }

        /// <summary>
        /// Learning rate value that Train reads for progress reporting.
        /// </summary>
        [System.Runtime.Serialization.DataMember]
        public static double LearningRate { get; set; }
    }

    /// <summary>
    /// Select the optimizer, loss and accuracy functions, then initialize the weights and biases
    /// of every layer after the input layer.
    /// </summary>
    /// <param name="optimizerFunction">Optimizer used as a template; a new optimizer instance is constructed from it
    /// for each layer after the input layer. Defaults to stochastic gradient descent when omitted or null.</param>
    /// <param name="lossFunction">Loss function to use. Defaults to categorical cross-entropy when omitted or null.</param>
    /// <param name="accuracyFunction">Accuracy function to use. Defaults to true-class mean when omitted or null.</param>
    /// <param name="randWeights">Toggle randomly setting the weights.</param>
    /// <param name="randBiases">Toggle randomly setting the biases.</param>
    /// <param name="constant">New initialization constant.</param>
    /// <exception cref="System.ArgumentNullException">
    /// Thrown when a supplied function object is not exactly one of the supported types.
    /// </exception>
    public void Init(
    [ System.Runtime.InteropServices.Optional ] OptimizerFunctionHolder optimizerFunction,
    [ System.Runtime.InteropServices.Optional ] LossFunctionHolder lossFunction, 
    [ System.Runtime.InteropServices.Optional ] AccuracyFunctionHolder accuracyFunction, 
    bool randWeights = true, bool randBiases = false, double constant = 0.01)
    {
        // initialize optimizer functions for each layer
        if (optimizerFunction == null)
            optimizerFunction = new OptimizerFunction.Stochastic_Gradient_Descent();

        System.Type optimizerType = optimizerFunction.GetType();
        int totalLayers = Layers.Length;

        // Layer 0 is the input layer and is skipped: Train only calls Optimize on layers 1 and up.
        // Each layer receives its own optimizer instance built from the supplied template.
        if (optimizerType == typeof(OptimizerFunction.Stochastic_Gradient_Descent))
        {
            for (int i = 1; i < totalLayers; i++)
            {
                Layers[i].SetOptimizer(new OptimizerFunction.Stochastic_Gradient_Descent((OptimizerFunction.Stochastic_Gradient_Descent)optimizerFunction));
            }
        }
        else if (optimizerType == typeof(OptimizerFunction.Adaptive_Gradient))
        {
            for (int i = 1; i < totalLayers; i++)
            {
                Layers[i].SetOptimizer(new OptimizerFunction.Adaptive_Gradient((OptimizerFunction.Adaptive_Gradient)optimizerFunction));
            }
        }
        else if (optimizerType == typeof(OptimizerFunction.Root_Mean_Square_Propegation))
        {
            for (int i = 1; i < totalLayers; i++)
            {
                Layers[i].SetOptimizer(new OptimizerFunction.Root_Mean_Square_Propegation((OptimizerFunction.Root_Mean_Square_Propegation)optimizerFunction));
            }
        }
        else if (optimizerType == typeof(OptimizerFunction.Adaptive_Momentum))
        {
            for (int i = 1; i < totalLayers; i++)
            {
                Layers[i].SetOptimizer(new OptimizerFunction.Adaptive_Momentum((OptimizerFunction.Adaptive_Momentum)optimizerFunction));
            }
        }
        else
        {
            // exception type kept for existing callers; the text now goes in the message slot, not paramName
            throw new System.ArgumentNullException(nameof(optimizerFunction), "Invalid optimizer function.");
        }

        // initialize loss function
        if (lossFunction == null)
            lossFunction = new LossFunction.Categorical_Crossentropy();

        System.Type lossType = lossFunction.GetType();

        if (lossType == typeof(LossFunction.Categorical_Crossentropy))
        {
            LossCCE = (LossFunction.Categorical_Crossentropy)lossFunction;
            Config.LossFunction = "categorical_crossentropy";
        }
        else if (lossType == typeof(LossFunction.Binary_Categorical_Crossentropy))
        {
            LossBCCE = (LossFunction.Binary_Categorical_Crossentropy)lossFunction;
            // was "squared_error": the recorded name must match the function that was actually stored
            Config.LossFunction = "binary_categorical_crossentropy";
        }
        else if (lossType == typeof(LossFunction.Squared_Error))
        {
            LossSE = (LossFunction.Squared_Error)lossFunction;
            Config.LossFunction = "squared_error";
        }
        else if (lossType == typeof(LossFunction.Mean_Squared_Error))
        {
            LossMSE = (LossFunction.Mean_Squared_Error)lossFunction;
            Config.LossFunction = "mean_squared_error";
        }
        else if (lossType == typeof(LossFunction.Mean_Absolute_Error))
        {
            LossMAE = (LossFunction.Mean_Absolute_Error)lossFunction;
            Config.LossFunction = "mean_absolute_error";
        }
        else
        {
            throw new System.ArgumentNullException(nameof(lossFunction), "Invalid loss function.");
        }

        // initialize accuracy function
        if (accuracyFunction == null)
            accuracyFunction = new AccuracyFunction.True_Class_Mean();

        if (accuracyFunction.GetType() == typeof(AccuracyFunction.True_Class_Mean))
        {
            AccuracyTCM = (AccuracyFunction.True_Class_Mean)accuracyFunction;
            Config.AccuracyFunction = "true_class_mean";
        }
        else
        {
            throw new System.ArgumentNullException(nameof(accuracyFunction), "Invalid accuracy function.");
        }

        // must be set before the layers are initialized: Layer.Init reads Config.InitConst
        Config.InitConst = constant;

        // each layer's weight matrix is sized from the neuron count of the layer before it
        for (int layer = 1; layer < totalLayers; layer++)
        {
            int priorLayerSize = Layers[layer - 1].Biases.Length;
            Layers[layer].Init(priorLayerSize, randWeights, randBiases);
        }
    }

    /// <summary>
    /// Train the neural network.
    /// </summary>
    /// <param name="input">The input values for the neural network.</param>
    /// <param name="target">The one-hot encoded target classes for the neural network.</param>
    /// <param name="testInput">The input values used to validate the neural network when epoch results are printed.</param>
    /// <param name="testTarget">The target values used to validate the neural network when epoch results are printed.</param>
    /// <param name="epochs">Total number of times the entire dataset is passed through the neural network.</param>
    /// <param name="batchSize">The size of each batch passed through neural network at once(0 = all data as one batch).</param>
    /// <param name="printEveryIteration">Print data at every nth iteration (0 = never).</param>
    /// <param name="printEveryEpoch">Print data at every nth epoch (0 = never). The first and last epoch are always printed when enabled.</param>
    /// <exception cref="System.InvalidOperationException">
    /// Thrown when the column count of <paramref name="input"/> or <paramref name="target"/> does not match
    /// the size of the input or output layer.
    /// </exception>
    public void Train(double[,] input, double[,] target, double[,] testInput, double[,] testTarget, 
    int epochs = 1, int batchSize = 0, int printEveryIteration = 0, int printEveryEpoch = 1)
    {
        // check to ensure that data being passed matches up with network structure
        if (input.GetLength(1) != Layers[0].Biases.Length)
            throw new System.InvalidOperationException ("Input data does not match network input neuron structure.");
        else if (target.GetLength(1) != Layers[Layers.Length - 1].Biases.Length)
            throw new System.InvalidOperationException ("Target class data does not match network output neuron structure.");

        int totalLayers = Layers.Length;

        for (int epoch = 0; epoch < epochs; epoch++)
        {
            // reset epoch values
            double epochDataLoss = 0;
            double epochRegularizationLoss = 0;
            double epochAccuracy = 0;
            double epochLearningRate = 0;

            // set batched data
            (double[][,] input, double[][,] target) batchedData = HelperMethods.BatchData(input, target, batchSize);

            int numBatches = batchedData.input.Length;

            for (int batch = 0; batch < numBatches; batch++)
            {
                // run through forward pass of entire neural network;
                // the input layer only applies its activation to the raw batch
                Layers[0].ActivationForward(batchedData.input[batch]);
                
                for (int layer = 1; layer < totalLayers - 1; layer++)
                {
                    Layers[layer].Forward(Layers[layer - 1].Outputs);
                    Layers[layer].ActivationForward(Layers[layer].PreOutputs);
                    Layers[layer].DropoutForward(); // this should not run on final layer
                }

                // final layer is handled outside the loop so that no dropout is applied to it
                Layers[totalLayers - 1].Forward(Layers[totalLayers - 2].Outputs);
                Layers[totalLayers - 1].ActivationForward(Layers[totalLayers - 1].PreOutputs);
                
                // calculate the loss of current iteration
                LossForward(Layers[totalLayers - 1].Outputs, batchedData.target[batch]);

                // average the entries of Loss into a single value for this batch
                double loss = 0;
                int length = Loss.Length;
                for (int j = 0; j < length; j++)
                    loss += Loss[j];
                loss /= Loss.Length;

                // calculate loss gradient
                double[,] lossGradient = LossBackward(Layers[totalLayers - 1].Outputs, batchedData.target[batch]);

                // back propagate entire neural network
                Layers[totalLayers - 1].ActivationBackward(lossGradient);
                Layers[totalLayers - 1].Backward();
                Layers[totalLayers - 1].Optimize();

                // hidden layers, last to first; each receives the gradient produced by the layer after it
                for (int layer = totalLayers - 2; layer > 0; layer--)
                {
                    Layers[layer].DropoutBackward(Layers[layer + 1].DevOutputs);
                    Layers[layer].ActivationBackward(Layers[layer].DevInputs);
                    Layers[layer].Backward();
                    Layers[layer].Optimize();
                }

                // calculate regularization loss for each specified layer
                double regularizationLoss = 0;
                for (int layer = 1; layer < totalLayers; layer++)
                {
                    regularizationLoss += LossFunction.Regularization.ForwardWeights(Layers[layer].Weights, Layers[layer].WeightRegulizer_L1, Layers[layer].WeightRegulizer_L2);
                    regularizationLoss += LossFunction.Regularization.ForwardBiases(Layers[layer].Biases, Layers[layer].BiasRegulizer_L1, Layers[layer].BiasRegulizer_L2);
                }

                // calculate accuracy
                CalculateAccuracy(Layers[totalLayers - 1].Outputs, batchedData.target[batch]);

                // display the progress in System.Console: every nth batch plus the first and last batch
                if (printEveryIteration > 0)
                {
                    if ((batch + 1) % printEveryIteration == 0 || batch == 0 || batch + 1 == numBatches)
                    {
                        if (batch == 0)
                            System.Console.WriteLine("Epoch: " + (epoch + 1));
                        Evaluate.Display(loss, Accuracy, batch, regularizationLoss, Config.LearningRate);
                    }
                }

                // update epoch totals
                epochDataLoss += loss;
                epochRegularizationLoss += regularizationLoss;
                epochAccuracy += Accuracy;
                epochLearningRate += Config.LearningRate;
            }

            // display the progress: every nth epoch plus the first and last epoch
            if (printEveryEpoch > 0)
            {
                // the last-epoch test compares against epochs (it previously compared against numBatches)
                if ((epoch + 1) % printEveryEpoch == 0 || epoch == 0 || epoch + 1 == epochs)
                {
                    Evaluate.DisplayEpoch(epochDataLoss, epochAccuracy, numBatches, epochRegularizationLoss, epochLearningRate, epoch);

                    // set batched data
                    (double[][,] input, double[][,] target) testBatchedData = HelperMethods.BatchData(testInput, testTarget);

                    // test neural network on the first validation batch, without dropout
                    Layers[0].ActivationForward(testBatchedData.input[0]);

                    for (int layer = 1; layer < totalLayers; layer++)
                    {
                        Layers[layer].Forward(Layers[layer - 1].Outputs);
                        Layers[layer].ActivationForward(Layers[layer].PreOutputs);
                    }           

                    // calculate the loss of current iteration
                    LossForward(Layers[totalLayers - 1].Outputs, testBatchedData.target[0]);

                    // calculate accuracy
                    CalculateAccuracy(Layers[totalLayers - 1].Outputs, testBatchedData.target[0]);

                    Evaluate.Results(Loss, Accuracy);
                }
            }
        }
    }

    /// <summary>
    /// A single non-backpropagating forward pass of the neural network. No dropout is applied.
    /// </summary>
    /// <param name="input">The input values for the neural network.</param>
    /// <returns>
    /// The outputs of the neural network. This is the final layer's own output array, not a copy,
    /// so it is replaced by the next forward pass.
    /// </returns>
    public double[,] Predict(double[,] input)
    {
        int totalLayers = Layers.Length;

        // No result buffer is pre-allocated: the old one was discarded immediately and its sizing read
        // the final layer's Outputs, which is null until a forward pass has run (e.g. after loading).

        // the input layer only applies its activation to the raw data
        Layers[0].ActivationForward(input);

        for (int layer = 1; layer < totalLayers; layer++)
        {
            Layers[layer].Forward(Layers[layer - 1].Outputs);
            Layers[layer].ActivationForward(Layers[layer].PreOutputs);
        }

        return Layers[totalLayers - 1].Outputs;
    }
    
    /// <summary>
    /// Build a text report of the network's current settings.
    /// </summary>
    /// <remarks>
    /// The optimizer description is taken from Layers[1], so the network needs at least two layers.
    /// </remarks>
    /// <returns>A string listing the loss function, accuracy function, optimizer parameters and every layer's properties.</returns>
    public string GetProperties()
    {
        string header = "___Network Parameters___\n" +
                        "Loss Function: " + Config.LossFunction + "\n" +
                        "Accuracy Function: " + Config.AccuracyFunction + "\n" +
                        Layers[1].GetOptimizerParameters() + "\n" +
                        "__Layer Parameters__" + "\n";

        System.Text.StringBuilder report = new System.Text.StringBuilder(header);

        // one section per layer, input layer included
        for (int index = 0; index < Layers.Length; index++)
        {
            report.Append("Layer: ")
                  .Append(index)
                  .Append("\n")
                  .Append(Layers[index].GetProperties());
        }

        return report.ToString();
    }

    [System.Runtime.Serialization.DataContract]
    public class Layer
    {
        #region Neuron Data
        // Weight matrix indexed [neuron, input]; allocated by Init as [biases.Length, inputs].
        private double[,] weights;

        // One bias per neuron; its length is the layer size.
        private double[] biases;

        // The inputs most recently passed to Forward; reused by Backward.
        private double[,] inputs;

        // Result of the activation function (ActivationForward), overwritten by DropoutForward when it runs.
        private double[,] outputs;

        // Weighted sums plus bias computed by Forward, before activation.
        private double[,] preOutputs;

        // Gradient with respect to the weights, computed by Backward.
        private double[,] devWeights;

        // Gradient with respect to the biases, computed by Backward.
        private double[] devBiases;

        // Gradient entering this layer's linear step; written by DropoutBackward and ActivationBackward.
        private double[,] devInputs;

        // Gradient computed by Backward as devInputs x weights; Train hands it to the preceding layer.
        private double[,] devOutputs;

        // Regularization constants passed to LossFunction.Regularization for weights and biases.
        private double weightRegulizer_L1;

        private double weightRegulizer_L2;

        private double biasRegulizer_L1;

        private double biasRegulizer_L2;

        // Dropout helper created by the constructor from the dropoutRate argument.
        [System.Runtime.Serialization.DataMember]
        private Dropout dropout;

        /*
        * Usable Activation Functions (the names handled by ActivationForward / ActivationBackward):
        * input
        * relu
        * leakyrelu
        * parametricrelu
        * softmax
        * sigmoid
        * tanh
        * linear
        * elu
        * gelu
        */
        [System.Runtime.Serialization.DataMember]
        private string activation;

        /*
        * Useable Optimizers:
        * Stochastic_Gradient_Descent
        * Adaptive_Gradient
        * Root_Mean_Square_Propegation
        * Adaptive_Momentum
        *
        * The SetOptimizer overloads store the lower-case form of these names in this field,
        * e.g. "stochastic_gradient_descent".
        */
        [System.Runtime.Serialization.DataMember]
        private string optimizer;

        // Optimizer instances; each is assigned by the SetOptimizer overload taking that type.
        [System.Runtime.Serialization.DataMember]
        public OptimizerFunction.Stochastic_Gradient_Descent optimizerSGD;

        [System.Runtime.Serialization.DataMember]
        public OptimizerFunction.Adaptive_Gradient optimizerAG;

        [System.Runtime.Serialization.DataMember]
        public OptimizerFunction.Root_Mean_Square_Propegation optimizerRMSP;

        [System.Runtime.Serialization.DataMember]
        public OptimizerFunction.Adaptive_Momentum optimizerAM;

        /// <summary>
        /// Use the given stochastic gradient descent optimizer for this layer.
        /// </summary>
        /// <param name="SGD">The optimizer instance to store.</param>
        public void SetOptimizer(OptimizerFunction.Stochastic_Gradient_Descent SGD)
        {
            // record which optimizer kind is active, then keep the instance
            optimizer = "stochastic_gradient_descent";
            optimizerSGD = SGD;
        }

        /// <summary>
        /// Use the given adaptive gradient optimizer for this layer.
        /// </summary>
        /// <param name="AG">The optimizer instance to store.</param>
        public void SetOptimizer(OptimizerFunction.Adaptive_Gradient AG)
        {
            // record which optimizer kind is active, then keep the instance
            optimizer = "adaptive_gradient";
            optimizerAG = AG;
        }

        /// <summary>
        /// Use the given root mean square propagation optimizer for this layer.
        /// </summary>
        /// <param name="RMSP">The optimizer instance to store.</param>
        public void SetOptimizer(OptimizerFunction.Root_Mean_Square_Propegation RMSP)
        {
            // record which optimizer kind is active, then keep the instance
            optimizer = "root_mean_square_propegation";
            optimizerRMSP = RMSP;
        }

        /// <summary>
        /// Use the given adaptive momentum optimizer for this layer.
        /// </summary>
        /// <param name="AM">The optimizer instance to store.</param>
        public void SetOptimizer(OptimizerFunction.Adaptive_Momentum AM)
        {
            // record which optimizer kind is active, then keep the instance
            optimizer = "adaptive_momentum";
            optimizerAM = AM;
        }
        
        /// <summary>Weight matrix of the layer, indexed [neuron, input]. Persisted through serializedWeights.</summary>
        public double[,] Weights { get { return weights;} private set { weights = value; } }

        /// <summary>Bias of each neuron; the length is the layer size.</summary>
        [System.Runtime.Serialization.DataMember]
        public double[] Biases { get { return biases;} private set {biases = value; } }

        /// <summary>The inputs most recently passed to Forward.</summary>
        public double[,] Inputs { get { return inputs;} private set { inputs = value; } }
        
        // The five setters below used to assign to the inputs field (copy-paste slip);
        // each now writes the backing field its getter reads.

        /// <summary>Layer outputs after activation (and dropout, when applied).</summary>
        public double[,] Outputs { get { return outputs;} private set { outputs = value; } }

        /// <summary>Layer outputs before activation, as computed by Forward.</summary>
        public double[,] PreOutputs { get { return preOutputs;} private set { preOutputs = value; } }

        /// <summary>Gradient with respect to the weights, as computed by Backward.</summary>
        public double[,] DevWeights { get { return devWeights;} private set { devWeights = value; } }

        /// <summary>Gradient with respect to the biases, as computed by Backward.</summary>
        public double[] DevBiases { get { return devBiases; } private set { devBiases = value; } }

        /// <summary>Gradient written by DropoutBackward and ActivationBackward.</summary>
        public double[,] DevInputs { get { return devInputs; } private set { devInputs = value; } }

        /// <summary>Gradient computed by Backward for the preceding layer.</summary>
        public double[,] DevOutputs { get { return devOutputs; } private set { devOutputs = value; } }

        /// <summary>Name of the activation function used by this layer.</summary>
        [System.Runtime.Serialization.DataMember]
        public string Activation { get { return activation; } set { activation = value; } }

        /// <summary>L1 regularization constant for the weights.</summary>
        [System.Runtime.Serialization.DataMember]
        public double WeightRegulizer_L1 { get { return weightRegulizer_L1; } set { weightRegulizer_L1 = value; } }

        /// <summary>L2 regularization constant for the weights.</summary>
        [System.Runtime.Serialization.DataMember]
        public double WeightRegulizer_L2 { get { return weightRegulizer_L2; } set { weightRegulizer_L2 = value; } }

        /// <summary>L1 regularization constant for the biases.</summary>
        [System.Runtime.Serialization.DataMember]
        public double BiasRegulizer_L1 { get { return biasRegulizer_L1; } set { biasRegulizer_L1 = value; } }

        /// <summary>L2 regularization constant for the biases.</summary>
        [System.Runtime.Serialization.DataMember]
        public double BiasRegulizer_L2 { get { return biasRegulizer_L2; } set { biasRegulizer_L2 = value; } }

        /// <summary>Name of the optimizer kind selected through SetOptimizer.</summary>
        [System.Runtime.Serialization.DataMember]
        public string Optimizer { get { return optimizer; } set { optimizer = value;} }

        // Jagged copy of the weight matrix used only for serialization; filled by
        // BeforeSerializing and converted back into Weights by AfterDeserializing.
        [System.Runtime.Serialization.DataMember]
        private double[][] serializedWeights;

        /// <summary>
        /// Copy the rectangular weight matrix into a jagged array so it can be serialized.
        /// Does nothing when the layer has no weights.
        /// </summary>
        /// <param name="context">Streaming context supplied by the serializer; not used.</param>
        [System.Runtime.Serialization.OnSerializing]
        private void BeforeSerializing(System.Runtime.Serialization.StreamingContext context)
        {
            double[,] source = this.Weights;
            if (source == null)
                return;

            int rowCount = source.GetLength(0);
            int colCount = source.GetLength(1);

            double[][] jagged = new double[rowCount][];

            for (int r = 0; r < rowCount; r++)
            {
                double[] line = new double[colCount];

                for (int c = 0; c < colCount; c++)
                    line[c] = source[r, c];

                jagged[r] = line;
            }

            this.serializedWeights = jagged;
        }

        /// <summary>
        /// Rebuild the rectangular weight matrix from the jagged array that was deserialized.
        /// A missing jagged array results in null weights.
        /// </summary>
        /// <param name="ctx">Streaming context supplied by the serializer; not used.</param>
        /// <exception cref="System.InvalidOperationException">Thrown when the rows of the jagged array differ in length.</exception>
        [System.Runtime.Serialization.OnDeserialized]
        private void AfterDeserializing(System.Runtime.Serialization.StreamingContext ctx)
        {
            double[][] jagged = this.serializedWeights;

            if (jagged == null)
            {
                this.Weights = null;
                return;
            }

            int rowCount = jagged.Length;
            int colCount = jagged[0].Length;

            // every row after the first must be exactly as long as the first one
            for (int r = 1; r < rowCount; r++)
            {
                if (jagged[r].Length != colCount)
                    throw new System.InvalidOperationException("The serialized array does not match the multi-dimensional one");
            }

            double[,] restored = new double[rowCount, colCount];

            for (int r = 0; r < rowCount; r++)
            {
                double[] line = jagged[r];

                for (int c = 0; c < colCount; c++)
                    restored[r, c] = line[c];
            }

            this.Weights = restored;
        }
        #endregion

        /// <summary>
        /// Create a layer of the given size with its activation, regularization and dropout settings.
        /// Weights are not allocated here; only the bias array is created.
        /// </summary>
        /// <param name="neurons">The number of neurons in the layer.</param>
        /// <param name="activationFunction">The name of the activation function for the layer.</param>
        /// <param name="weightRegulizer_L1">L1 regularization constant for the weights.</param>
        /// <param name="weightRegulizer_L2">L2 regularization constant for the weights.</param>
        /// <param name="biasRegulizer_L1">L1 regularization constant for the biases.</param>
        /// <param name="biasRegulizer_L2">L2 regularization constant for the biases.</param>
        /// <param name="dropoutRate">Rate passed to the layer's Dropout helper.</param>
        public Layer(
        int neurons, string activationFunction ="linear",
        double weightRegulizer_L1 = 0, double weightRegulizer_L2 = 0, double biasRegulizer_L1 = 0, double biasRegulizer_L2 = 0, 
        double dropoutRate = 0)
        {
            // shape and activation
            this.biases = new double[neurons];
            this.activation = activationFunction;

            // regularization constants
            this.biasRegulizer_L1 = biasRegulizer_L1;
            this.biasRegulizer_L2 = biasRegulizer_L2;
            this.weightRegulizer_L1 = weightRegulizer_L1;
            this.weightRegulizer_L2 = weightRegulizer_L2;

            // dropout helper
            this.dropout = new Dropout(dropoutRate);
        }

        /// <summary>
        /// Create an empty layer; no field is initialized, so the bias array and dropout helper stay null.
        /// NOTE(review): presumably kept for serializers or tools that need a parameterless constructor - confirm.
        /// </summary>
        public Layer() { }

        /// <summary>
        /// Describe this layer as indented text: size, activation function, dropout parameters
        /// and the four regularization constants.
        /// </summary>
        /// <returns>String of the layers properties.</returns>
        public string GetProperties()
        {
            string sizeLine = "    Size: " + this.biases.Length + "\n";
            string activationLine = "    Activation Function: " + this.activation + "\n";

            // the dropout description is appended as returned, without an added line break
            string dropoutLine = "    " + this.GetDropoutParameters();

            string regularization = "    Regularization Constants:\n" +
                                    "        Lambda 1 Biases: " + this.biasRegulizer_L1 + "\n" +
                                    "        Lambda 1 Weights: " + this.weightRegulizer_L1 + "\n" +
                                    "        Lambda 2 Biases: " + this.biasRegulizer_L2 + "\n" +
                                    "        Lambda 2 Weights: " + this.weightRegulizer_L2 + "\n";

            return sizeLine + activationLine + dropoutLine + regularization;
        }

        // Generator shared by all layers. Creating a new System.Random for every layer can give
        // layers initialized in quick succession the same clock-derived seed on .NET Framework,
        // and therefore identical "random" weights.
        private static readonly System.Random initRandom = new System.Random();

        /// <summary>
        /// Allocate the weight matrix for this layer and set the initial weights and biases.
        /// Random values are drawn from [-Config.InitConst, Config.InitConst); non-random values are 0.
        /// </summary>
        /// <param name="inputs">The number of neurons in the prior neuron layer.</param>
        /// <param name="randWeights">True to set the weights randomly, false to set them to 0.</param>
        /// <param name="randBiases">True to set the biases randomly, false to set them to 0.</param>
        /// <remarks>Uses Config.InitConst as constant.</remarks>
        public void Init(int inputs, bool randWeights, bool randBiases)
        {
            int row = this.biases.Length;
            int col = inputs;

            // one row of weights per neuron, one column per input
            this.weights = new double[row, col];

            // System.Random is not thread-safe, so the shared generator is guarded while in use
            lock (initRandom)
            {
                for (int i = 0; i < row; i++)
                {
                    for (int j = 0; j < col; j++)
                    {
                        if (randWeights)
                            this.weights[i, j] = (initRandom.NextDouble() * 2 - 1) * Config.InitConst;
                        else
                            this.weights[i, j] = 0;
                    }

                    if (randBiases)
                        this.biases[i] = (initRandom.NextDouble() * 2 - 1) * Config.InitConst;
                    else
                        this.biases[i] = 0;
                }
            }
        }

        /// <summary>
        /// Replace the weight matrix of this layer with one of the same dimensions.
        /// The array is stored as given, not copied.
        /// </summary>
        /// <param name="newWeights">The desired weights for current neuron layer.</param>
        /// <exception cref="System.InvalidOperationException">Thrown when the dimensions differ from the current weights.</exception>
        public void SetWeights(double[,] newWeights)
        {
            bool shapeDiffers = newWeights.GetLength(0) != this.weights.GetLength(0)
                             || newWeights.GetLength(1) != this.weights.GetLength(1);

            if (shapeDiffers)
                throw new System.InvalidOperationException ("Array set does not align with network weight structure.");

            this.weights = newWeights;
        }

        /// <summary>
        /// Replace the bias array of this layer with one of the same length.
        /// The array is stored as given, not copied.
        /// </summary>
        /// <param name="newBiases">The desired biases for current neuron layer.</param>
        /// <exception cref="System.InvalidOperationException">Thrown when the length differs from the current biases.</exception>
        public void SetBiases(double[] newBiases)
        {
            if (newBiases.Length != this.biases.Length)
                throw new System.InvalidOperationException ("Array set does not align with network bias structure.");

            this.biases = newBiases;
        }

        /// <summary>
        /// Return the weight matrix of this layer (the stored array itself, not a copy).
        /// </summary>
        /// <returns>The weights of current neuron layer.</returns>
        public double[,] GetWeights() => this.weights;

        /// <summary>
        /// Return the bias array of this layer (the stored array itself, not a copy).
        /// </summary>
        /// <returns>The biases of current neuron layer.</returns>
        public double[] GetBiases() => this.biases;

        /// <summary>
        /// Compute the pre-activation output of every neuron for every row of the batch
        /// and store it in the pre-output buffer. The inputs are remembered for Backward.
        /// </summary>
        /// <param name="inputs">The inputs received by this layer, one row per sample and one column per input value.</param>
        /// <remarks>Output(n) = Sum(input(i) * weight(n, i)) + bias(n)</remarks>
        public void Forward(double[,] inputs)
        {
            this.inputs = inputs;

            int sampleCount = inputs.GetLength(0);
            int inputCount = inputs.GetLength(1);
            int neuronCount = this.biases.Length;

            double[,] weighted = new double[sampleCount, neuronCount];

            for (int s = 0; s < sampleCount; s++)
            {
                for (int n = 0; n < neuronCount; n++)
                {
                    // weighted sum of the sample's inputs for neuron n ...
                    for (int k = 0; k < inputCount; k++)
                        weighted[s, n] += this.weights[n, k] * inputs[s, k];

                    // ... followed by the neuron's bias
                    weighted[s, n] += this.biases[n];
                }
            }

            this.preOutputs = weighted;
        }

        /// <summary>
        /// Back propagation for this layer's linear step. From the stored incoming gradient it computes
        /// the bias and weight gradients (with regularization applied) and the gradient for the preceding layer.
        /// </summary>
        public void Backward()
        {
            int sampleCount = this.devInputs.GetLength(0);
            int neuronCount = this.devInputs.GetLength(1);

            // bias gradient: for each neuron, the sum of the incoming gradient over all samples
            this.devBiases = new double[neuronCount];
            for (int n = 0; n < neuronCount; n++)
            {
                for (int s = 0; s < sampleCount; s++)
                    this.devBiases[n] += this.devInputs[s, n];
            }

            // weight gradient: transpose(transpose(inputs) x devInputs)
            double[,] inputsTransposed = HelperMethods.Transpose(this.inputs);
            double[,] inputByGradient = HelperMethods.DotProduct(inputsTransposed, this.devInputs);
            this.devWeights = HelperMethods.Transpose(inputByGradient);

            // regularization adjusts both gradients
            this.devWeights = LossFunction.Regularization.BackwardWeights(this.weights, this.devWeights, this.weightRegulizer_L1, this.weightRegulizer_L2);
            this.devBiases = LossFunction.Regularization.BackwardBiases(this.biases, this.devBiases, this.biasRegulizer_L1, this.biasRegulizer_L2);

            // gradient that Train passes on to the preceding layer
            this.devOutputs = HelperMethods.DotProduct(this.devInputs, this.weights);
        }

        /// <summary>
        /// Apply this layer's activation function to the given values and store the result as the layer outputs.
        /// The "input" activation stores the values unchanged.
        /// </summary>
        /// <param name="inputs">The outputs of the layer prior to activation.</param>
        /// <exception cref="System.InvalidOperationException">Thrown when the activation name is not recognized.</exception>
        public void ActivationForward(double[,] inputs)
        {
            double[,] activated;

            switch (this.activation)
            {
                case "input":
                    activated = inputs;
                    break;
                case "relu":
                    activated = ActivationFunction.ReLU.Forward(inputs);
                    break;
                case "leakyrelu":
                    activated = ActivationFunction.LeakyReLU.Forward(inputs);
                    break;
                case "parametricrelu":
                    activated = ActivationFunction.ParametricReLU.Forward(inputs);
                    break;
                case "softmax":
                    activated = ActivationFunction.Softmax.Forward(inputs);
                    break;
                case "sigmoid":
                    activated = ActivationFunction.Sigmoid.Forward(inputs);
                    break;
                case "tanh":
                    activated = ActivationFunction.TanH.Forward(inputs);
                    break;
                case "linear":
                    activated = ActivationFunction.Linear.Forward(inputs);
                    break;
                case "elu":
                    activated = ActivationFunction.ELU.Forward(inputs);
                    break;
                case "gelu":
                    activated = ActivationFunction.GELU.Forward(inputs);
                    break;
                default:
                    throw new System.InvalidOperationException ("Not a valid activation function.");
            }

            this.outputs = activated;
        }

        /// <summary>
        /// Run the backward step of this layer's activation function and store the result as the
        /// gradient used by Backward. The "input" activation leaves the stored gradient untouched.
        /// </summary>
        /// <param name="recievedGradient">The gradient that has been passed onto the activation function.</param>
        /// <exception cref="System.InvalidOperationException">Thrown when the activation name is not recognized.</exception>
        public void ActivationBackward(double[,] recievedGradient)
        {
            double[,] gradient;

            switch (this.activation)
            {
                case "input":
                    // nothing to propagate through the input layer
                    return;
                case "relu":
                    gradient = ActivationFunction.ReLU.Backward(recievedGradient, this.outputs);
                    break;
                case "leakyrelu":
                    gradient = ActivationFunction.LeakyReLU.Backward(recievedGradient, this.outputs);
                    break;
                case "parametricrelu":
                    gradient = ActivationFunction.ParametricReLU.Backward(recievedGradient, this.outputs);
                    break;
                case "softmax":
                    gradient = ActivationFunction.Softmax.Backward(recievedGradient, this.outputs);
                    break;
                case "sigmoid":
                    gradient = ActivationFunction.Sigmoid.Backward(recievedGradient, this.outputs);
                    break;
                case "tanh":
                    gradient = ActivationFunction.TanH.Backward(recievedGradient, this.outputs);
                    break;
                case "linear":
                    gradient = ActivationFunction.Linear.Backward(recievedGradient, this.outputs);
                    break;
                case "elu":
                    gradient = ActivationFunction.ELU.Backward(recievedGradient, this.outputs);
                    break;
                case "gelu":
                    gradient = ActivationFunction.GELU.Backward(recievedGradient, this.outputs);
                    break;
                default:
                    throw new System.InvalidOperationException ("Not a valid activation function.");
            }

            this.devInputs = gradient;
        }

        /// <summary>
        /// Passes the layer's current outputs through its dropout layer and stores the
        /// returned values back as the layer outputs.
        /// </summary>
        public void DropoutForward()
        {
            var afterDropout = this.dropout.Forward(this.outputs);

            this.outputs = afterDropout;
        }

        /// <summary>
        /// Passes the received gradient through the layer's dropout layer and stores the
        /// returned gradient in <c>devInputs</c>.
        /// </summary>
        /// <param name="recievedGradient">The gradient handed to the dropout layer's backward pass.</param>
        public void DropoutBackward(double[,] recievedGradient)
        {
            var afterDropout = this.dropout.Backward(recievedGradient);

            this.devInputs = afterDropout;
        }

        /// <summary>
        /// Updates this layer's weights and biases using the optimizer selected by <c>Optimizer</c>.
        /// </summary>
        /// <remarks>
        /// The weights are always updated before the biases, each from its own stored gradient.
        /// </remarks>
        /// <exception cref="System.InvalidOperationException">
        /// Thrown when the optimizer name is not one of the supported names.
        /// </exception>
        public void Optimize()
        {
            switch (this.Optimizer)
            {
                case "stochastic_gradient_descent":
                    this.weights = this.optimizerSGD.Weights(this.weights, this.devWeights);
                    this.biases = this.optimizerSGD.Biases(this.biases, this.devBiases);
                    return;

                case "adaptive_gradient":
                    this.weights = this.optimizerAG.Weights(this.weights, this.devWeights);
                    this.biases = this.optimizerAG.Biases(this.biases, this.devBiases);
                    return;

                case "root_mean_square_propegation":
                    this.weights = this.optimizerRMSP.Weights(this.weights, this.devWeights);
                    this.biases = this.optimizerRMSP.Biases(this.biases, this.devBiases);
                    return;

                case "adaptive_momentum":
                    this.weights = this.optimizerAM.Weights(this.weights, this.devWeights);
                    this.biases = this.optimizerAM.Biases(this.biases, this.devBiases);
                    return;

                default:
                    throw new System.InvalidOperationException("Invalid optimizer");
            }
        }

        /// <summary>
        /// Gets the parameter description of the optimizer selected for this neuron layer.
        /// </summary>
        /// <returns>The string produced by the selected optimizer's <c>GetParameters</c> call.</returns>
        /// <exception cref="System.InvalidOperationException">
        /// Thrown when the optimizer name is not one of the supported names.
        /// </exception>
        public string GetOptimizerParameters()
        {
            switch (this.Optimizer)
            {
                case "stochastic_gradient_descent":
                    return this.optimizerSGD.GetParameters();

                case "adaptive_gradient":
                    return this.optimizerAG.GetParameters();

                case "root_mean_square_propegation":
                    return this.optimizerRMSP.GetParameters();

                case "adaptive_momentum":
                    return this.optimizerAM.GetParameters();

                default:
                    throw new System.InvalidOperationException("Invalid optimizer");
            }
        }

        /// <summary>
        /// Gets a one-line description of the dropout setting of the current neuron layer.
        /// </summary>
        /// <returns>
        /// The text "Dropout Rate: " followed by <c>1 - dropout.Rate</c> rounded to four
        /// decimal places and a trailing newline.
        /// </returns>
        public string GetDropoutParameters()
        {
            return "Dropout Rate: " + System.Math.Round(1 - this.dropout.Rate, 4) + "\n";
        }
    }

    /// <summary>
    /// Evaluates the loss function selected in the configuration and stores the result in <c>Loss</c>.
    /// </summary>
    /// <param name="output">The final outputs of the neural network.</param>
    /// <param name="target">The one-hot encoded target classes.</param>
    /// <exception cref="System.InvalidOperationException">
    /// Thrown when the configured loss function name is not one of the supported names.
    /// </exception>
    public void LossForward(double[,] output, double[,] target)
    {
        double[] computedLoss;

        switch (Config.LossFunction)
        {
            case "categorical_crossentropy":
                computedLoss = this.LossCCE.Forward(output, target);
                break;
            case "binary_categorical_crossentropy":
                computedLoss = this.LossBCCE.Forward(output, target);
                break;
            case "squared_error":
                computedLoss = this.LossSE.Forward(output, target);
                break;
            case "mean_squared_error":
                computedLoss = this.LossMSE.Forward(output, target);
                break;
            case "mean_absolute_error":
                computedLoss = this.LossMAE.Forward(output, target);
                break;
            default:
                throw new System.InvalidOperationException("Not a valid loss function.");
        }

        this.Loss = computedLoss;
    }

    /// <summary>
    /// Evaluates the backward pass of the loss function selected in the configuration.
    /// </summary>
    /// <param name="output">The final outputs of the neural network.</param>
    /// <param name="target">The one-hot encoded target classes.</param>
    /// <returns>
    /// The loss gradient that will be passed onto the neural network as a 2d array.
    /// </returns>
    /// <exception cref="System.InvalidOperationException">
    /// Thrown when the configured loss function name is not one of the supported names.
    /// </exception>
    public double[,] LossBackward(double[,] output, double[,] target)
    {
        switch (Config.LossFunction)
        {
            case "categorical_crossentropy":
                return this.LossCCE.Backward(output, target);

            case "binary_categorical_crossentropy":
                return this.LossBCCE.Backward(output, target);

            case "squared_error":
                return this.LossSE.Backward(output, target);

            case "mean_squared_error":
                return this.LossMSE.Backward(output, target);

            case "mean_absolute_error":
                return this.LossMAE.Backward(output, target);

            default:
                throw new System.InvalidOperationException("Not a valid loss function.");
        }
    }

    /// <summary>
    /// Evaluates the accuracy function selected in the configuration, stores the result in
    /// <c>Accuracy</c> and returns it.
    /// </summary>
    /// <param name="output">The final outputs of the neural network.</param>
    /// <param name="target">The target classes.</param>
    /// <returns>The calculated accuracy of network.</returns>
    /// <exception cref="System.InvalidOperationException">
    /// Thrown when the configured accuracy function name is not "true_class_mean".
    /// </exception>
    public double CalculateAccuracy(double[,] output, double[,] target)
    {
        // "true_class_mean" is the only accuracy function currently wired up
        if (Config.AccuracyFunction != "true_class_mean")
            throw new System.InvalidOperationException("Not a valid accuracy function.");

        double score = this.AccuracyTCM.Compare(output, target);

        this.Accuracy = score;

        return score;
    }

    [System.Runtime.Serialization.DataContract]
    private class ActivationFunction
    {
        [System.Runtime.Serialization.DataContract]
        public class GELU
        {
            public GELU() { }

            /// <summary>
            /// Applies the tanh approximation of the Gaussian Error Linear Unit to every element.
            /// </summary>
            /// <param name="input">The outputs of the neuron layer prior to activation.</param>
            /// <remarks>0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x^3)))</remarks>
            /// <returns> 
            /// The post activation outputs of the neuron layer as a new 2d array.
            /// </returns>
            public static double[,] Forward(double[,] input)
            {
                int row = input.GetLength(0);
                int col = input.GetLength(1);

                double[,] output = new double[row, col];

                // sqrt(2 / pi) does not depend on the element, so compute it once
                double scale = System.Math.Sqrt(2 / System.Math.PI);

                for (int i = 0; i < row; i++)
                {
                    for (int j = 0; j < col; j++)
                    {
                        double x = input[i, j];

                        // the cubic term scales x^3, and the tanh is shifted by 1 so that
                        // large positive x maps to x and large negative x maps to 0
                        output[i, j] = 0.5 * x * (1 + System.Math.Tanh(scale * (x + 0.044715 * x * x * x)));
                    }
                }

                return output;
            }

            /// <summary>
            /// Scales the received gradient by the derivative of the tanh GELU approximation.
            /// </summary>
            /// <param name="recievedGradient"> The gradient recieved from prior layer function. It is updated in place. </param>
            /// <param name="output"> The outputs of the current layer post activation. </param>
            /// <remarks>
            /// NOTE(review): the derivative formula below is the derivative with respect to the
            /// pre-activation value, but it is evaluated at <paramref name="output"/>. GELU cannot be
            /// inverted from its output alone, so an exact gradient needs the pre-activation values,
            /// which this signature does not receive.
            /// </remarks>
            /// <returns> 
            /// The gradient to be passed to the corresponding neuron layer as a 2d array
            /// (the same array instance as <paramref name="recievedGradient"/>).
            /// </returns>
            public static double[,] Backward(double[,] recievedGradient, double[,] output)
            {
                int batchSize = recievedGradient.GetLength(0);
                int totalNeurons = output.GetLength(1);

                // aliases the argument: the gradient is scaled in place
                double[,] outputGradient = recievedGradient;

                for (int set = 0; set < batchSize; set++)
                {
                    for (int neuron = 0; neuron < totalNeurons; neuron++)
                    {
                        double x = output[set, neuron];

                        // 0.797885 ~ sqrt(2 / pi) and 0.0356774 ~ 0.044715 * sqrt(2 / pi)
                        double val = 0.0356774 * System.Math.Pow(x, 3) + 0.797885 * x;

                        // 0.5 + 0.5 * tanh(val) + x * 0.5 * sech^2(val) * d(val)/dx
                        outputGradient[set, neuron] *= 0.5 * System.Math.Tanh(val) + (0.0535161 * System.Math.Pow(x, 3) + 0.398942 * x) * System.Math.Pow(1 / System.Math.Cosh(val), 2) + 0.5;
                    }
                }

                return outputGradient;
            }
        }

        [System.Runtime.Serialization.DataContract]
        public class ELU
        {
            public ELU() { }

            /// <summary>
            /// Applies the Exponential Linear Unit to every element.
            /// </summary>
            /// <param name="input">The outputs of the neuron layer prior to activation.</param>
            /// <remarks>x for x >= 0, otherwise C * (e^x - 1) with C = 1</remarks>
            /// <returns> 
            /// The post activation outputs of the neuron layer as a new 2d array.
            /// </returns>
            public static double[,] Forward(double[,] input)
            {
                int row = input.GetLength(0);
                int col = input.GetLength(1);

                double[,] output = new double[row, col];

                // scale of the negative branch
                double C = 1;

                for (int i = 0; i < row; i++)
                {
                    for (int j = 0; j < col; j++)
                    {
                        if (input[i, j] < 0)
                            output[i, j] = C * (System.Math.Exp(input[i, j]) - 1);
                        else
                            output[i, j] = input[i, j];
                    }
                }

                return output;
            }

            /// <summary>
            /// Scales the received gradient by the derivative of the ELU function.
            /// </summary>
            /// <param name="recievedGradient"> The gradient recieved from prior layer function. It is updated in place. </param>
            /// <param name="output"> The outputs of the current layer post activation. </param>
            /// <remarks>
            /// On the negative branch the output is y = C * (e^x - 1), so the derivative
            /// C * e^x equals y + C and can be computed from the post-activation output directly.
            /// </remarks>
            /// <returns> 
            /// The gradient to be passed to the corresponding neuron layer as a 2d array
            /// (the same array instance as <paramref name="recievedGradient"/>).
            /// </returns>
            public static double[,] Backward(double[,] recievedGradient, double[,] output)
            {
                int batchSize = recievedGradient.GetLength(0);
                int totalNeurons = output.GetLength(1);

                // aliases the argument: the gradient is scaled in place
                double[,] outputGradient = recievedGradient;

                // must match the constant used in Forward
                double C = 1;

                for (int set = 0; set < batchSize; set++)
                {
                    for (int neuron = 0; neuron < totalNeurons; neuron++)
                    {
                        // a negative output can only come from the exponential branch;
                        // non-negative outputs have a derivative of 1 and are left untouched
                        if (output[set, neuron] < 0)
                            outputGradient[set, neuron] *= output[set, neuron] + C;
                    }
                }

                return outputGradient;
            }
        }

        [System.Runtime.Serialization.DataContract]
        public class ParametricReLU
        {
            public ParametricReLU() { }

            /// <summary>
            /// Applies max(x, C * x) to every element, using the fixed slope C = 0.0001.
            /// </summary>
            /// <param name="input">The outputs of the neuron layer prior to activation.</param>
            /// <returns> 
            /// The post activation outputs of the neuron layer as a new 2d array.
            /// </returns>
            public static double[,] Forward(double[,] input)
            {
                int row = input.GetLength(0);
                int col = input.GetLength(1);

                double[,] output = new double[row, col];

                // slope applied to negative values
                double C = 0.0001;

                for (int i = 0; i < row; i++)
                    for (int j = 0; j < col; j++)
                        output[i, j] = System.Math.Max(input[i, j], input[i, j] * C);

                return output;
            }

            /// <summary>
            /// Scales the received gradient by the derivative of the activation function.
            /// </summary>
            /// <param name="recievedGradient"> The gradient recieved from prior layer function. It is updated in place. </param>
            /// <param name="output"> The outputs of the current layer post activation. </param>
            /// <returns> 
            /// The gradient to be passed to the corresponding neuron layer as a 2d array
            /// (the same array instance as <paramref name="recievedGradient"/>).
            /// </returns>
            public static double[,] Backward(double[,] recievedGradient, double[,] output)
            {
                int batchSize = recievedGradient.GetLength(0);
                int totalNeurons = output.GetLength(1);

                // aliases the argument: the gradient is scaled in place
                double[,] outputGradient = recievedGradient;

                // must match the slope used in Forward
                double C = 0.0001;

                for (int set = 0; set < batchSize; set++)
                {
                    for (int neuron = 0; neuron < totalNeurons; neuron++)
                    {
                        // a non-positive output came from the sloped branch, whose derivative is C,
                        // so the incoming gradient is scaled by C; otherwise it passes through unchanged
                        if (output[set, neuron] <= 0)
                            outputGradient[set, neuron] *= C;
                    }
                }

                return outputGradient;
            }
        }

        [System.Runtime.Serialization.DataContract]
        public class Linear
        {
            public Linear() { }

            /// <summary>
            /// Identity activation: the values are handed back exactly as they were received.
            /// </summary>
            /// <param name="input">The outputs of the neuron layer prior to activation.</param>
            /// <returns> 
            /// The same array instance that was passed in.
            /// </returns>
            public static double[,] Forward(double[,] input)
            {
                // no copy is made; the caller gets its own array back
                double[,] unchanged = input;

                return unchanged;
            }

            /// <summary>
            /// Derivative of the identity activation: the gradient passes through unmodified.
            /// </summary>
            /// <param name="recievedGradient"> The gradient recieved from prior layer function. </param>
            /// <param name="output"> The outputs of the current layer post activation (not used). </param>
            /// <returns> 
            /// The same array instance as <paramref name="recievedGradient"/>.
            /// </returns>
            public static double[,] Backward(double[,] recievedGradient, double[,] output)
            {
                // the derivative is 1 everywhere, so there is nothing to scale
                double[,] unchanged = recievedGradient;

                return unchanged;
            }
        }

        [System.Runtime.Serialization.DataContract]
        public class TanH
        {
            public TanH() { }

            /// <summary>
            /// Applies the hyperbolic tangent to every element.
            /// </summary>
            /// <param name="input">The outputs of the neuron layer prior to activation.</param>
            /// <returns> 
            /// The post activation outputs of the neuron layer as a new 2d array.
            /// </returns>
            public static double[,] Forward(double[,] input)
            {
                int row = input.GetLength(0);
                int col = input.GetLength(1);

                double[,] output = new double[row, col];

                for (int i = 0; i < row; i++)
                {
                    for (int j = 0; j < col; j++)
                    {
                        // the library routine saturates to +/-1 for large magnitudes, whereas
                        // (e^x - e^-x) / (e^x + e^-x) evaluates to infinity / infinity = NaN there
                        output[i, j] = System.Math.Tanh(input[i, j]);
                    }
                }

                return output;
            }

            /// <summary>
            /// Scales the received gradient by the derivative of the hyperbolic tangent.
            /// </summary>
            /// <param name="recievedGradient"> The gradient recieved from prior layer function. It is updated in place. </param>
            /// <param name="output"> The outputs of the current layer post activation. </param>
            /// <remarks>
            /// With y = tanh(x) the derivative is 1 - y^2, which only needs the post-activation output.
            /// </remarks>
            /// <returns> 
            /// The gradient to be passed to the corresponding neuron layer as a 2d array
            /// (the same array instance as <paramref name="recievedGradient"/>).
            /// </returns>
            public static double[,] Backward(double[,] recievedGradient, double[,] output)
            {
                int batchSize = recievedGradient.GetLength(0);
                int totalNeurons = output.GetLength(1);

                // aliases the argument: the gradient is scaled in place
                double[,] outputGradient = recievedGradient;

                for (int set = 0; set < batchSize; set++)
                {
                    for (int neuron = 0; neuron < totalNeurons; neuron++)
                    {
                        double y = output[set, neuron];

                        outputGradient[set, neuron] *= 1 - y * y;
                    }
                }

                return outputGradient;
            }
        }

        [System.Runtime.Serialization.DataContract]
        public class ReLU
        {
            public ReLU() { }

            /// <summary>
            /// Applies the rectified linear unit, max(x, 0), to every element.
            /// </summary>
            /// <param name="input">The outputs of the neuron layer prior to activation.</param>
            /// <returns> 
            /// The post activation outputs of the neuron layer as a new 2d array.
            /// </returns>
            public static double[,] Forward(double[,] input)
            {
                int sampleCount = input.GetLength(0);
                int neuronCount = input.GetLength(1);

                double[,] rectified = new double[sampleCount, neuronCount];

                for (int sample = 0; sample < sampleCount; sample++)
                {
                    for (int neuron = 0; neuron < neuronCount; neuron++)
                    {
                        // negative values are clamped to zero
                        rectified[sample, neuron] = System.Math.Max(input[sample, neuron], 0);
                    }
                }

                return rectified;
            }

            /// <summary>
            /// Zeroes the received gradient wherever the layer output was not positive.
            /// </summary>
            /// <param name="recievedGradient"> The gradient recieved from prior layer function. It is updated in place. </param>
            /// <param name="output"> The outputs of the current layer post activation. </param>
            /// <returns> 
            /// The gradient to be passed to the corresponding neuron layer as a 2d array
            /// (the same array instance as <paramref name="recievedGradient"/>).
            /// </returns>
            public static double[,] Backward(double[,] recievedGradient, double[,] output)
            {
                // both dimensions are taken from the gradient itself
                int sampleCount = recievedGradient.GetLength(0);
                int neuronCount = recievedGradient.GetLength(1);

                for (int sample = 0; sample < sampleCount; sample++)
                {
                    for (int neuron = 0; neuron < neuronCount; neuron++)
                    {
                        // positive outputs have a derivative of 1, so their gradient is kept as is
                        if (output[sample, neuron] <= 0)
                        {
                            recievedGradient[sample, neuron] = 0;
                        }
                    }
                }

                return recievedGradient;
            }
        }

        [System.Runtime.Serialization.DataContract]
        public class LeakyReLU
        {
            public LeakyReLU() { }

            /// <summary>
            /// Applies the leaky rectified linear unit to every element: negative values are
            /// multiplied by the slope 0.0001, all other values are kept.
            /// </summary>
            /// <param name="input">The outputs of the neuron layer prior to activation.</param>
            /// <returns> 
            /// The post activation outputs of the neuron layer as a new 2d array.
            /// </returns>
            public static double[,] Forward(double[,] input)
            {
                int sampleCount = input.GetLength(0);
                int neuronCount = input.GetLength(1);

                double[,] activated = new double[sampleCount, neuronCount];

                double slope = 0.0001;

                for (int sample = 0; sample < sampleCount; sample++)
                {
                    for (int neuron = 0; neuron < neuronCount; neuron++)
                    {
                        double value = input[sample, neuron];

                        activated[sample, neuron] = value < 0 ? value * slope : value;
                    }
                }

                return activated;
            }

            /// <summary>
            /// Scales the received gradient by the slope wherever the layer output was not positive.
            /// </summary>
            /// <param name="recievedGradient"> The gradient recieved from prior layer function. It is updated in place. </param>
            /// <param name="output"> The outputs of the current layer post activation. </param>
            /// <returns> 
            /// The gradient to be passed to the corresponding neuron layer as a 2d array
            /// (the same array instance as <paramref name="recievedGradient"/>).
            /// </returns>
            public static double[,] Backward(double[,] recievedGradient, double[,] output)
            {
                int sampleCount = recievedGradient.GetLength(0);
                int neuronCount = output.GetLength(1);

                double slope = 0.0001;

                for (int sample = 0; sample < sampleCount; sample++)
                {
                    for (int neuron = 0; neuron < neuronCount; neuron++)
                    {
                        // non-positive outputs get their gradient multiplied by the slope;
                        // positive outputs keep their gradient unchanged
                        if (output[sample, neuron] <= 0)
                        {
                            recievedGradient[sample, neuron] *= slope;
                        }
                    }
                }

                return recievedGradient;
            }
        }

        [System.Runtime.Serialization.DataContract]
        public class Softmax
        {
            public Softmax() { }

            /// <summary>
            /// Applies the softmax function to every row, so that each row of the result sums to 1.
            /// </summary>
            /// <param name="input">The outputs of the neuron layer prior to activation.</param>
            /// <remarks>e^(input(i) - max) / sum</remarks>
            /// <returns> 
            /// The post activation outputs of the neuron layer as a new 2d array.
            /// </returns>
            public static double[,] Forward(double[,] input)
            {
                int batchSize = input.GetLength(0);
                int totalNeurons = input.GetLength(1);
                
                double[,] output = new double[batchSize, totalNeurons];

                // with no neurons there is nothing to normalise, and reading input[set, 0]
                // below would be out of range
                if (totalNeurons == 0)
                    return output;

                // scratch buffer for the exponentials, reused for every sample
                double[] expValues = new double[totalNeurons];

                for (int set = 0; set < batchSize; set++)
                {
                    double sum = 0;
                    // start the maximum search at the first neuron's value
                    double max = input[set, 0];

                    for (int neuron = 1; neuron < totalNeurons; neuron++)
                    {
                        if (input[set, neuron] > max)
                            max = input[set, neuron];
                    }

                    // subtracting the row maximum keeps every exponent <= 0, which avoids overflow
                    for (int neuron = 0; neuron < totalNeurons; neuron++)
                    {
                        expValues[neuron] = System.Math.Exp(input[set, neuron] - max);
                        sum += expValues[neuron];
                    }

                    // divide by the sum to set the sum of outputs to 1
                    for (int neuron = 0; neuron < totalNeurons; neuron++)
                    {
                        output[set, neuron] = expValues[neuron] / sum;
                    }
                }

                return output;
            }

            /// <summary>
            /// Multiplies the received gradient by the softmax Jacobian of each sample.
            /// </summary>
            /// <param name="recievedGradient"> The gradient recieved from prior layer. It is not modified. </param>
            /// <param name="output"> The outputs of the current layer post activation. </param>
            /// <remarks>
            /// The Jacobian is J[i, j] = y[i] * (delta(i, j) - y[j]), so the product with the gradient g
            /// reduces to y[i] * (g[i] - sum over j of y[j] * g[j]). Using that form avoids
            /// building the full matrix for every sample.
            /// </remarks>
            /// <returns> 
            /// The gradient to be passed to the corresponding neuron layer as a new 2d array.
            /// </returns>
            public static double[,] Backward(double[,] recievedGradient, double[,] output)
            {
                int batchSize = recievedGradient.GetLength(0);
                int matrixSize = output.GetLength(1);

                double[,] outputGradient = new double[batchSize, matrixSize];

                for (int set = 0; set < batchSize; set++)
                {
                    // sum over j of y[j] * g[j], shared by every neuron of this sample
                    double weightedGradient = 0;

                    for (int j = 0; j < matrixSize; j++)
                        weightedGradient += output[set, j] * recievedGradient[set, j];

                    for (int i = 0; i < matrixSize; i++)
                        outputGradient[set, i] = output[set, i] * (recievedGradient[set, i] - weightedGradient);
                }

                return outputGradient;
            }
        }

        [System.Runtime.Serialization.DataContract]
        public class Sigmoid
        {
            public Sigmoid() { }

            /// <summary>
            /// Applies the logistic sigmoid to every element.
            /// </summary>
            /// <param name="input">The outputs of the neuron layer prior to activation.</param>
            /// <remarks>1 / (1 + e^-x)</remarks>
            /// <returns> 
            /// The post activation outputs of the neuron layer as a new 2d array.
            /// </returns>
            public static double[,] Forward(double[,] input)
            {
                int batchSize = input.GetLength(0);
                int totalNeurons = input.GetLength(1);
                
                double[,] output = new double[batchSize, totalNeurons];

                for (int set = 0; set < batchSize; set++)
                    for (int neuron = 0; neuron < totalNeurons; neuron++)
                        output[set, neuron] = 1 / (1 + System.Math.Exp(-input[set, neuron]));

                return output;
            }

            /// <summary>
            /// Scales the received gradient by the derivative of the sigmoid function.
            /// </summary>
            /// <param name="recievedGradient"> The gradient recieved from prior layer. It is updated in place. </param>
            /// <param name="output"> The outputs of the current layer post activation. </param>
            /// <remarks>
            /// With y = sigmoid(x) the derivative e^x / (e^x + 1)^2 equals y * (1 - y), which
            /// only needs the post-activation output.
            /// </remarks>
            /// <returns> 
            /// The gradient to be passed to the corresponding neuron layer as a 2d array
            /// (the same array instance as <paramref name="recievedGradient"/>).
            /// </returns>
            public static double[,] Backward(double[,] recievedGradient, double[,] output)
            {
                int batchSize = output.GetLength(0);
                int totalNeurons = output.GetLength(1);
                
                // aliases the argument: the gradient is scaled in place
                double[,] outputGradient = recievedGradient;

                for (int set = 0; set < batchSize; set++)
                {
                    for (int neuron = 0; neuron < totalNeurons; neuron++)
                    {
                        double y = output[set, neuron];

                        outputGradient[set, neuron] *= y * (1 - y);
                    }
                }

                return outputGradient;
            }
        }
    }

    [System.Runtime.Serialization.DataContract]
    public class LossFunction
    {
        [System.Runtime.Serialization.DataContract]
        public class Regularization
        {
            public Regularization() { }

            /// <summary>
            /// Calculates the L1 and L2 regularization penalty of a weight matrix.
            /// </summary>
            /// <param name="weights">The weight matrix being penalized.</param>
            /// <param name="L1">Lambda of the L1 term; the term is skipped unless it is greater than 0.</param>
            /// <param name="L2">Lambda of the L2 term; the term is skipped unless it is greater than 0.</param>
            /// <returns> 
            /// L1 * sum(|w|) + L2 * sum(w^2), where each term is only included when its lambda is positive.
            /// </returns>
            public static double ForwardWeights(double[,] weights, double L1, double L2)
            {
                double penalty = 0;

                if (L1 > 0)
                {
                    double absoluteTotal = 0;

                    // foreach walks the 2d array row by row
                    foreach (double weight in weights)
                        absoluteTotal += System.Math.Abs(weight);

                    penalty += L1 * absoluteTotal;
                }

                if (L2 > 0)
                {
                    double squaredTotal = 0;

                    foreach (double weight in weights)
                        squaredTotal += System.Math.Pow(weight, 2);

                    penalty += L2 * squaredTotal;
                }

                return penalty;
            }

            /// <summary>
            /// Calculates the L1 and L2 regularization penalty of a bias vector.
            /// </summary>
            /// <param name="biases">The bias vector being penalized.</param>
            /// <param name="L1">Lambda of the L1 term; the term is skipped unless it is greater than 0.</param>
            /// <param name="L2">Lambda of the L2 term; the term is skipped unless it is greater than 0.</param>
            /// <returns> 
            /// L1 * sum(|b|) + L2 * sum(b^2), where each term is only included when its lambda is positive.
            /// </returns>
            public static double ForwardBiases(double[] biases, double L1, double L2)
            {
                double penalty = 0;

                if (L1 > 0)
                {
                    double absoluteTotal = 0;

                    foreach (double bias in biases)
                        absoluteTotal += System.Math.Abs(bias);

                    penalty += L1 * absoluteTotal;
                }

                if (L2 > 0)
                {
                    double squaredTotal = 0;

                    foreach (double bias in biases)
                        squaredTotal += System.Math.Pow(bias, 2);

                    penalty += L2 * squaredTotal;
                }

                return penalty;
            }

            /// <summary>
            /// Calculate the new weights gradient with regularization.
            /// </summary>
            /// <param name="target">The current layer whos weights gradient is being adjusted by regularization.</param>
            /// <returns> 
            /// A 2d array of the adjusted weights gradient.
            /// </returns>
            public static double[,] BackwardWeights(double[,] weights, double[,] devWeights, double L1, double L2)
            {
                int row = weights.GetLength(0);
                int col = weights.GetLength(1);

                double[,] newWeightsGradient = devWeights;

                if (L1 > 0)
                {
                    for (int i = 0; i < row; i++)
                    {
                        for (int j = 0; j < col; j++)
                        {
                            if (weights[i, j] < 0)
                                newWeightsGradient[i, j] += L1 * -1;
                            else
                                newWeightsGradient[i, j] += L1 * 1;
                        }
                    }
                }

                if (L2 > 0)
                {
                    for (int i = 0; i < row; i++)
                    {
                        for (int j = 0; j < col; j++)
                        {
                            newWeightsGradient[i, j] += 2 * L2 * weights[i, j];
                        }
                    }
                }

                return newWeightsGradient;
            }

            /// <summary>
            /// Calculate the new biases gradient with regularization.
            /// </summary>
            /// <param name="target">The current layer whos biases gradient is being adjusted by regularization.</param>
            /// <returns> 
            /// A 2d array of the adjusted biases gradient.
            /// </returns>
            public static double[] BackwardBiases(double[] biases, double[] devBiases, double L1, double L2)
            {
                int length = biases.Length;

                double[] newBiasesGradient = devBiases;

                if (L1 > 0)
                {
                    for (int i = 0; i < length; i++)
                    {
                        if (biases[i] < 0)
                            newBiasesGradient[i] += L1 * -1;
                        else
                            newBiasesGradient[i] += L1 * 1;
                    }
                }

                if (L2 > 0)
                {
                    for (int i = 0; i < length; i++)
                    {
                        newBiasesGradient[i] += 2 * L2 * biases[i];
                    }
                }

                return newBiasesGradient;
            }
        }

        /// <summary>
        /// Squared error loss: the per-set sum of squared differences between outputs and targets.
        /// </summary>
        [System.Runtime.Serialization.DataContract]
        public class Squared_Error : LossFunctionHolder
        {
            public Squared_Error() { }

            /// <summary>
            /// Calculate the loss of each set of outputs.
            /// </summary>
            /// <param name="output">The final outputs of the neural network, indexed [set, neuron].</param>
            /// <param name="target">The target values, indexed like <paramref name="output"/>.</param>
            /// <remarks>loss(set) = SUM((output(i) - target(i))^2)</remarks>
            /// <returns>
            /// A 1d array holding one loss value per set.
            /// </returns>
            /// <exception cref="System.InvalidOperationException">The running loss of a set became infinite or NaN.</exception>
            public double[] Forward(double[,] output, double[,] target)
            {
                int sets = output.GetLength(0);
                int neurons = output.GetLength(1);

                double[] lossPerSet = new double[sets];

                for (int s = 0; s < sets; s++)
                {
                    for (int n = 0; n < neurons; n++)
                    {
                        double error = output[s, n] - target[s, n];

                        lossPerSet[s] += System.Math.Pow(error, 2);

                        // checked after every term so the offending output can be reported
                        ThrowIfNotFinite(lossPerSet[s], output[s, n]);
                    }
                }

                return lossPerSet;
            }

            /// <summary>
            /// Calculate the derivative of the squared error loss function.
            /// </summary>
            /// <param name="output">The final outputs of the neural network, indexed [set, neuron].</param>
            /// <param name="target">The target values, indexed like <paramref name="output"/>.</param>
            /// <remarks>2 * (output(i) - target(i)) / batchsize</remarks>
            /// <returns>
            /// The gradient that will be passed onto the neural network as a 2d array.
            /// </returns>
            public double[,] Backward(double[,] output, double[,] target)
            {
                int sets = output.GetLength(0);
                int neurons = output.GetLength(1);

                double[,] gradient = new double[sets, neurons];

                for (int s = 0; s < sets; s++)
                {
                    for (int n = 0; n < neurons; n++)
                    {
                        double error = output[s, n] - target[s, n];

                        gradient[s, n] = 2 * error / sets;
                    }
                }

                return gradient;
            }

            /// <summary>
            /// Print the offending output and throw when the running loss is infinite or NaN.
            /// </summary>
            private static void ThrowIfNotFinite(double runningLoss, double offendingOutput)
            {
                if (System.Double.IsInfinity(runningLoss))
                {
                    System.Console.WriteLine("Output: " + offendingOutput);
                    throw new System.InvalidOperationException("Loss is Infinite.");
                }

                if (System.Double.IsNaN(runningLoss))
                {
                    System.Console.WriteLine("Output: " + offendingOutput);
                    throw new System.InvalidOperationException("Loss is NaN.");
                }
            }
        }

        /// <summary>
        /// Mean absolute error loss: the per-set mean of |target - output| over the output neurons.
        /// </summary>
        [System.Runtime.Serialization.DataContract]
        public class Mean_Absolute_Error : LossFunctionHolder
        {   
            public Mean_Absolute_Error() { }

            /// <summary>
            /// Calculate the loss of each set of outputs.
            /// </summary>
            /// <param name="output">The final outputs of the neural network, indexed [set, neuron].</param>
            /// <param name="target">Target values, indexed like <paramref name="output"/>.</param>
            /// <remarks>loss(set) = SUM(|target(i) - output(i)|) / neurons</remarks>
            /// <returns> 
            /// A 1d array of the corresponding loss to each data set.
            /// </returns>
            /// <exception cref="System.InvalidOperationException">The running loss of a set became infinite or NaN.</exception>
            public double[] Forward(double[,] output, double[,] target)
            {
                int batchSize = output.GetLength(0);
                int totalNeurons = output.GetLength(1);

                double[] loss = new double[batchSize];

                for (int set = 0; set < batchSize; set++)
                {
                    for (int neuron = 0; neuron < totalNeurons; neuron++)
                    {
                        loss[set] += System.Math.Abs(target[set, neuron] - output[set, neuron]);

                        if (System.Double.IsInfinity(loss[set]))
                        {
                            System.Console.WriteLine("Output: " + output[set, neuron]);
                            throw new System.InvalidOperationException("Loss is Infinite.");
                        }
                        else if (System.Double.IsNaN(loss[set]))
                        {
                            System.Console.WriteLine("Output: " + output[set, neuron]);
                            throw new System.InvalidOperationException("Loss is NaN.");
                        }
                    }

                    // turn the sum of absolute errors into their mean
                    loss[set] /= totalNeurons;
                }

                return loss;
            }

            /// <summary>
            /// Calculate the derivative of the loss function with respect to the outputs.
            /// </summary>
            /// <param name="output">The final outputs of the neural network, indexed [set, neuron].</param>
            /// <param name="target">Target values, indexed like <paramref name="output"/>.</param>
            /// <remarks>sign(output(i) - target(i)) / neurons / batchsize</remarks>
            /// <returns> 
            /// The gradient that will be passed onto the neural network as a 2d array.
            /// </returns>
            /// <exception cref="System.ArithmeticException">An output/target difference is NaN (thrown by Math.Sign).</exception>
            public double[,] Backward(double[,] output, double[,] target)
            {
                int batchSize = output.GetLength(0);
                int totalNeurons = output.GetLength(1);

                double[,] gradient = new double[batchSize, totalNeurons];

                for (int set = 0; set < batchSize; set++)
                {
                    for (int neuron = 0; neuron < totalNeurons; neuron++)
                    {
                        // d|target - output| / d(output) = sign(output - target), the same
                        // direction Mean_Squared_Error.Backward uses for its gradient.
                        // Math.Sign returns an int, so it is widened to double first; dividing
                        // int by int would truncate almost every gradient entry to zero.
                        double direction = System.Math.Sign(output[set, neuron] - target[set, neuron]);

                        gradient[set, neuron] = direction / totalNeurons / batchSize;
                    }
                }

                return gradient;
            }
        }

        /// <summary>
        /// Mean squared error loss: the per-set mean of (target - output)^2 over the output neurons.
        /// </summary>
        [System.Runtime.Serialization.DataContract]
        public class Mean_Squared_Error : LossFunctionHolder
        {
            public Mean_Squared_Error() { }

            /// <summary>
            /// Calculate the loss of each set of outputs.
            /// </summary>
            /// <param name="output">The final outputs of the neural network, indexed [set, neuron].</param>
            /// <param name="target">Target values, indexed like <paramref name="output"/>.</param>
            /// <remarks>loss(set) = SUM((target(i) - output(i))^2) / neurons</remarks>
            /// <returns>
            /// A 1d array of the corresponding loss to each data set.
            /// </returns>
            /// <exception cref="System.InvalidOperationException">The running loss of a set became infinite or NaN.</exception>
            public double[] Forward(double[,] output, double[,] target)
            {
                int rowCount = output.GetLength(0);
                int columnCount = output.GetLength(1);

                double[] meanLoss = new double[rowCount];

                for (int row = 0; row < rowCount; row++)
                {
                    for (int column = 0; column < columnCount; column++)
                    {
                        double residual = target[row, column] - output[row, column];

                        meanLoss[row] += System.Math.Pow(residual, 2);

                        // checked after every term so the offending output can be reported
                        GuardLoss(meanLoss[row], output[row, column]);
                    }

                    // turn the sum of squared errors into their mean
                    meanLoss[row] /= columnCount;
                }

                return meanLoss;
            }

            /// <summary>
            /// Calculate the derivative of the loss function.
            /// </summary>
            /// <param name="output">The final outputs of the neural network, indexed [set, neuron].</param>
            /// <param name="target">Target values, indexed like <paramref name="output"/>.</param>
            /// <remarks>-2 * (target(i) - output(i)) / neurons / batchsize</remarks>
            /// <returns>
            /// The gradient that will be passed onto the neural network as a 2d array.
            /// </returns>
            public double[,] Backward(double[,] output, double[,] target)
            {
                int rowCount = output.GetLength(0);
                int columnCount = output.GetLength(1);

                double[,] gradient = new double[rowCount, columnCount];

                for (int row = 0; row < rowCount; row++)
                {
                    for (int column = 0; column < columnCount; column++)
                    {
                        double residual = target[row, column] - output[row, column];

                        gradient[row, column] = -2 * residual / columnCount / rowCount;
                    }
                }

                return gradient;
            }

            /// <summary>
            /// Print the offending output and throw when the running loss is infinite or NaN.
            /// </summary>
            private static void GuardLoss(double runningLoss, double offendingOutput)
            {
                if (System.Double.IsInfinity(runningLoss))
                {
                    System.Console.WriteLine("Output: " + offendingOutput);
                    throw new System.InvalidOperationException("Loss is Infinite.");
                }

                if (System.Double.IsNaN(runningLoss))
                {
                    System.Console.WriteLine("Output: " + offendingOutput);
                    throw new System.InvalidOperationException("Loss is NaN.");
                }
            }
        }

        /// <summary>
        /// Categorical cross-entropy loss over class probabilities.
        /// </summary>
        [System.Runtime.Serialization.DataContract]
        public class Categorical_Crossentropy : LossFunctionHolder
        {
            // predictions are kept inside [0.0000001, 0.9999999] before a logarithm or division
            private const double MinPrediction = 1E-7;
            private const double MaxPrediction = 1 - 1E-7;

            public Categorical_Crossentropy() { }

            /// <summary>
            /// Calculate the loss of each set of outputs.
            /// </summary>
            /// <param name="output">The final outputs of the neural network, indexed [set, neuron]. It is not modified.</param>
            /// <param name="target">The one-hot encoded target classes, indexed like <paramref name="output"/>.</param>
            /// <remarks>
            /// loss(set) = -SUM(target(i) * Log(output(i)))
            /// Note: log is the natural log
            /// </remarks>
            /// <returns> 
            /// A 1d array of the corresponding loss to each data set.
            /// </returns>
            /// <exception cref="System.InvalidOperationException">The running loss of a set became infinite or NaN.</exception>
            public double[] Forward(double[,] output, double[,] target)
            {
                int batchSize = output.GetLength(0);
                int totalNeurons = output.GetLength(1);

                double[] loss = new double[batchSize];

                for (int set = 0; set < batchSize; set++)
                {
                    for (int neuron = 0; neuron < totalNeurons; neuron++)
                    {
                        double expected = target[set, neuron];

                        // terms with a zero target contribute nothing, so they are skipped
                        if (expected > 0)
                        {
                            // clip into a local so the caller's output array stays untouched
                            double prediction = Clip(output[set, neuron]);

                            // accumulate the weighted term; for a one-hot target (a single 1)
                            // this is exactly -Log(output) of the true class
                            loss[set] -= expected * System.Math.Log(prediction);

                            if (System.Double.IsInfinity(loss[set]))
                            {
                                System.Console.WriteLine("Output: " + prediction);
                                throw new System.InvalidOperationException("Loss is Infinite.");
                            }
                            else if (System.Double.IsNaN(loss[set]))
                            {
                                System.Console.WriteLine("Output: " + prediction);
                                throw new System.InvalidOperationException("Loss is NaN.");
                            }
                        }
                    }
                }

                return loss;
            }

            /// <summary>
            /// Calculate the derivative of the loss function.
            /// </summary>
            /// <param name="output">The final outputs of the neural network, indexed [set, neuron]. It is not modified.</param>
            /// <param name="target">The one-hot encoded target classes, indexed like <paramref name="output"/>.</param>
            /// <remarks>-target(i) / output(i) / batchsize</remarks>
            /// <returns> 
            /// The gradient that will be passed onto the neural network as a 2d array.
            /// </returns>
            public double[,] Backward(double[,] output, double[,] target)
            {
                int batchSize = output.GetLength(0);
                int totalNeurons = output.GetLength(1);

                double[,] gradient = new double[batchSize, totalNeurons];

                for (int set = 0; set < batchSize; set++)
                {
                    for (int neuron = 0; neuron < totalNeurons; neuron++)
                    {
                        // clip into a local so the division is safe and the caller's array stays untouched
                        double prediction = Clip(output[set, neuron]);

                        gradient[set, neuron] = -target[set, neuron] / prediction / batchSize;
                    }
                }

                return gradient;
            }

            /// <summary>
            /// Limit a prediction to be no larger than 0.9999999 and no smaller than 0.0000001.
            /// </summary>
            private static double Clip(double value)
            {
                if (value > MaxPrediction)
                    return MaxPrediction;

                if (value < MinPrediction)
                    return MinPrediction;

                return value;
            }
        }

        /// <summary>
        /// Binary cross-entropy loss, averaged over the output neurons of each set.
        /// </summary>
        [System.Runtime.Serialization.DataContract]
        public class Binary_Categorical_Crossentropy : LossFunctionHolder
        {
            // predictions are kept inside [0.0000001, 0.9999999] before a logarithm or division
            private const double MinPrediction = 1E-7;
            private const double MaxPrediction = 1 - 1E-7;

            public Binary_Categorical_Crossentropy() { }

            /// <summary>
            /// Calculate the loss of each set of outputs.
            /// </summary>
            /// <param name="output">The final outputs of the neural network, indexed [set, neuron]. It is not modified.</param>
            /// <param name="target">Binary target values, indexed like <paramref name="output"/>.</param>
            /// <remarks>
            /// loss(set) = -SUM(target(i) * Log(output(i)) + (1 - target(i)) * Log(1 - output(i))) / neurons
            /// Note: log is the natural log
            /// </remarks>
            /// <returns> 
            /// A 1d array of the corresponding loss to each data set.
            /// </returns>
            /// <exception cref="System.InvalidOperationException">The running loss of a set became infinite or NaN.</exception>
            public double[] Forward(double[,] output, double[,] target)
            {
                int batchSize = output.GetLength(0);
                int totalNeurons = output.GetLength(1);

                double[] loss = new double[batchSize];

                for (int set = 0; set < batchSize; set++)
                {
                    for (int neuron = 0; neuron < totalNeurons; neuron++)
                    {
                        // clip into a local so the caller's output array stays untouched
                        double prediction = Clip(output[set, neuron]);
                        double expected = target[set, neuron];

                        loss[set] -= expected * System.Math.Log(prediction) + (1 - expected) * System.Math.Log(1 - prediction);

                        if (System.Double.IsInfinity(loss[set]))
                        {
                            System.Console.WriteLine("Output: " + prediction);
                            throw new System.InvalidOperationException("Loss is Infinite.");
                        }
                        else if (System.Double.IsNaN(loss[set]))
                        {
                            System.Console.WriteLine("Output: " + prediction);
                            throw new System.InvalidOperationException("Loss is NaN.");
                        }
                    }

                    // turn the sum over the neurons into their mean
                    loss[set] /= totalNeurons;
                }

                return loss;
            }

            /// <summary>
            /// Calculate the derivative of the loss function.
            /// </summary>
            /// <param name="output">The final outputs of the neural network, indexed [set, neuron]. It is not modified.</param>
            /// <param name="target">Binary target values, indexed like <paramref name="output"/>.</param>
            /// <remarks>-(target(i) / output(i) - (1 - target(i)) / (1 - output(i))) / neurons / batchsize</remarks>
            /// <returns> 
            /// The gradient that will be passed onto the neural network as a 2d array.
            /// </returns>
            public double[,] Backward(double[,] output, double[,] target)
            {
                int batchSize = output.GetLength(0);
                int totalNeurons = output.GetLength(1);

                double[,] gradient = new double[batchSize, totalNeurons];

                for (int set = 0; set < batchSize; set++)
                {
                    for (int neuron = 0; neuron < totalNeurons; neuron++)
                    {
                        // clip into a local so both divisions are safe and the caller's array stays untouched
                        double prediction = Clip(output[set, neuron]);
                        double expected = target[set, neuron];

                        // the gradient array starts at zero, so subtracting stores the negated term
                        gradient[set, neuron] -= (expected / prediction - (1 - expected) / (1 - prediction)) / totalNeurons / batchSize;
                    }
                }

                return gradient;
            }

            /// <summary>
            /// Limit a prediction to be no larger than 0.9999999 and no smaller than 0.0000001.
            /// </summary>
            private static double Clip(double value)
            {
                if (value > MaxPrediction)
                    return MaxPrediction;

                if (value < MinPrediction)
                    return MinPrediction;

                return value;
            }
        }
    }

    [System.Runtime.Serialization.DataContract]
    public class OptimizerFunction
    {
        /// <summary>
        /// Stochastic gradient descent optimizer with optional learning rate decay and momentum.
        /// </summary>
        [System.Runtime.Serialization.DataContract]
        public class Stochastic_Gradient_Descent : OptimizerFunctionHolder
        {
            #region Variables
            // base learning rate
            private double learningRate;

            // the learning rate is only decayed when this is greater than zero
            private double decayRate;

            // momentum is only applied when this is greater than zero
            private double momentumRate;

            // learning rate computed by the most recent Weights call
            private double currentLearningRate;

            // incremented at the end of every Weights call
            private double currentIteration = 0;

            // previous weight updates; stays null until momentum is first applied
            private double[,] weightsMomentum;

            // previous bias updates; stays null until momentum is first applied
            private double[] biasesMomentum;

            [System.Runtime.Serialization.DataMember]
            public double LearningRate
            {
                get { return learningRate; }
                set { learningRate = value; }
            }

            [System.Runtime.Serialization.DataMember]
            public double DecayRate
            {
                get { return decayRate; }
                set { decayRate = value; }
            }

            [System.Runtime.Serialization.DataMember]
            public double MomentumRate
            {
                get { return momentumRate; }
                set { momentumRate = value; }
            }

            [System.Runtime.Serialization.DataMember]
            public double CurrentLearningRate
            {
                get { return currentLearningRate; }
                set { currentLearningRate = value; }
            }

            [System.Runtime.Serialization.DataMember]
            public double CurrentIteration
            {
                get { return currentIteration; }
                set { currentIteration = value; }
            }

            // not a data member: the 2d array travels through serializedWeightsMomentum instead
            public double[,] WeightsMomentum
            {
                get { return weightsMomentum; }
                private set { weightsMomentum = value; }
            }

            [System.Runtime.Serialization.DataMember]
            public double[] BiasesMomentum
            {
                get { return biasesMomentum; }
                private set { biasesMomentum = value; }
            }

            // jagged copy of weightsMomentum, filled right before serialization
            [System.Runtime.Serialization.DataMember]
            private double[][] serializedWeightsMomentum;

            /// <summary>
            /// Copy the multi-dimensional momentum array into a jagged array so it can be serialized.
            /// </summary>
            [System.Runtime.Serialization.OnSerializing]
            private void BeforeSerializing(System.Runtime.Serialization.StreamingContext context)
            {
                // nothing to copy until momentum has been used at least once
                if (this.weightsMomentum == null)
                {
                    return;
                }

                int rows = this.weightsMomentum.GetLength(0);
                int cols = this.weightsMomentum.GetLength(1);

                this.serializedWeightsMomentum = new double[rows][];

                for (int r = 0; r < rows; r++)
                {
                    double[] line = new double[cols];

                    this.serializedWeightsMomentum[r] = line;

                    for (int c = 0; c < cols; c++)
                    {
                        line[c] = this.weightsMomentum[r, c];
                    }
                }
            }

            /// <summary>
            /// Rebuild the multi-dimensional momentum array from the serialized jagged array.
            /// </summary>
            /// <exception cref="System.InvalidOperationException">The rows of the jagged array differ in length.</exception>
            [System.Runtime.Serialization.OnDeserialized]
            private void AfterDeserializing(System.Runtime.Serialization.StreamingContext ctx)
            {
                double[][] jagged = this.serializedWeightsMomentum;

                if (jagged == null)
                {
                    this.weightsMomentum = null;
                    return;
                }

                int rows = jagged.Length;
                int cols = jagged[0].Length;

                // every row must be as long as the first one, otherwise the shape is not rectangular
                for (int r = 1; r < rows; r++)
                {
                    if (jagged[r].Length != cols)
                    {
                        throw new System.InvalidOperationException("The serialized array does not match the multi-dimensional one");
                    }
                }

                double[,] restored = new double[rows, cols];

                for (int r = 0; r < rows; r++)
                {
                    for (int c = 0; c < cols; c++)
                    {
                        restored[r, c] = jagged[r][c];
                    }
                }

                this.weightsMomentum = restored;
            }
            #endregion

            /// <summary>
            /// Create an optimizer from explicit hyperparameters.
            /// </summary>
            /// <param name="learningRate">The base learning rate.</param>
            /// <param name="decay">The decay rate of the learning rate.</param>
            /// <param name="momentum">The momentum rate.</param>
            public Stochastic_Gradient_Descent(double learningRate = 1, double decay = 0, double momentum = 0)
            {
                this.learningRate = learningRate;
                this.decayRate = decay;
                this.momentumRate = momentum;
            }

            /// <summary>
            /// Create an optimizer with the same learning, decay and momentum rates as another one.
            /// Iteration count and momentum arrays are not copied.
            /// </summary>
            /// <param name="SGD">The optimizer whose rates are copied.</param>
            public Stochastic_Gradient_Descent(Stochastic_Gradient_Descent SGD)
            {
                this.learningRate = SGD.learningRate;
                this.decayRate = SGD.decayRate;
                this.momentumRate = SGD.momentumRate;
            }

            public Stochastic_Gradient_Descent() { }

            /// <summary>
            /// Get the parameters for optimizer.
            /// </summary>
            /// <returns>A string of the parameters for optimizer.</returns>
            public string GetParameters()
            {
                string parameters = "Optimizer: stochastic_gradient_descent\n";

                parameters += "    Learning Rate: " + this.learningRate + "\n";
                parameters += "    Decay Rate: " + this.decayRate + "\n";
                parameters += "    Momentum Rate: " + this.momentumRate + "\n";

                return parameters;
            }

            /// <summary>
            /// Apply one optimization step to the weights.
            /// </summary>
            /// <param name="weights">The current weights of neuron layer; they are updated in place.</param>
            /// <param name="devWeights">The current gradient being passed onto the weights.</param>
            /// <returns>The same array instance as <paramref name="weights"/>, holding the adjusted weights.</returns>
            public double[,] Weights(double[,] weights, double[,] devWeights)
            {
                int rows = weights.GetLength(0);
                int cols = weights.GetLength(1);

                double[,] step = new double[rows, cols];

                // the learning rate only decays with the iteration count when a decay rate is set
                this.currentLearningRate = this.decayRate > 0
                    ? this.learningRate * (1 / (1 + this.decayRate * this.currentIteration))
                    : this.learningRate;

                Config.LearningRate = this.currentLearningRate;

                if (this.momentumRate > 0)
                {
                    // on the first iteration there is no previous update yet, so start from zeros
                    if (this.weightsMomentum == null)
                    {
                        this.weightsMomentum = new double[rows, cols];
                    }

                    double[,] previous = this.weightsMomentum;

                    for (int r = 0; r < rows; r++)
                    {
                        for (int c = 0; c < cols; c++)
                        {
                            step[r, c] = previous[r, c] * this.momentumRate - devWeights[r, c] * this.currentLearningRate;
                        }
                    }

                    // this update becomes the momentum of the next iteration
                    this.weightsMomentum = step;
                }
                else
                {
                    // vanilla SGD
                    for (int r = 0; r < rows; r++)
                    {
                        for (int c = 0; c < cols; c++)
                        {
                            step[r, c] = -devWeights[r, c] * this.currentLearningRate;
                        }
                    }
                }

                for (int r = 0; r < rows; r++)
                {
                    for (int c = 0; c < cols; c++)
                    {
                        weights[r, c] += step[r, c];
                    }
                }

                this.currentIteration++;

                return weights;
            }

            /// <summary>
            /// Apply one optimization step to the biases.
            /// </summary>
            /// <param name="biases">The current biases of neuron layer; they are updated in place.</param>
            /// <param name="devBiases">The current gradient being passed onto the biases.</param>
            /// <remarks>
            /// The learning rate and the iteration count are not touched here; the value
            /// left in place by the last Weights call is used.
            /// </remarks>
            /// <returns>The same array instance as <paramref name="biases"/>, holding the adjusted biases.</returns>
            public double[] Biases(double[] biases, double[] devBiases)
            {
                int count = biases.Length;

                double[] step = new double[count];

                if (this.momentumRate > 0)
                {
                    // on the first iteration there is no previous update yet, so start from zeros
                    if (this.biasesMomentum == null)
                    {
                        this.biasesMomentum = new double[count];
                    }

                    double[] previous = this.biasesMomentum;

                    for (int b = 0; b < count; b++)
                    {
                        step[b] = previous[b] * this.momentumRate - devBiases[b] * this.currentLearningRate;
                    }

                    // this update becomes the momentum of the next iteration
                    this.biasesMomentum = step;
                }
                else
                {
                    // vanilla SGD
                    for (int b = 0; b < count; b++)
                    {
                        step[b] = -devBiases[b] * this.currentLearningRate;
                    }
                }

                for (int b = 0; b < count; b++)
                {
                    biases[b] += step[b];
                }

                return biases;
            }
        }

        /// <summary>
        /// Adaptive gradient (AdaGrad) optimizer. It accumulates the squared gradient of every
        /// parameter in a cache and divides each update by the square root of that cache, so a
        /// parameter that keeps receiving large gradients takes progressively smaller steps.
        /// </summary>
        /// <remarks>
        /// Weights() recomputes the decayed learning rate and advances the iteration counter.
        /// Biases() reuses that learning rate, so it is meant to be called after Weights()
        /// within the same training step.
        /// </remarks>
        [System.Runtime.Serialization.DataContract]
        public class Adaptive_Gradient : OptimizerFunctionHolder
        {
            #region Variables
            // Hyper-parameters and running state. They are serialized through the public
            // properties below; marking the backing fields as data members as well would only
            // write every value twice.
            private double learningRate;

            private double decayRate;

            private double epsilon;

            private double currentLearningRate;

            private double currentIteration = 0;

            // running sum of squared weight gradients (saved through serializedWeightsCache)
            private double[,] weightsCache;

            // running sum of squared bias gradients
            private double[] biasesCache;

            [System.Runtime.Serialization.DataMember]
            public double LearningRate { get { return learningRate; } set { learningRate = value; } }

            [System.Runtime.Serialization.DataMember]
            public double DecayRate { get { return decayRate; } set { decayRate = value; } }

            [System.Runtime.Serialization.DataMember]
            public double Epsilon { get { return epsilon; } set { epsilon = value; } }

            [System.Runtime.Serialization.DataMember]
            public double CurrentLearningRate { get { return currentLearningRate; } set { currentLearningRate = value; } }

            [System.Runtime.Serialization.DataMember]
            public double CurrentIteration { get { return currentIteration; } set { currentIteration = value; } }

            public double[,] WeightsCache { get { return weightsCache; } private set { weightsCache = value; } }

            // The private setter is required: without it the serializer has no way to assign
            // the restored array when the optimizer is loaded.
            [System.Runtime.Serialization.DataMember]
            public double[] BiasesCache { get { return biasesCache; } private set { biasesCache = value; } }

            // Jagged stand-in for weightsCache; it is only populated while saving or loading.
            [System.Runtime.Serialization.DataMember]
            private double[][] serializedWeightsCache;

            /// <summary>
            /// Turn multi-dimensional arrays into jagged arrays so they can be serialized.
            /// </summary>
            [System.Runtime.Serialization.OnSerializing]
            private void BeforeSerializing(System.Runtime.Serialization.StreamingContext context)
            {
                this.serializedWeightsCache = ToJagged(this.weightsCache);
            }

            /// <summary>
            /// Turn serialized jagged arrays into originally shaped multi-dimensional arrays.
            /// </summary>
            [System.Runtime.Serialization.OnDeserialized]
            private void AfterDeserializing(System.Runtime.Serialization.StreamingContext ctx)
            {
                this.weightsCache = ToRectangular(this.serializedWeightsCache);

                // The jagged copy is only a transport format and is rebuilt before the next
                // save, so release it instead of keeping a second copy of the cache alive.
                this.serializedWeightsCache = null;
            }

            /// <summary>
            /// Copies a rectangular matrix into a jagged array of equal-length rows.
            /// </summary>
            /// <param name="matrix">The matrix to copy; may be null.</param>
            /// <returns>The jagged copy, or null when the matrix is null.</returns>
            private static double[][] ToJagged(double[,] matrix)
            {
                if (matrix == null)
                {
                    return null;
                }

                int rowCount = matrix.GetLength(0);
                int colCount = matrix.GetLength(1);
                double[][] rows = new double[rowCount][];

                for (int i = 0; i < rowCount; i++)
                {
                    rows[i] = new double[colCount];

                    for (int j = 0; j < colCount; j++)
                    {
                        rows[i][j] = matrix[i, j];
                    }
                }

                return rows;
            }

            /// <summary>
            /// Copies a jagged array back into a rectangular matrix.
            /// </summary>
            /// <param name="rows">The jagged array to copy; may be null or empty.</param>
            /// <returns>
            /// The rectangular copy, null when the input is null, or a 0 x 0 matrix when the
            /// input has no rows.
            /// </returns>
            /// <exception cref="System.InvalidOperationException">
            /// A row is missing or its length differs from the first row.
            /// </exception>
            private static double[,] ToRectangular(double[][] rows)
            {
                if (rows == null)
                {
                    return null;
                }

                int rowCount = rows.Length;

                // an empty array has no first row to measure
                if (rowCount == 0)
                {
                    return new double[0, 0];
                }

                if (rows[0] == null)
                {
                    throw new System.InvalidOperationException("The serialized array does not match the multi-dimensional one");
                }

                int colCount = rows[0].Length;
                double[,] matrix = new double[rowCount, colCount];

                for (int i = 0; i < rowCount; i++)
                {
                    double[] current = rows[i];

                    if (current == null || current.Length != colCount)
                    {
                        throw new System.InvalidOperationException("The serialized array does not match the multi-dimensional one");
                    }

                    for (int j = 0; j < colCount; j++)
                    {
                        matrix[i, j] = current[j];
                    }
                }

                return matrix;
            }
            #endregion

            /// <summary>
            /// Create an optimizer from explicit hyper-parameters.
            /// </summary>
            /// <param name="learningRate">The base learning rate.</param>
            /// <param name="decay">The learning rate decay; values of zero or less disable decay.</param>
            /// <param name="epsilon">Small constant added to the denominator of every update.</param>
            public Adaptive_Gradient(double learningRate = 1, double decay = 0, double epsilon = 1E-7)
            {
                this.learningRate = learningRate;
                this.epsilon = epsilon;
                this.decayRate = decay;
            }

            /// <summary>
            /// Create an optimizer with the same hyper-parameters as another one. The caches
            /// and the iteration counter are not copied, so the new optimizer starts fresh.
            /// </summary>
            /// <param name="AG">The optimizer to copy the hyper-parameters from.</param>
            /// <exception cref="System.ArgumentNullException">AG is null.</exception>
            public Adaptive_Gradient(Adaptive_Gradient AG)
            {
                if (AG == null)
                {
                    throw new System.ArgumentNullException("AG");
                }

                this.learningRate = AG.learningRate;
                this.epsilon = AG.epsilon;
                this.decayRate = AG.decayRate;
            }

            /// <summary>
            /// Parameterless constructor; leaves every hyper-parameter at zero.
            /// </summary>
            public Adaptive_Gradient() { }

            /// <summary>
            /// Get the parameters for optimizer.
            /// </summary>
            /// <returns>A string of the parameters for optimizer.</returns>
            public string GetParameters()
            {
                string parameters = "Optimizer: adaptive_gradient\n" +
                                    "    Learning Rate: " + this.learningRate + "\n" +
                                    "    Decay Rate: " + this.decayRate + "\n" +
                                    "    Epsilon: " + this.epsilon + "\n";

                return parameters;
            }

            /// <summary>
            /// Optimize the gradients being passed onto the weights.
            /// </summary>
            /// <remarks>
            /// Updates the weights in place, stores the learning rate used in
            /// Config.LearningRate and advances the iteration counter by one.
            /// </remarks>
            /// <param name="weights">The current weights of neuron layer.</param>
            /// <param name="devWeights">The current gradient being passed onto the weights.</param>
            /// <returns>The new adjusted weights (the same array that was passed in).</returns>
            public double[,] Weights(double[,] weights, double[,] devWeights)
            {
                int row = weights.GetLength(0);
                int col = weights.GetLength(1);

                // on the first call there is no weights cache yet; a new array starts as all zeros
                if (this.weightsCache == null)
                {
                    this.weightsCache = new double[row, col];
                }

                // if the decay rate is greater than zero then update the current learning rate
                if (this.decayRate > 0)
                {
                    this.currentLearningRate = this.learningRate * (1 / (1 + this.decayRate * this.currentIteration));
                }
                else
                {
                    this.currentLearningRate = this.learningRate;
                }

                Config.LearningRate = this.currentLearningRate;

                double rate = this.currentLearningRate;

                for (int i = 0; i < row; i++)
                {
                    for (int j = 0; j < col; j++)
                    {
                        double gradient = devWeights[i, j];

                        // accumulate the squared gradient, then step normalised by its square root
                        this.weightsCache[i, j] += System.Math.Pow(gradient, 2);
                        weights[i, j] += -gradient * rate / (System.Math.Sqrt(this.weightsCache[i, j]) + this.epsilon);
                    }
                }

                this.currentIteration++;

                return weights;
            }

            /// <summary>
            /// Optimize the gradients being passed onto the biases.
            /// </summary>
            /// <remarks>
            /// Updates the biases in place. The learning rate and the iteration counter are
            /// not touched here; both are maintained by Weights(), which is expected to run
            /// first in every training step.
            /// </remarks>
            /// <param name="biases">The current biases of neuron layer.</param>
            /// <param name="devBiases">The current gradient being passed onto the biases.</param>
            /// <returns>The new adjusted biases (the same array that was passed in).</returns>
            public double[] Biases(double[] biases, double[] devBiases)
            {
                int length = biases.Length;

                // on the first call there is no biases cache yet; a new array starts as all zeros
                if (this.biasesCache == null)
                {
                    this.biasesCache = new double[length];
                }

                double rate = this.currentLearningRate;

                for (int i = 0; i < length; i++)
                {
                    double gradient = devBiases[i];

                    // accumulate the squared gradient, then step normalised by its square root
                    this.biasesCache[i] += System.Math.Pow(gradient, 2);
                    biases[i] += -gradient * rate / (System.Math.Sqrt(this.biasesCache[i]) + this.epsilon);
                }

                return biases;
            }
        }

        /// <summary>
        /// Root mean square propagation (RMSProp) optimizer. It keeps an exponentially
        /// decaying average of the squared gradient of every parameter (weighted by Rho) and
        /// divides each update by the square root of that average.
        /// </summary>
        /// <remarks>
        /// Weights() recomputes the decayed learning rate and advances the iteration counter.
        /// Biases() reuses that learning rate, so it is meant to be called after Weights()
        /// within the same training step.
        /// </remarks>
        [System.Runtime.Serialization.DataContract]
        public class Root_Mean_Square_Propegation : OptimizerFunctionHolder
        {
            #region Variables
            private double learningRate;

            private double decayRate;

            private double epsilon;

            // weight given to the previous cache value when averaging squared gradients
            private double rho;

            private double currentLearningRate;

            private double currentIteration = 0;

            // decaying average of squared weight gradients (saved through serializedWeightsCache)
            private double[,] weightsCache;

            // decaying average of squared bias gradients
            private double[] biasesCache;

            [System.Runtime.Serialization.DataMember]
            public double LearningRate { get { return learningRate; } set { learningRate = value; } }

            [System.Runtime.Serialization.DataMember]
            public double DecayRate { get { return decayRate; } set { decayRate = value; } }

            [System.Runtime.Serialization.DataMember]
            public double Epsilon { get { return epsilon; } set { epsilon = value; } }

            [System.Runtime.Serialization.DataMember]
            public double Rho { get { return rho; } set { rho = value; } }

            [System.Runtime.Serialization.DataMember]
            public double CurrentLearningRate { get { return currentLearningRate; } set { currentLearningRate = value; } }

            [System.Runtime.Serialization.DataMember]
            public double CurrentIteration { get { return currentIteration; } set { currentIteration = value; } }

            public double[,] WeightsCache { get { return weightsCache; } private set { weightsCache = value; } }

            [System.Runtime.Serialization.DataMember]
            public double[] BiasesCache { get { return biasesCache; } private set { biasesCache = value; } }

            // Jagged stand-in for weightsCache; it is only populated while saving or loading.
            [System.Runtime.Serialization.DataMember]
            private double[][] serializedWeightsCache;

            /// <summary>
            /// Turn multi-dimensional arrays into jagged arrays so they can be serialized.
            /// </summary>
            [System.Runtime.Serialization.OnSerializing]
            private void BeforeSerializing(System.Runtime.Serialization.StreamingContext context)
            {
                this.serializedWeightsCache = ToJagged(this.weightsCache);
            }

            /// <summary>
            /// Turn serialized jagged arrays into originally shaped multi-dimensional arrays.
            /// </summary>
            [System.Runtime.Serialization.OnDeserialized]
            private void AfterDeserializing(System.Runtime.Serialization.StreamingContext ctx)
            {
                this.weightsCache = ToRectangular(this.serializedWeightsCache);

                // The jagged copy is only a transport format and is rebuilt before the next
                // save, so release it instead of keeping a second copy of the cache alive.
                this.serializedWeightsCache = null;
            }

            /// <summary>
            /// Copies a rectangular matrix into a jagged array of equal-length rows.
            /// </summary>
            /// <param name="matrix">The matrix to copy; may be null.</param>
            /// <returns>The jagged copy, or null when the matrix is null.</returns>
            private static double[][] ToJagged(double[,] matrix)
            {
                if (matrix == null)
                {
                    return null;
                }

                int rowCount = matrix.GetLength(0);
                int colCount = matrix.GetLength(1);
                double[][] rows = new double[rowCount][];

                for (int i = 0; i < rowCount; i++)
                {
                    rows[i] = new double[colCount];

                    for (int j = 0; j < colCount; j++)
                    {
                        rows[i][j] = matrix[i, j];
                    }
                }

                return rows;
            }

            /// <summary>
            /// Copies a jagged array back into a rectangular matrix.
            /// </summary>
            /// <param name="rows">The jagged array to copy; may be null or empty.</param>
            /// <returns>
            /// The rectangular copy, null when the input is null, or a 0 x 0 matrix when the
            /// input has no rows.
            /// </returns>
            /// <exception cref="System.InvalidOperationException">
            /// A row is missing or its length differs from the first row.
            /// </exception>
            private static double[,] ToRectangular(double[][] rows)
            {
                if (rows == null)
                {
                    return null;
                }

                int rowCount = rows.Length;

                // an empty array has no first row to measure
                if (rowCount == 0)
                {
                    return new double[0, 0];
                }

                if (rows[0] == null)
                {
                    throw new System.InvalidOperationException("The serialized array does not match the multi-dimensional one");
                }

                int colCount = rows[0].Length;
                double[,] matrix = new double[rowCount, colCount];

                for (int i = 0; i < rowCount; i++)
                {
                    double[] current = rows[i];

                    if (current == null || current.Length != colCount)
                    {
                        throw new System.InvalidOperationException("The serialized array does not match the multi-dimensional one");
                    }

                    for (int j = 0; j < colCount; j++)
                    {
                        matrix[i, j] = current[j];
                    }
                }

                return matrix;
            }
            #endregion

            /// <summary>
            /// Create an optimizer from explicit hyper-parameters.
            /// </summary>
            /// <param name="learningRate">The base learning rate.</param>
            /// <param name="decay">The learning rate decay; values of zero or less disable decay.</param>
            /// <param name="epsilon">Small constant added to the denominator of every update.</param>
            /// <param name="rho">Weight of the previous cache value in the running average.</param>
            public Root_Mean_Square_Propegation(double learningRate = 0.001, double decay = 0, double epsilon = 1E-7, double rho = 0.9)
            {
                this.learningRate = learningRate;
                this.epsilon = epsilon;
                this.decayRate = decay;
                this.rho = rho;
            }

            /// <summary>
            /// Create an optimizer with the same hyper-parameters as another one. The caches
            /// and the iteration counter are not copied, so the new optimizer starts fresh.
            /// </summary>
            /// <param name="RMSP">The optimizer to copy the hyper-parameters from.</param>
            /// <exception cref="System.ArgumentNullException">RMSP is null.</exception>
            public Root_Mean_Square_Propegation(Root_Mean_Square_Propegation RMSP)
            {
                if (RMSP == null)
                {
                    throw new System.ArgumentNullException("RMSP");
                }

                this.learningRate = RMSP.learningRate;
                this.epsilon = RMSP.epsilon;
                this.decayRate = RMSP.decayRate;
                this.rho = RMSP.rho;
            }

            /// <summary>
            /// Parameterless constructor; leaves every hyper-parameter at zero.
            /// </summary>
            public Root_Mean_Square_Propegation() { }

            /// <summary>
            /// Get the parameters for optimizer.
            /// </summary>
            /// <returns>A string of the parameters for optimizer.</returns>
            public string GetParameters()
            {
                string parameters = "Optimizer: root_mean_squared_propegation\n" +
                                    "    Learning Rate: " + this.learningRate + "\n" +
                                    "    Decay Rate: " + this.decayRate + "\n" +
                                    "    Epsilon: " + this.epsilon + "\n" +
                                    "    Rho: " + this.rho + "\n";

                return parameters;
            }

            /// <summary>
            /// Optimize the gradients being passed onto the weights.
            /// </summary>
            /// <remarks>
            /// Updates the weights in place, stores the learning rate used in
            /// Config.LearningRate and advances the iteration counter by one.
            /// </remarks>
            /// <param name="weights">The current weights of neuron layer.</param>
            /// <param name="devWeights">The current gradient being passed onto the weights.</param>
            /// <returns>The new adjusted weights (the same array that was passed in).</returns>
            public double[,] Weights(double[,] weights, double[,] devWeights)
            {
                int row = devWeights.GetLength(0);
                int col = devWeights.GetLength(1);

                // on the first call there is no weights cache yet; a new array starts as all zeros
                if (this.weightsCache == null)
                {
                    this.weightsCache = new double[row, col];
                }

                // if the decay rate is greater than zero then update the current learning rate
                if (this.decayRate > 0)
                {
                    this.currentLearningRate = this.learningRate * (1 / (1 + this.decayRate * this.currentIteration));
                }
                else
                {
                    this.currentLearningRate = this.learningRate;
                }

                Config.LearningRate = this.currentLearningRate;

                double rate = this.currentLearningRate;

                for (int i = 0; i < row; i++)
                {
                    for (int j = 0; j < col; j++)
                    {
                        double gradient = devWeights[i, j];

                        // blend the squared gradient into the running average
                        double cache = this.weightsCache[i, j] * this.rho + (1 - this.rho) * System.Math.Pow(gradient, 2);
                        this.weightsCache[i, j] = cache;

                        // step normalised by the square root of the running average
                        weights[i, j] += -gradient * rate / (System.Math.Sqrt(cache) + this.epsilon);
                    }
                }

                this.currentIteration++;

                return weights;
            }

            /// <summary>
            /// Optimize the gradients being passed onto the biases.
            /// </summary>
            /// <remarks>
            /// Updates the biases in place. The learning rate and the iteration counter are
            /// not touched here; both are maintained by Weights(), which is expected to run
            /// first in every training step.
            /// </remarks>
            /// <param name="biases">The current biases of neuron layer.</param>
            /// <param name="devBiases">The current gradient being passed onto the biases.</param>
            /// <returns>The new adjusted biases (the same array that was passed in).</returns>
            public double[] Biases(double[] biases, double[] devBiases)
            {
                int length = devBiases.Length;

                // on the first call there is no biases cache yet; a new array starts as all zeros
                if (this.biasesCache == null)
                {
                    this.biasesCache = new double[length];
                }

                double rate = this.currentLearningRate;

                for (int i = 0; i < length; i++)
                {
                    double gradient = devBiases[i];

                    // blend the squared gradient into the running average
                    double cache = this.biasesCache[i] * this.rho + (1 - this.rho) * System.Math.Pow(gradient, 2);
                    this.biasesCache[i] = cache;

                    // step normalised by the square root of the running average
                    biases[i] += -gradient * rate / (System.Math.Sqrt(cache) + this.epsilon);
                }

                return biases;
            }
        }

        /// <summary>
        /// Adaptive momentum (Adam) optimizer. For every parameter it keeps a decaying average
        /// of the gradient (momentum, weighted by Beta1) and of the squared gradient (cache,
        /// weighted by Beta2), corrects both for their zero initialisation and steps along the
        /// corrected momentum divided by the square root of the corrected cache.
        /// </summary>
        /// <remarks>
        /// Weights() recomputes the decayed learning rate and advances the iteration counter.
        /// Biases() reuses both, so it is meant to be called after Weights() within the same
        /// training step.
        /// </remarks>
        [System.Runtime.Serialization.DataContract]
        public class Adaptive_Momentum : OptimizerFunctionHolder
        {
            #region Variables
            private double learningRate;

            private double decayRate;

            private double epsilon;

            // decay factor of the gradient average (momentum)
            private double beta1;

            // decay factor of the squared-gradient average (cache)
            private double beta2;

            private double currentLearningRate;

            private double currentIteration = 0;

            // decaying average of weight gradients (saved through serializedWeightsMomentum)
            private double[,] weightsMomentum;

            // decaying average of bias gradients
            private double[] biasesMomentum;

            // decaying average of squared weight gradients (saved through serializedWeightsCache)
            private double[,] weightsCache;

            // decaying average of squared bias gradients
            private double[] biasesCache;

            [System.Runtime.Serialization.DataMember]
            public double LearningRate { get { return learningRate; } set { learningRate = value; } }

            [System.Runtime.Serialization.DataMember]
            public double DecayRate { get { return decayRate; } set { decayRate = value; } }

            [System.Runtime.Serialization.DataMember]
            public double Epsilon { get { return epsilon; } set { epsilon = value; } }

            [System.Runtime.Serialization.DataMember]
            public double Beta1 { get { return beta1; } set { beta1 = value; } }

            [System.Runtime.Serialization.DataMember]
            public double Beta2 { get { return beta2; } set { beta2 = value; } }

            [System.Runtime.Serialization.DataMember]
            public double CurrentLearningRate { get { return currentLearningRate; } set { currentLearningRate = value; } }

            [System.Runtime.Serialization.DataMember]
            public double CurrentIteration { get { return currentIteration; } set { currentIteration = value; } }

            public double[,] WeightsMomentum { get { return weightsMomentum; } private set { weightsMomentum = value; } }

            [System.Runtime.Serialization.DataMember]
            public double[] BiasesMomentum { get { return biasesMomentum; } private set { biasesMomentum = value; } }

            public double[,] WeightsCache { get { return weightsCache; } private set { weightsCache = value; } }

            [System.Runtime.Serialization.DataMember]
            public double[] BiasesCache { get { return biasesCache; } private set { biasesCache = value; } }

            // Jagged stand-ins for the two weight matrices; they are only populated while
            // saving or loading.
            [System.Runtime.Serialization.DataMember]
            private double[][] serializedWeightsCache;

            [System.Runtime.Serialization.DataMember]
            private double[][] serializedWeightsMomentum;

            /// <summary>
            /// Turn multi-dimensional arrays into jagged arrays so they can be serialized.
            /// </summary>
            [System.Runtime.Serialization.OnSerializing]
            private void BeforeSerializing(System.Runtime.Serialization.StreamingContext context)
            {
                // each matrix is converted on its own so a missing momentum matrix cannot
                // cause a null dereference while the cache is being copied
                this.serializedWeightsCache = ToJagged(this.weightsCache);
                this.serializedWeightsMomentum = ToJagged(this.weightsMomentum);
            }

            /// <summary>
            /// Turn serialized jagged arrays into originally shaped multi-dimensional arrays.
            /// </summary>
            [System.Runtime.Serialization.OnDeserialized]
            private void AfterDeserializing(System.Runtime.Serialization.StreamingContext ctx)
            {
                this.weightsCache = ToRectangular(this.serializedWeightsCache);
                this.weightsMomentum = ToRectangular(this.serializedWeightsMomentum);

                // The jagged copies are only a transport format and are rebuilt before the
                // next save, so release them instead of keeping second copies alive.
                this.serializedWeightsCache = null;
                this.serializedWeightsMomentum = null;
            }

            /// <summary>
            /// Copies a rectangular matrix into a jagged array of equal-length rows.
            /// </summary>
            /// <param name="matrix">The matrix to copy; may be null.</param>
            /// <returns>The jagged copy, or null when the matrix is null.</returns>
            private static double[][] ToJagged(double[,] matrix)
            {
                if (matrix == null)
                {
                    return null;
                }

                int rowCount = matrix.GetLength(0);
                int colCount = matrix.GetLength(1);
                double[][] rows = new double[rowCount][];

                for (int i = 0; i < rowCount; i++)
                {
                    rows[i] = new double[colCount];

                    for (int j = 0; j < colCount; j++)
                    {
                        rows[i][j] = matrix[i, j];
                    }
                }

                return rows;
            }

            /// <summary>
            /// Copies a jagged array back into a rectangular matrix.
            /// </summary>
            /// <param name="rows">The jagged array to copy; may be null or empty.</param>
            /// <returns>
            /// The rectangular copy, null when the input is null, or a 0 x 0 matrix when the
            /// input has no rows.
            /// </returns>
            /// <exception cref="System.InvalidOperationException">
            /// A row is missing or its length differs from the first row.
            /// </exception>
            private static double[,] ToRectangular(double[][] rows)
            {
                if (rows == null)
                {
                    return null;
                }

                int rowCount = rows.Length;

                // an empty array has no first row to measure
                if (rowCount == 0)
                {
                    return new double[0, 0];
                }

                if (rows[0] == null)
                {
                    throw new System.InvalidOperationException("The serialized array does not match the multi-dimensional one");
                }

                int colCount = rows[0].Length;
                double[,] matrix = new double[rowCount, colCount];

                for (int i = 0; i < rowCount; i++)
                {
                    double[] current = rows[i];

                    if (current == null || current.Length != colCount)
                    {
                        throw new System.InvalidOperationException("The serialized array does not match the multi-dimensional one");
                    }

                    for (int j = 0; j < colCount; j++)
                    {
                        matrix[i, j] = current[j];
                    }
                }

                return matrix;
            }
            #endregion

            /// <summary>
            /// Create an optimizer from explicit hyper-parameters.
            /// </summary>
            /// <param name="learningRate">The base learning rate.</param>
            /// <param name="decay">The learning rate decay; values of zero or less disable decay.</param>
            /// <param name="epsilon">Small constant added to the denominator of every update.</param>
            /// <param name="beta1">Decay factor of the gradient average (momentum).</param>
            /// <param name="beta2">Decay factor of the squared-gradient average (cache).</param>
            public Adaptive_Momentum(double learningRate = 0.001, double decay = 0, double epsilon = 1E-7, double beta1 = 0.9, double beta2 = 0.999)
            {
                this.learningRate = learningRate;
                this.epsilon = epsilon;
                this.decayRate = decay;
                this.beta1 = beta1;
                this.beta2 = beta2;
            }

            /// <summary>
            /// Create an optimizer with the same hyper-parameters as another one. The momentum,
            /// the caches and the iteration counter are not copied, so the new optimizer
            /// starts fresh.
            /// </summary>
            /// <param name="AM">The optimizer to copy the hyper-parameters from.</param>
            /// <exception cref="System.ArgumentNullException">AM is null.</exception>
            public Adaptive_Momentum(Adaptive_Momentum AM)
            {
                if (AM == null)
                {
                    throw new System.ArgumentNullException("AM");
                }

                this.learningRate = AM.learningRate;
                this.epsilon = AM.epsilon;
                this.decayRate = AM.decayRate;
                this.beta1 = AM.beta1;
                this.beta2 = AM.beta2;
            }

            /// <summary>
            /// Parameterless constructor; leaves every hyper-parameter at zero.
            /// </summary>
            public Adaptive_Momentum() { }

            /// <summary>
            /// Get the parameters for optimizer.
            /// </summary>
            /// <returns>A string of the parameters for optimizer.</returns>
            public string GetParameters()
            {
                string parameters = "Optimizer: adaptive_momentum\n" +
                                    "    Learning Rate: " + this.learningRate + "\n" +
                                    "    Decay Rate: " + this.decayRate + "\n" +
                                    "    Epsilon: " + this.epsilon + "\n" +
                                    "    Beta 1: " + this.beta1 + "\n" +
                                    "    Beta 2: " + this.beta2 + "\n";

                return parameters;
            }

            /// <summary>
            /// Optimize the gradients being passed onto the weights.
            /// </summary>
            /// <remarks>
            /// Updates the weights in place, stores the learning rate used in
            /// Config.LearningRate and advances the iteration counter by one.
            /// </remarks>
            /// <param name="weights">The current weights of neuron layer.</param>
            /// <param name="devWeights">The current gradient being passed onto the weights.</param>
            /// <returns>The new adjusted weights (the same array that was passed in).</returns>
            public double[,] Weights(double[,] weights, double[,] devWeights)
            {
                int row = devWeights.GetLength(0);
                int col = devWeights.GetLength(1);

                // On the first call neither array exists yet; a new array starts as all zeros.
                // They are checked separately so that a state restored with only one of them
                // still works.
                if (this.weightsCache == null)
                {
                    this.weightsCache = new double[row, col];
                }

                if (this.weightsMomentum == null)
                {
                    this.weightsMomentum = new double[row, col];
                }

                // Bias-correction divisors for this step. They are the same for every element,
                // so they are computed once instead of inside the loops.
                double step = this.currentIteration + 1;
                double momentumCorrection = 1 - System.Math.Pow(this.beta1, step);
                double cacheCorrection = 1 - System.Math.Pow(this.beta2, step);

                // if the decay rate is greater than zero then update the current learning rate
                if (this.decayRate > 0)
                {
                    this.currentLearningRate = this.learningRate * (1 / (1 + this.decayRate * this.currentIteration));
                }
                else
                {
                    this.currentLearningRate = this.learningRate;
                }

                Config.LearningRate = this.currentLearningRate;

                double rate = this.currentLearningRate;

                for (int i = 0; i < row; i++)
                {
                    for (int j = 0; j < col; j++)
                    {
                        double gradient = devWeights[i, j];

                        // update momentum with the current gradient
                        double momentum = this.weightsMomentum[i, j] * this.beta1 + (1 - this.beta1) * gradient;
                        this.weightsMomentum[i, j] = momentum;

                        // update cache with the squared gradient
                        double cache = this.weightsCache[i, j] * this.beta2 + (1 - this.beta2) * System.Math.Pow(gradient, 2);
                        this.weightsCache[i, j] = cache;

                        double correctedMomentum = momentum / momentumCorrection;
                        double correctedCache = cache / cacheCorrection;

                        // step along the corrected momentum, normalised by the corrected cache
                        weights[i, j] += -correctedMomentum * rate / (System.Math.Sqrt(correctedCache) + this.epsilon);
                    }
                }

                this.currentIteration++;

                return weights;
            }

            /// <summary>
            /// Optimize the gradients being passed onto the biases.
            /// </summary>
            /// <remarks>
            /// Updates the biases in place. The learning rate and the iteration counter are
            /// not touched here; both are maintained by Weights(), which is expected to run
            /// first in every training step.
            /// </remarks>
            /// <param name="biases">The current biases of neuron layer.</param>
            /// <param name="devBiases">The current gradient being passed onto the biases.</param>
            /// <returns>The new adjusted biases (the same array that was passed in).</returns>
            public double[] Biases(double[] biases, double[] devBiases)
            {
                int length = devBiases.Length;

                // On the first call neither array exists yet; a new array starts as all zeros.
                if (this.biasesCache == null)
                {
                    this.biasesCache = new double[length];
                }

                if (this.biasesMomentum == null)
                {
                    this.biasesMomentum = new double[length];
                }

                // Weights() has already advanced currentIteration for this training step, so
                // the counter itself is the step number Weights() used for its bias
                // correction. Using currentIteration + 1 here would correct the biases for the
                // following step instead. The lower bound of 1 keeps the divisors non-zero if
                // Biases() is ever called before the first Weights() call.
                double step = System.Math.Max(this.currentIteration, 1);
                double momentumCorrection = 1 - System.Math.Pow(this.beta1, step);
                double cacheCorrection = 1 - System.Math.Pow(this.beta2, step);

                double rate = this.currentLearningRate;

                for (int i = 0; i < length; i++)
                {
                    double gradient = devBiases[i];

                    // update momentum with the current gradient
                    double momentum = this.biasesMomentum[i] * this.beta1 + (1 - this.beta1) * gradient;
                    this.biasesMomentum[i] = momentum;

                    // update cache with the squared gradient
                    double cache = this.biasesCache[i] * this.beta2 + (1 - this.beta2) * System.Math.Pow(gradient, 2);
                    this.biasesCache[i] = cache;

                    double correctedMomentum = momentum / momentumCorrection;
                    double correctedCache = cache / cacheCorrection;

                    // step along the corrected momentum, normalised by the corrected cache
                    biases[i] += -correctedMomentum * rate / (System.Math.Sqrt(correctedCache) + this.epsilon);
                }

                return biases;
            }
        }
    }

    [System.Runtime.Serialization.DataContract]
    private class Dropout
    {
        #region Dropout Variables
        // Fraction of neurons that stay active for each sample (1 - dropout rate).
        private double rate;

        // Mask built by the most recent Forward call: 0 for dropped neurons and
        // 1 / rate for kept neurons. Backward multiplies the gradient by it.
        private double[,] binaryMask;

        // Random source used to build the masks. It is not a data member and is created on
        // first use, so it is also available on instances produced by deserialization.
        private System.Random random;

        [System.Runtime.Serialization.DataMember]
        public double Rate { get { return rate; } set { rate = value; } }

        [System.Runtime.Serialization.DataMember]
        private double[][] serializedBinaryMask;

        /// <summary>
        /// Turn multi-dimensional arrays into jagged arrays for serializing.
        /// </summary>
        [System.Runtime.Serialization.OnSerializing]
        private void BeforeSerializing(System.Runtime.Serialization.StreamingContext context)
        {
            if (this.binaryMask == null)
            {
                // no mask exists, so make sure no stale copy is written out
                this.serializedBinaryMask = null;
                return;
            }

            int row = this.binaryMask.GetLength(0);
            int col = this.binaryMask.GetLength(1);

            double[][] jagged = new double[row][];

            for (int i = 0; i < row; i++)
            {
                jagged[i] = new double[col];

                for (int j = 0; j < col; j++)
                    jagged[i][j] = this.binaryMask[i, j];
            }

            this.serializedBinaryMask = jagged;
        }

        /// <summary>
        /// Turn serialized jagged arrays into originally shaped multi-dimensional arrays.
        /// </summary>
        /// <exception cref="System.InvalidOperationException">
        /// Thrown when the rows of the serialized mask do not all have the same length.
        /// </exception>
        [System.Runtime.Serialization.OnDeserialized]
        private void AfterDeserializing(System.Runtime.Serialization.StreamingContext ctx)
        {
            if (this.serializedBinaryMask == null)
            {
                this.binaryMask = null;
                return;
            }

            int row = this.serializedBinaryMask.Length;

            // a mask with zero rows has no first row to take the width from
            int col = row > 0 ? this.serializedBinaryMask[0].Length : 0;

            for (int i = 1; i < row; i++)
            {
                if (this.serializedBinaryMask[i].Length != col)
                {
                    throw new System.InvalidOperationException("The serialized array does not match the multi-dimensional one");
                }
            }

            double[,] restored = new double[row, col];

            for (int i = 0; i < row; i++)
                for (int j = 0; j < col; j++)
                    restored[i, j] = this.serializedBinaryMask[i][j];

            this.binaryMask = restored;
        }
        #endregion

        /// <summary>
        /// Creates a dropout layer.
        /// </summary>
        /// <param name="dropoutRate">The fraction of neurons to drop; the stored keep rate is 1 - dropoutRate.</param>
        public Dropout(double dropoutRate = 0)
        {
            this.rate = 1 - dropoutRate;
        }

        /// <summary>
        /// Creates a dropout layer without setting a rate.
        /// </summary>
        /// <remarks>
        /// This overload is the one chosen by <c>new Dropout()</c>; it leaves the keep rate at 0
        /// until <see cref="Rate"/> is assigned.
        /// </remarks>
        public Dropout() { }

        /// <summary>
        /// Applys a binary dropout filter to the current neuron layer.
        /// </summary>
        /// <param name="input">The outputs of the current layer. The array is scaled in place.</param>
        /// <returns>
        /// The updated outputs as a 2d array (the same instance that was passed in).
        /// </returns>
        public double[,] Forward(double[,] input)
        {
            if (this.random == null)
                this.random = new System.Random();

            int batchSize = input.GetLength(0);
            int totalNeurons = input.GetLength(1);

            // Count how many neurons stay active per sample: keep adding neurons while the
            // active share is still below the keep rate. Counting starts at zero so that a
            // keep rate of 1 keeps every neuron, and it is capped at the layer size so that
            // a keep rate above 1 cannot run out of neurons to pick.
            int keepCount = 0;
            while (keepCount < totalNeurons && (double)keepCount / totalNeurons < this.rate)
                keepCount++;

            // kept neurons are scaled up so the expected sum of the layer stays the same
            double scale = 1 / this.rate;

            double[,] mask = new double[batchSize, totalNeurons];

            // neuron indices; the first keepCount entries are re-drawn for every sample
            int[] candidates = new int[totalNeurons];
            for (int i = 0; i < totalNeurons; i++)
                candidates[i] = i;

            for (int set = 0; set < batchSize; set++)
            {
                // partial shuffle: each step picks one not-yet-chosen neuron uniformly
                for (int k = 0; k < keepCount; k++)
                {
                    int pick = this.random.Next(k, totalNeurons);

                    int chosen = candidates[pick];
                    candidates[pick] = candidates[k];
                    candidates[k] = chosen;

                    mask[set, chosen] = scale;
                }

                for (int i = 0; i < totalNeurons; i++)
                    input[set, i] *= mask[set, i];
            }

            this.binaryMask = mask;

            return input;
        }

        /// <summary>
        /// partial derivative of the dropout layer.
        /// </summary>
        /// <param name="recievedGradient">The recieved gradient from previous layer.</param>
        /// <returns>
        /// The updated gradient as a 2d array.
        /// </returns>
        /// <exception cref="System.InvalidOperationException">
        /// Thrown when no mask exists because Forward has not been called yet.
        /// </exception>
        public double[,] Backward(double[,] recievedGradient)
        {
            if (this.binaryMask == null)
                throw new System.InvalidOperationException("Forward must be called before Backward so that a dropout mask exists.");

            int batchSize = recievedGradient.GetLength(0);
            int totalNeurons = recievedGradient.GetLength(1);

            double[,] output = new double[batchSize, totalNeurons];

            // the gradient only flows through the neurons that were kept, with the same scaling
            for (int set = 0; set < batchSize; set++)
                for (int neuron = 0; neuron < totalNeurons; neuron++)
                    output[set, neuron] = recievedGradient[set, neuron] * this.binaryMask[set, neuron];

            return output;
        }
    }

    [System.Runtime.Serialization.DataContract]
    public class AccuracyFunction
    {
        [System.Runtime.Serialization.DataContract]
        public class True_Class_Mean : AccuracyFunctionHolder
        {
            /// <summary>
            /// Calculates the accuracy of the neural network.
            /// </summary>
            /// <param name="networkOutputs">The calculated final outputs of the neural network.</param>
            /// <param name="target">The one-hot encoded target classes.</param>
            /// <remarks>
            /// For every sample the index of the largest network output is compared with the index
            /// of the largest target value. On ties the lowest index wins for both.
            /// </remarks>
            /// <returns>
            /// A double as the decimal percent of the accuracy of the neural network,
            /// or 0 when the batch contains no samples.
            /// </returns>
            public double Compare(double[,] networkOutputs, double[,] target)
            {
                int batchSize = networkOutputs.GetLength(0);
                int potentialClasses = networkOutputs.GetLength(1);

                // avoid dividing by zero (which would yield NaN) on an empty batch
                if (batchSize == 0)
                    return 0;

                int correct = 0;

                for (int set = 0; set < batchSize; set++)
                {
                    // start with the first class as the current maximum of both rows
                    double predMax = networkOutputs[set, 0];
                    int predMaxIndex = 0;

                    double targetMax = target[set, 0];
                    int targetClass = 0;

                    // iterate through each remaining neuron output
                    for (int i = 1; i < potentialClasses; i++)
                    {
                        // set new predicted network maximum if new maximum is found
                        if (networkOutputs[set, i] > predMax)
                        {
                            predMax = networkOutputs[set, i];
                            predMaxIndex = i;
                        }

                        // Track the running maximum of the target row as well. Comparing only
                        // against the neighbouring value picks the wrong class whenever the
                        // target row is not strictly one-hot.
                        if (target[set, i] > targetMax)
                        {
                            targetMax = target[set, i];
                            targetClass = i;
                        }
                    }

                    // the sample counts as correct when both maxima are at the same class
                    if (targetClass == predMaxIndex)
                        correct++;
                }

                return (double)correct / batchSize;
            }
        }
    }

    private class Evaluate
    {
        /// <summary>
        /// Writes a one-line summary of a single training iteration to the console.
        /// </summary>
        /// <param name="dataLoss">The calculated data loss of the batch.</param>
        /// <param name="accuracy">The accuracy of the neural network outputs.</param>
        /// <param name="currentIteration">The zero-based batch iteration; it is printed incremented by one.</param>
        /// <param name="regularizationLoss">The calculated regularization loss of the batch.</param>
        /// <param name="learningRate">The learning rate used for this iteration.</param>
        public static void Display(double dataLoss, double accuracy, int currentIteration, double regularizationLoss, double learningRate)
        {
            int iterationNumber = currentIteration + 1;
            double combinedLoss = dataLoss + regularizationLoss;

            // round everything up front so the report line below only deals with layout
            double shownAccuracy = System.Math.Round(accuracy, 4);
            double shownLoss = System.Math.Round(combinedLoss, 4);
            double shownDataLoss = System.Math.Round(dataLoss, 4);
            double shownRegularization = System.Math.Round(regularizationLoss, 4);
            double shownLearningRate = System.Math.Round(learningRate, 10);

            string report =
                "Iteration: " + iterationNumber +
                " | Accuracy: " + shownAccuracy +
                " | Loss: " + shownLoss +
                " | Data Loss: " + shownDataLoss +
                " | Regularization Loss: " + shownRegularization +
                " | Learning Rate: " + shownLearningRate;

            System.Console.WriteLine(report);
        }

        /// <summary>
        /// Writes a one-line summary of an epoch to the console.
        /// </summary>
        /// <remarks>
        /// Every metric is divided by <paramref name="currentIteration"/> before it is rounded and printed.
        /// </remarks>
        /// <param name="dataLoss">The epoch data loss.</param>
        /// <param name="accuracy">The epoch accuracy of the neural network outputs.</param>
        /// <param name="currentIteration">The iteration count every metric is divided by.</param>
        /// <param name="regularizationLoss">The epoch regularization loss.</param>
        /// <param name="learningRate">The epoch learning rate.</param>
        /// <param name="epoch">The zero-based epoch; it is printed incremented by one.</param>
        public static void DisplayEpoch(double dataLoss, double accuracy, int currentIteration, double regularizationLoss, double learningRate, int epoch)
        {
            int epochNumber = epoch + 1;
            double combinedLoss = dataLoss + regularizationLoss;

            double shownAccuracy = System.Math.Round(accuracy / currentIteration, 4);
            double shownLoss = System.Math.Round(combinedLoss / currentIteration, 4);
            double shownDataLoss = System.Math.Round(dataLoss / currentIteration, 4);
            double shownRegularization = System.Math.Round(regularizationLoss / currentIteration, 4);
            double shownLearningRate = System.Math.Round(learningRate / currentIteration, 10);

            string report =
                "Epoch Summary: " + epochNumber +
                " | Accuracy: " + shownAccuracy +
                " | Loss: " + shownLoss +
                " | Data Loss: " + shownDataLoss +
                " | Regularization Loss: " + shownRegularization +
                " | Learning Rate: " + shownLearningRate;

            System.Console.WriteLine(report);
        }

        /// <summary>
        /// Writes the final validation results to the console.
        /// </summary>
        /// <param name="loss">The per-sample losses; their mean is printed.</param>
        /// <param name="accuracy">The accuracy of the neural network outputs.</param>
        public static void Results(double[] loss, double accuracy)
        {
            double lossSum = 0;
            foreach (double sampleLoss in loss)
                lossSum += sampleLoss;

            double meanLoss = lossSum / loss.Length;

            string report =
                "Validation | Accuracy: " + System.Math.Round(accuracy, 2) +
                " | Loss: " + System.Math.Round(meanLoss, 4);

            System.Console.WriteLine(report);
        }
    }

    private class HelperMethods
    {
        /// <summary>
        /// Set data into randomized batches for training and testing.
        /// </summary>
        /// <param name="input">The input dataset, one sample per row.</param>
        /// <param name="target">The target dataset, one row per input sample.</param>
        /// <param name="batchSize">
        /// The size of each batch of data. When it is not positive or larger than the number of
        /// samples, the whole data set is returned as a single shuffled batch.
        /// </param>
        /// <returns>
        /// The randomized and batched inputs and targets. The final batch holds the remaining
        /// samples and may therefore be smaller than <paramref name="batchSize"/>.
        /// </returns>
        /// <exception cref="System.InvalidOperationException">
        /// Thrown when the input and target data sets do not have the same number of rows.
        /// </exception>
        public static (double[][,], double[][,]) BatchData(double[,] input, double[,] target, int batchSize = 0)
        {
            int samples = input.GetLength(0);
            int inputs = input.GetLength(1);
            int classes = target.GetLength(1);

            if (target.GetLength(0) != samples)
                throw new System.InvalidOperationException("The input and target data sets must contain the same number of samples.");

            // fall back to one batch holding the whole data set
            if (batchSize <= 0 || batchSize > samples)
                batchSize = samples;

            // shuffle the sample indices once (Fisher-Yates); batches are then cut from this order
            int[] order = new int[samples];
            for (int i = 0; i < samples; i++)
                order[i] = i;

            System.Random rand = new System.Random();
            for (int i = samples - 1; i > 0; i--)
            {
                int swapWith = rand.Next(i + 1);

                int held = order[i];
                order[i] = order[swapWith];
                order[swapWith] = held;
            }

            // round up so that a trailing partial batch is included;
            // an empty data set still yields one (empty) batch
            int numBatches = samples == 0 ? 1 : (samples - 1) / batchSize + 1;

            double[][,] batchedInput = new double[numBatches][,];
            double[][,] batchedTarget = new double[numBatches][,];

            int startIndex = 0;

            // iterate through each batch
            for (int batch = 0; batch < numBatches; batch++)
            {
                // the final batch takes every sample that is left
                int rows = System.Math.Min(batchSize, samples - startIndex);

                double[,] newInputBatch = new double[rows, inputs];
                double[,] newTargetBatch = new double[rows, classes];

                // copy each shuffled sample together with its target row
                for (int row = 0; row < rows; row++)
                {
                    int source = order[startIndex + row];

                    for (int k = 0; k < inputs; k++)
                        newInputBatch[row, k] = input[source, k];

                    for (int k = 0; k < classes; k++)
                        newTargetBatch[row, k] = target[source, k];
                }

                batchedInput[batch] = newInputBatch;
                batchedTarget[batch] = newTargetBatch;

                startIndex += rows;
            }

            return (batchedInput, batchedTarget);
        }

        /// <summary>
        /// Transposes any 2d matrix.
        /// </summary>
        /// <param name="matrix">The array that is to be transposed.</param>
        /// <returns> 
        /// The transposed 2d array.
        /// </returns>
        public static double[,] Transpose(double[,] matrix)
        {
            // the dimensions of the result are the swapped dimensions of the source
            int row = matrix.GetLength(1);
            int col = matrix.GetLength(0);

            double[,] matrixT = new double[row, col];

            // save each value as the reverse of its original placement
            for (int i = 0; i < row; i++)
                for (int j = 0; j < col; j++)
                    matrixT[i, j] = matrix[j, i];

            return matrixT;
        }

        /// <summary>
        /// Calculates the dot product of two 2d martrices.
        /// </summary>
        /// <param name="matrix1">The first 2d array.</param>
        /// <param name="matrix2">The second 2d array.</param>
        /// <remarks>The first arrays number of columns must be equal to the second arrays number of rows.</remarks>
        /// <returns> 
        /// The dot product of two 2d martrices as a 2d array.
        /// </returns>
        /// <exception cref="System.InvalidOperationException">
        /// Thrown when the inner dimensions of the two matrices do not match.
        /// </exception>
        public static double[,] DotProduct(double[,] matrix1, double[,] matrix2)
        {
            int rows1 = matrix1.GetLength(0);
            int cols1 = matrix1.GetLength(1);
            int rows2 = matrix2.GetLength(0);
            int cols2 = matrix2.GetLength(1);

            if (cols1 != rows2)
                throw new System.InvalidOperationException
                    ("Dot product is undefined. The number of columns of first matrix must equal to the number of rows of second matrix.");

            double[,] product = new double[rows1, cols2];

            for (int row1 = 0; row1 < rows1; row1++) // iterate through the first matrices rows
                for (int col2 = 0; col2 < cols2; col2++) // iterate through the second matrices columns
                    for (int col1 = 0; col1 < cols1; col1++) // iterate through the first matrices columns
                        product[row1, col2] += matrix1[row1, col1] * matrix2[col1, col2];

            return product;
        }

        /// <summary>
        /// Turns nested arrays input data into a 2d array suitable for neural network to process.
        /// </summary>
        /// <param name="inputData">A list of arrays of data inputs.</param>
        /// <remarks>Each nested array must be equal in length.</remarks>
        /// <returns>Data set of network inputs in a 2d array; an empty 2d array for an empty list.</returns>
        /// <exception cref="System.InvalidOperationException">Thrown when the nested arrays differ in length.</exception>
        public double[,] SetDataStucture(System.Collections.Generic.List<double[]> inputData)
        {
            // the list only wraps the row arrays, so the jagged-array overload can do the work
            return this.SetDataStucture(inputData.ToArray());
        }

        /// <summary>
        /// Turns jagged arrays input data into a 2d array suitable for neural network to process.
        /// </summary>
        /// <param name="inputData">A jagged array of data inputs.</param>
        /// <remarks>Each array must be equal in length and shape.</remarks>
        /// <returns>Data set of network inputs in a 2d array; an empty 2d array for empty input.</returns>
        /// <exception cref="System.InvalidOperationException">Thrown when the rows differ in length.</exception>
        public double[,] SetDataStucture(double[][] inputData)
        {
            int row = inputData.Length;

            // without rows there is no first row to take the width from
            if (row == 0)
                return new double[0, 0];

            int col = inputData[0].Length;

            double[,] convertedData = new double[row, col];

            for (int i = 0; i < row; i++)
            {
                if (inputData[i].Length != col)
                    throw new System.InvalidOperationException ("Inconsistent input data structure.");

                for (int j = 0; j < col; j++)
                    convertedData[i, j] = inputData[i][j];
            }

            return convertedData;
        }

        /// <summary>
        /// Turns nested lists input data into a 2d array suitable for neural network to process.
        /// </summary>
        /// <param name="inputData">A nested list of data inputs.</param>
        /// <remarks>Each nested list must be equal in length.</remarks>
        /// <returns>Data set of network inputs in a 2d array; an empty 2d array for an empty list.</returns>
        /// <exception cref="System.InvalidOperationException">Thrown when the nested lists differ in length.</exception>
        public double[,] SetDataStucture(System.Collections.Generic.List<System.Collections.Generic.List<double>> inputData)
        {
            int row = inputData.Count;

            // without rows there is no first row to take the width from
            if (row == 0)
                return new double[0, 0];

            int col = inputData[0].Count;

            double[,] convertedData = new double[row, col];

            for (int i = 0; i < row; i++)
            {
                if (inputData[i].Count != col)
                    throw new System.InvalidOperationException ("Inconsistent input data structure.");

                for (int j = 0; j < col; j++)
                    convertedData[i, j] = inputData[i][j];
            }

            return convertedData;
        }

        /// <summary>
        /// Turn sequence classes into one-hot encoded data sets for neural network.
        /// </summary>
        /// <param name="data">1d array of sequence classes; each value is converted with System.Convert.ToInt32.</param>
        /// <returns> 
        /// Data set of one-hot encoded classes in a 2d array.
        /// </returns>
        /// <exception cref="System.ArgumentOutOfRangeException">Thrown when a class converts to a negative number.</exception>
        public double[,] OneHotEncode(double[] data)
        {
            int[] classes = new int[data.Length];
            for (int i = 0; i < data.Length; i++)
                classes[i] = System.Convert.ToInt32(data[i]);

            return this.OneHotEncode(classes);
        }

        /// <summary>
        /// Turn sequence classes into one-hot encoded data sets for neural network.
        /// </summary>
        /// <param name="data">1d array of zero-based sequence classes.</param>
        /// <returns> 
        /// Data set of one-hot encoded classes in a 2d array with one row per entry of
        /// <paramref name="data"/> and one column per class from 0 up to the largest class present.
        /// </returns>
        /// <exception cref="System.ArgumentOutOfRangeException">Thrown when a class is negative.</exception>
        public double[,] OneHotEncode(int[] data)
        {
            int batchSize = data.Length;

            int highestClass = -1;
            for (int i = 0; i < batchSize; i++)
            {
                if (data[i] < 0)
                    throw new System.ArgumentOutOfRangeException(nameof(data), "Classes must not be negative.");

                if (data[i] > highestClass)
                    highestClass = data[i];
            }

            // classes are zero-based, so the largest class needs a column of its own
            double[,] OHEDataSet = new double[batchSize, highestClass + 1];

            for (int set = 0; set < batchSize; set++)
                OHEDataSet[set, data[set]] = 1;

            return OHEDataSet;
        }

        /// <summary>
        /// Turn sequence classes into one-hot encoded data sets for neural network.
        /// </summary>
        /// <param name="data">A list of zero-based sequence classes.</param>
        /// <returns> 
        /// Data set of one-hot encoded classes in a 2d array.
        /// </returns>
        /// <exception cref="System.ArgumentOutOfRangeException">Thrown when a class is negative.</exception>
        public double[,] OneHotEncode(System.Collections.Generic.List<int> data)
        {
            return this.OneHotEncode(data.ToArray());
        }
    }

    public class Serialization
    {
        /// <summary>
        /// Use data contract serialization to write object to xml file.
        /// </summary>
        /// <param name="serializableObject">The object that is to be serialized</param>
        /// <param name="filePath">The file location and name.</param>
        public static void WriteObject<T>(T serializableObject, string filePath)
        {
            System.Runtime.Serialization.DataContractSerializer serializer = new System.Runtime.Serialization.DataContractSerializer(typeof(T));
            System.Xml.XmlWriterSettings settings = new System.Xml.XmlWriterSettings()
            {
                Indent = true,
                IndentChars = "\t",
            };

            // dispose the writer even when serialization throws, so the file is not left open
            using (System.Xml.XmlWriter writer = System.Xml.XmlWriter.Create(filePath, settings))
            {
                serializer.WriteObject(writer, serializableObject);
            }
        }

        /// <summary>
        /// Read serialized xml file object by data contract deserialization.
        /// </summary>
        /// <param name="filepath">The file location and name.</param>
        /// <returns>The deserialized object.</returns>
        public static T ReadObject<T>(string filepath)
        {
            System.Runtime.Serialization.DataContractSerializer serializer = new System.Runtime.Serialization.DataContractSerializer(typeof(T));

            // The file is only read, so it is opened without write access. Both the stream and
            // the reader are disposed even when deserialization throws.
            using (System.IO.FileStream fileStream = new System.IO.FileStream(filepath, System.IO.FileMode.Open, System.IO.FileAccess.Read, System.IO.FileShare.Read))
            using (System.Xml.XmlDictionaryReader reader = System.Xml.XmlDictionaryReader.CreateTextReader(fileStream, new System.Xml.XmlDictionaryReaderQuotas()))
            {
                return (T)serializer.ReadObject(reader, true);
            }
        }
    }
}