private GradientWeightVector[] GradientBackPropArr(DataSample[] samples)
        {
            GradientWeightVector[] gradients = new GradientWeightVector[samples.Length];

            for (int i = 0; i < samples.Length; i++)
            {
                gradients[i] = GradientBackProp(samples[i]);
            }
            return gradients;
        }
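
        // Each sample's gradient is independent of the others, so this loop could
        // in principle be parallelized with System.Threading.Tasks, e.g.:
        //
        //     Parallel.For(0, samples.Length, i => gradients[i] = GradientBackProp(samples[i]));
        //
        // Illustrative only: this is NOT safe as written, because GradientBackProp
        // mutates shared layer-value state via SigmoidActivation and writes to the console.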
        private GradientWeightVector GradientBackProp(DataSample sample)
        {
            // Gradient vector for this sample plus the per-layer error terms (deltas)
            GradientWeightVector weightVector = new GradientWeightVector(this);

            float[]  idealVector      = CreateIdealVector(sample.label);
            float[]  outputLayerDelta = new float[outputSize];
            // One delta row per hidden layer (row 0 = first hidden layer), so the
            // first hidden layer's deltas are still available for the input weights
            float[,] hiddenLayerDelta = new float[hiddenDimension, hiddenSize];

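            // Forward pass for this sample, followed by per-sample debug output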
            SigmoidActivation(sample);
            Console.WriteLine("Label: " + sample.label);
            Console.WriteLine();
            PrintOutput();
            Console.WriteLine();
            Console.WriteLine("Cost after Activation is: " + ReturnCost(CreateIdealVector(sample.label)));
            PrintAverageCost();
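
            // With squared-error cost C = 1/2 * sum((a_i - y_i)^2) and sigmoid
            // outputs, the output error term is delta_i = (a_i - y_i) * a_i * (1 - a_i)
            // and each gradient entry is dC/dw_ij = delta_i * a_j, where a_j is the
            // feeding activation. The learning rate is folded into every stored
            // entry, so applying the gradient later is a plain subtraction.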

            // Backpropagation for the output layer
            for (int i = 0; i < outputSize; i++)
            {
                outputLayerDelta[i] = (outputLayerValue[i] - idealVector[i]) * outputLayerValue[i] *
                                      (1 - outputLayerValue[i]);

                for (int j = 0; j < hiddenSize; j++)
                {
                    weightVector.outputLayerWeights[i, j] = learningRate * (outputLayerDelta[i] * hiddenLayerValue[hiddenDimension - 1, j]);
                }
            }
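
            // Hidden-layer error terms follow the usual backprop recurrence
            // delta_j = sigmoid'(a_j) * sum_k(delta_k * w_kj): each neuron's delta
            // is the weighted sum of the deltas it feeds into, scaled by its own
            // sigmoid slope a_j * (1 - a_j).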

            // Delta and gradients for the last hidden layer
            for (int i = 0; i < hiddenSize; i++)
            {
                float sum = 0;

                for (int j = 0; j < outputSize; j++)
                {
                    sum += outputLayerDelta[j] * outputLayerWeights[j, i];
                }

                hiddenLayerDelta[hiddenDimension - 1, i] = sum * hiddenLayerValue[hiddenDimension - 1, i] *
                                                           (1 - hiddenLayerValue[hiddenDimension - 1, i]);

                for (int j = 0; j < hiddenSize; j++)
                {
                    weightVector.hiddenLayerWeights[hiddenDimension - 2, i, j] = learningRate * (hiddenLayerDelta[hiddenDimension - 1, i] *
                                                                                                 hiddenLayerValue[hiddenDimension - 2, j]);
                }
            }
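
            // Each remaining layer reads the deltas of the layer above it
            // (hiddenLayerDelta[i + 1]) before writing its own row, so the sweep
            // has to run from the back of the network towards the front.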

            // Deltas and gradients for the remaining hidden layers, back to front
            for (int i = hiddenDimension - 2; i >= 0; i--)
            {
                for (int j = 0; j < hiddenSize; j++)
                {
                    float sum = 0;

                    for (int k = 0; k < hiddenSize; k++)
                    {
                        sum += hiddenLayerDelta[i + 1, k] * hiddenLayerWeights[i, k, j];
                    }

                    hiddenLayerDelta[i, j] = sum * hiddenLayerValue[i, j] * (1 - hiddenLayerValue[i, j]);

                    if (i > 0)
                    {
                        for (int k = 0; k < hiddenSize; k++)
                        {
                            weightVector.hiddenLayerWeights[i - 1, j, k] = learningRate * (hiddenLayerDelta[i, j] *
                                                                                           hiddenLayerValue[i - 1, k]);
                        }
                    }
                }
            }
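
            // For the first hidden layer the feeding activations are the raw
            // inputs, so the gradient reduces to hiddenLayerDelta[0, j] * input[i];
            // the sigmoid slope of that layer is already folded into its delta.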

            // Gradients for the input layer weights
            for (int i = 0; i < inputSize; i++)
            {
                for (int j = 0; j < hiddenSize; j++)
                {
                    weightVector.inputLayerWeights[i, j] = learningRate * (hiddenLayerDelta[0, j] * (float)inputLayerValue[i]);
                }
            }

            return weightVector;
        }
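
        // Note on conventions: the learning rate is pre-multiplied into every
        // gradient entry and deltas are computed as (output - ideal), so a caller
        // performs gradient descent by subtracting the (averaged) entries from the
        // corresponding weights. A minimal sketch, assuming GradientWeightVector's
        // weight arrays mirror the network's own (illustrative only; the update
        // code is not part of this section):
        //
        //     foreach (GradientWeightVector g in GradientBackPropArr(samples))
        //         for (int i = 0; i < outputSize; i++)
        //             for (int j = 0; j < hiddenSize; j++)
        //                 outputLayerWeights[i, j] -= g.outputLayerWeights[i, j] / samples.Length;
        //     // ...and likewise for hiddenLayerWeights and inputLayerWeights.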