/// <summary>
        /// Tests the network over a given test dataset. Returns the per-output-component average of the
        /// mean squared error, i.e. mean over components of ( sum_x |a(x) - y(x)|^2 / n ).
        /// </summary>
        /// <param name="nn">The trained network to evaluate; input is fed through <c>ProcessInput</c>.</param>
        /// <param name="testingDataSetSize">Number of samples to evaluate; must be positive.</param>
        /// <param name="getNextData">Supplies the i-th labeled sample (called once per index 0..size-1).</param>
        /// <param name="correctCount">Receives the number of samples judged correct by <paramref name="checkCorrect"/>.</param>
        /// <param name="checkCorrect">Decides whether a network output matches the expected output.</param>
        /// <returns>The averaged error described above.</returns>
        /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="testingDataSetSize"/> is not positive.</exception>
        private static double TestNetwork(DeepNeuralNetwork nn, int testingDataSetSize, GetNextDataDelegate getNextData, out int correctCount, CheckIfCorrectDelegate checkCorrect)
        {
            // Guard: with a zero (or negative) size the loop is skipped and the division
            // below would be 0/0, silently returning NaN — fail fast instead.
            if (testingDataSetSize <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(testingDataSetSize), "Test dataset size must be positive.");
            }
            correctCount = 0;
            Vector <double> error = new DenseVector(nn.OutputLayer.GetNeuronCount());

            for (int i = 0; i < testingDataSetSize; ++i)
            {
                LabeledData     labeledData = getNextData(i);
                Vector <double> result      = nn.ProcessInput(labeledData.InputValues);
                if (checkCorrect(result.AsArray(), labeledData.OutputValues.AsArray()))
                {
                    ++correctCount;
                }
                // Accumulate the squared per-component difference for this sample.
                Vector <double> diff = labeledData.OutputValues - result;
                error += diff.PointwiseMultiply(diff);
            }
            // Mean over samples, then mean over output components.
            error = error.Divide(testingDataSetSize);
            return(error.Average());
        }
        private static (Matrix <double>[] nablaW, Vector <double>[] nablaB) BackProp(DeepNeuralNetwork nn, LabeledData trainingData)
        {
            Matrix <double>[] nablaW;
            Vector <double>[] nablaB;
            (nablaW, nablaB) = GenParamsZero(nn);

            //activation before applying sigm function
            Vector <double>         z;
            List <Vector <double> > zs = new List <Vector <double> >();
            //activation vector
            Vector <double>         a           = trainingData.InputValues;
            List <Vector <double> > activations = new List <Vector <double> >();

            activations.Add(a);
            //feedforward
            foreach (ComputedLayer layer in nn.ComputedLayers)
            {
                z = layer.Weights * a + layer.Biasses;
                zs.Add(z);
                a = Utils.Sigmoid(z);
                activations.Add(a);
            }
            //backward pass
            Vector <double> delta = CostDerivative(activations.Last(), trainingData.OutputValues).PointwiseMultiply(Utils.SigmoidPrime(zs.Last()));

            nablaB[nn.ComputedLayers.Length - 1] = delta;
            nablaW[nn.ComputedLayers.Length - 1] = delta.ToColumnMatrix() * activations[activations.Count - 2].ToRowMatrix();
            for (int i = nn.ComputedLayers.Length - 2; i >= 0; --i)
            {
                delta     = nn.ComputedLayers[i + 1].Weights.TransposeThisAndMultiply(delta).PointwiseMultiply(Utils.SigmoidPrime(zs[i]));
                nablaB[i] = delta;
                nablaW[i] = delta.ToColumnMatrix() * activations[i].ToRowMatrix();
                //note: activations[i] is actualy the activation of the previous layer since it counts the input layer as well
            }
            return(nablaW, nablaB);
        }