/// <summary>
        /// Stochastic gradient descent over in-memory training data.
        /// Thin wrapper that serves the arrays to <see cref="SgdLazy"/> through index-based accessors.
        /// When either <paramref name="testingData"/> or <paramref name="checkCorrect"/> is missing,
        /// training runs without per-epoch evaluation.
        /// </summary>
        public static void Sgd(DeepNeuralNetwork nn, LabeledData[] trainingData, int batchSize, int epochs, double learningRate,
                               UpdateTrainingStatusDelegate statusUpdate, LabeledData[] testingData = null, CheckIfCorrectDelegate checkCorrect = null)
        {
            LabeledData getTrainingData(int index) => trainingData[index];
            LabeledData getTestingData(int index)  => testingData[index];

            bool canEvaluate = testingData != null && checkCorrect != null;
            if (canEvaluate)
            {
                SgdLazy(nn, trainingData.Length, getTrainingData, batchSize, epochs, learningRate, statusUpdate, testingData.Length, getTestingData, checkCorrect);
            }
            else
            {
                SgdLazy(nn, trainingData.Length, getTrainingData, batchSize, epochs, learningRate, statusUpdate);
            }
        }
        /// <summary>
        /// Stochastic gradient descent algorithm with lazy data loading: samples are fetched one at a
        /// time through <paramref name="getTrainingData"/>, so the full dataset never has to be in memory.
        /// Shuffles an index permutation each epoch, trains in mini-batches, optionally evaluates on a
        /// test set, and reports progress (epoch count, error, elapsed/remaining time) via
        /// <paramref name="statusUpdate"/> after every epoch.
        /// </summary>
        /// <param name="nn">Network to train (updated in place).</param>
        /// <param name="trainingDataSetSize">Number of training samples reachable through <paramref name="getTrainingData"/>.</param>
        /// <param name="getTrainingData">Index-based accessor for training samples.</param>
        /// <param name="batchSize">Mini-batch size; must be positive.</param>
        /// <param name="epochs">Number of full passes over the training data.</param>
        /// <param name="learningRate">Step size passed through to the batch trainer.</param>
        /// <param name="statusUpdate">Callback invoked once per epoch with progress information.</param>
        /// <param name="testDataSetSize">Number of test samples; 0 disables evaluation.</param>
        /// <param name="getTestingData">Index-based accessor for test samples; null disables evaluation.</param>
        /// <param name="checkCorrect">Predicate deciding whether an output counts as correct; null disables evaluation.</param>
        /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="batchSize"/> is not positive.</exception>
        public static void SgdLazy(DeepNeuralNetwork nn, int trainingDataSetSize, GetNextDataDelegate getTrainingData,
                                   int batchSize, int epochs, double learningRate, UpdateTrainingStatusDelegate statusUpdate,
                                   int testDataSetSize = 0, GetNextDataDelegate getTestingData = null, CheckIfCorrectDelegate checkCorrect = null)
        {
            if (batchSize <= 0)
            {
                // A non-positive batch size would leave currentBatchOffset stuck and loop forever.
                throw new ArgumentOutOfRangeException(nameof(batchSize), "Batch size must be positive.");
            }

            Random rng = new Random();
            // Stopwatch is monotonic; DateTime.Now can jump (DST, NTP sync) and skew the time estimates.
            var timer = System.Diagnostics.Stopwatch.StartNew();

            // Identity permutation over the training set; reshuffled at the start of every epoch.
            int[] dataSetIndexes = new int[trainingDataSetSize];
            for (int i = 0; i < dataSetIndexes.Length; ++i)
            {
                dataSetIndexes[i] = i;
            }
            // Declared at method scope because the getData local function below captures it.
            int currentBatchOffset;

            for (int epoch = 1; epoch <= epochs; ++epoch)
            {
                dataSetIndexes.Shuffle(rng);
                for (currentBatchOffset = 0; currentBatchOffset < trainingDataSetSize; currentBatchOffset += batchSize)
                {
                    // Last batch may be smaller than batchSize.
                    int remainingDataSize = trainingDataSetSize - currentBatchOffset;
                    int currentBatchSize  = remainingDataSize < batchSize ? remainingDataSize : batchSize;
                    TrainWithBatchLazy(nn, currentBatchSize, getData, learningRate);
                }
                TrainingStatus status = new TrainingStatus
                {
                    EpochsDone = epoch
                };
                if (testDataSetSize != 0 && getTestingData != null && checkCorrect != null)
                {
                    double errorRate = TestNetwork(nn, testDataSetSize, getTestingData, out int correctCount, checkCorrect);
                    status.Error   = errorRate;
                    status.Correct = correctCount;
                }

                // Status update: linear extrapolation — average time per finished epoch times epochs left.
                TimeSpan elapsed   = timer.Elapsed;
                TimeSpan remaining = TimeSpan.FromTicks((long)(elapsed.Ticks * ((epochs - epoch) / (double)epoch)));
                status.ElapsedTime = elapsed;
                status.TimeLeft    = remaining;
                statusUpdate(status);
            }

            // Translates a batch-relative index into the shuffled dataset index, then fetches the sample.
            LabeledData getData(int indexInBatch)
            {
                int actualIndex = dataSetIndexes[currentBatchOffset + indexInBatch];

                return(getTrainingData(actualIndex));
            }
        }
        /// <summary>
        /// Tests the network over a given test dataset. Returns the error ( sum(|a - y(x)|^2)/n,
        /// averaged across the output components ). The out param counts the samples that were
        /// correctly categorized according to the supplied predicate.
        /// </summary>
        private static double TestNetwork(DeepNeuralNetwork nn, int testingDataSetSize, GetNextDataDelegate getNextData, out int correctCount, CheckIfCorrectDelegate checkCorrect)
        {
            correctCount = 0;
            // Per-output-component sum of squared errors, accumulated over the whole dataset.
            Vector<double> squaredErrorSum = new DenseVector(nn.OutputLayer.GetNeuronCount());

            for (int sampleIndex = 0; sampleIndex < testingDataSetSize; ++sampleIndex)
            {
                LabeledData    sample     = getNextData(sampleIndex);
                Vector<double> prediction = nn.ProcessInput(sample.InputValues);

                if (checkCorrect(prediction.AsArray(), sample.OutputValues.AsArray()))
                {
                    ++correctCount;
                }

                Vector<double> residual = sample.OutputValues - prediction;
                squaredErrorSum += residual.PointwiseMultiply(residual);
            }

            // Mean over the dataset, then mean over the output components.
            return squaredErrorSum.Divide(testingDataSetSize).Average();
        }