/// <summary>
/// Stochastic gradient descent over an in-memory dataset. Delegates the actual
/// training loop to <see cref="SgdLazy"/>, exposing the arrays through
/// index-based accessor delegates.
/// </summary>
public static void Sgd(DeepNeuralNetwork nn, LabeledData[] trainingData, int batchSize, int epochs, double learningRate, UpdateTrainingStatusDelegate statusUpdate, LabeledData[] testingData = null, CheckIfCorrectDelegate checkCorrect = null)
{
    // Per-epoch evaluation only runs when both the test data and the
    // correctness predicate were supplied.
    bool evaluationEnabled = testingData != null && checkCorrect != null;

    if (evaluationEnabled)
    {
        SgdLazy(nn, trainingData.Length, index => trainingData[index], batchSize, epochs, learningRate, statusUpdate, testingData.Length, index => testingData[index], checkCorrect);
    }
    else
    {
        SgdLazy(nn, trainingData.Length, index => trainingData[index], batchSize, epochs, learningRate, statusUpdate);
    }
}
/// <summary>
/// Stochastic gradient descent algorithm with lazy data loading: training and
/// testing samples are fetched on demand through delegates instead of being
/// held in memory.
/// </summary>
/// <param name="nn">Network to train in place.</param>
/// <param name="trainingDataSetSize">Number of training samples reachable through <paramref name="getTrainingData"/>.</param>
/// <param name="getTrainingData">Returns the training sample at a given index.</param>
/// <param name="batchSize">Mini-batch size; must be positive.</param>
/// <param name="epochs">Number of full passes over the training set.</param>
/// <param name="learningRate">Step size passed to each batch update.</param>
/// <param name="statusUpdate">Invoked once per epoch with progress/timing info.</param>
/// <param name="testDataSetSize">Number of test samples; 0 disables per-epoch evaluation.</param>
/// <param name="getTestingData">Returns the test sample at a given index (optional).</param>
/// <param name="checkCorrect">Predicate deciding whether an output counts as correct (optional).</param>
/// <exception cref="ArgumentNullException">A required delegate is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="batchSize"/> is not positive or <paramref name="trainingDataSetSize"/> is negative.</exception>
public static void SgdLazy(DeepNeuralNetwork nn, int trainingDataSetSize, GetNextDataDelegate getTrainingData, int batchSize, int epochs, double learningRate, UpdateTrainingStatusDelegate statusUpdate, int testDataSetSize = 0, GetNextDataDelegate getTestingData = null, CheckIfCorrectDelegate checkCorrect = null)
{
    if (getTrainingData == null)
    {
        throw new ArgumentNullException(nameof(getTrainingData));
    }
    if (statusUpdate == null)
    {
        throw new ArgumentNullException(nameof(statusUpdate));
    }
    // A non-positive batch size would make the inner loop spin forever,
    // because currentBatchOffset += batchSize never advances.
    if (batchSize <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(batchSize), batchSize, "Batch size must be positive.");
    }
    if (trainingDataSetSize < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(trainingDataSetSize), trainingDataSetSize, "Training dataset size cannot be negative.");
    }

    Random rng = new Random();
    // Stopwatch is monotonic; DateTime.Now can jump backwards/forwards
    // (DST, NTP sync) and corrupt the elapsed/remaining-time estimates.
    System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();

    // Identity permutation, reshuffled at the start of every epoch.
    int[] dataSetIndexes = new int[trainingDataSetSize];
    for (int i = 0; i < dataSetIndexes.Length; ++i)
    {
        dataSetIndexes[i] = i;
    }

    // Captured by the local function getData below, so it must outlive
    // the per-epoch loop scope.
    int currentBatchOffset = 0;

    for (int epoch = 1; epoch <= epochs; ++epoch)
    {
        // Fresh random visiting order for each pass over the data.
        dataSetIndexes.Shuffle(rng);

        for (currentBatchOffset = 0; currentBatchOffset < trainingDataSetSize; currentBatchOffset += batchSize)
        {
            // The final batch may be smaller than batchSize.
            int remainingDataSize = trainingDataSetSize - currentBatchOffset;
            int currentBatchSize = remainingDataSize < batchSize ? remainingDataSize : batchSize;
            TrainWithBatchLazy(nn, currentBatchSize, getData, learningRate);
        }

        TrainingStatus status = new TrainingStatus { EpochsDone = epoch };
        if (testDataSetSize != 0 && getTestingData != null && checkCorrect != null)
        {
            double errorRate = TestNetwork(nn, testDataSetSize, getTestingData, out int correctCount, checkCorrect);
            status.Error = errorRate;
            status.Correct = correctCount;
        }

        // Linear extrapolation: remaining ~= elapsed * (epochs left / epochs done).
        TimeSpan elapsed = timer.Elapsed;
        TimeSpan remaining = TimeSpan.FromTicks((long)(elapsed.Ticks * ((epochs - epoch) / (double)epoch)));
        status.ElapsedTime = elapsed;
        status.TimeLeft = remaining;
        statusUpdate(status);
    }

    // Maps a position within the current batch back to a (shuffled) dataset index.
    LabeledData getData(int indexInBatch)
    {
        int actualIndex = dataSetIndexes[currentBatchOffset + indexInBatch];
        return getTrainingData(actualIndex);
    }
}
/// <summary>
/// Tests the network over a given test dataset. Returns the error
/// ( sum(|a - y(x)|^2)/n ). The out param will count the data that was
/// correctly categorized using a given function.
/// </summary>
private static double TestNetwork(DeepNeuralNetwork nn, int testingDataSetSize, GetNextDataDelegate getNextData, out int correctCount, CheckIfCorrectDelegate checkCorrect)
{
    correctCount = 0;

    // Per-output-neuron accumulator of squared differences over the whole set.
    Vector<double> squaredErrorSum = new DenseVector(nn.OutputLayer.GetNeuronCount());

    for (int sample = 0; sample < testingDataSetSize; ++sample)
    {
        LabeledData labeled = getNextData(sample);
        Vector<double> actual = nn.ProcessInput(labeled.InputValues);

        if (checkCorrect(actual.AsArray(), labeled.OutputValues.AsArray()))
        {
            ++correctCount;
        }

        Vector<double> difference = labeled.OutputValues - actual;
        squaredErrorSum += difference.PointwiseMultiply(difference);
    }

    // Mean squared error per neuron, averaged across the output neurons.
    return squaredErrorSum.Divide(testingDataSetSize).Average();
}