/// <summary>
/// Stochastic gradient descent algorithm with lazy data loading: training samples are
/// fetched one at a time through <paramref name="getTrainingData"/> instead of being held
/// in memory as a whole array.
/// </summary>
/// <param name="nn">The network to train (updated in place).</param>
/// <param name="trainingDataSetSize">Total number of training samples available.</param>
/// <param name="getTrainingData">Loads the training sample at a given global index.</param>
/// <param name="batchSize">Number of samples per mini-batch (the last batch of an epoch may be smaller).</param>
/// <param name="epochs">Number of full passes over the training set.</param>
/// <param name="learningRate">Gradient descent step size.</param>
/// <param name="statusUpdate">Callback invoked once per epoch with progress information.</param>
/// <param name="testDataSetSize">Optional size of the test set; testing runs only when this, <paramref name="getTestingData"/> and <paramref name="checkCorrect"/> are all provided.</param>
/// <param name="getTestingData">Loads the test sample at a given index.</param>
/// <param name="checkCorrect">Decides whether a network output matches the expected output.</param>
public static void SgdLazy(DeepNeuralNetwork nn, int trainingDataSetSize, GetNextDataDelegate getTrainingData, int batchSize, int epochs, double learningRate, UpdateTrainingStatusDelegate statusUpdate, int testDataSetSize = 0, GetNextDataDelegate getTestingData = null, CheckIfCorrectDelegate checkCorrect = null)
{
    Random rng = new Random();
    // Stopwatch is a monotonic timer; DateTime.Now subtraction is wall-clock time and
    // gives wrong elapsed/ETA figures if the system clock is adjusted (DST, NTP) mid-run.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();
    int[] dataSetIndexes = new int[trainingDataSetSize];
    for (int i = 0; i < dataSetIndexes.Length; ++i)
    {
        dataSetIndexes[i] = i;
    }

    int currentBatchOffset;
    int currentBatchSize;
    for (int epoch = 1; epoch <= epochs; ++epoch)
    {
        // Reshuffle the visiting order every epoch so batches differ between epochs.
        dataSetIndexes.Shuffle(rng);
        for (currentBatchOffset = 0; currentBatchOffset < trainingDataSetSize; currentBatchOffset += batchSize)
        {
            // The last batch of the epoch may contain fewer samples than batchSize.
            int remainingDataSize = trainingDataSetSize - currentBatchOffset;
            currentBatchSize = remainingDataSize < batchSize ? remainingDataSize : batchSize;
            TrainWithBatchLazy(nn, currentBatchSize, getData, learningRate);
        }

        TrainingStatus status = new TrainingStatus { EpochsDone = epoch };
        if (testDataSetSize != 0 && getTestingData != null && checkCorrect != null)
        {
            double errorRate = TestNetwork(nn, testDataSetSize, getTestingData, out int correctCount, checkCorrect);
            status.Error = errorRate;
            status.Correct = correctCount;
        }

        // Status update: extrapolate the remaining time from the average duration per epoch so far.
        TimeSpan elapsed = stopwatch.Elapsed;
        TimeSpan remaining = TimeSpan.FromTicks((long)(elapsed.Ticks * ((epochs - epoch) / (double)epoch)));
        status.ElapsedTime = elapsed;
        status.TimeLeft = remaining;
        statusUpdate(status);
    }

    // Maps a batch-local index to the shuffled global index and loads that sample.
    // Captures currentBatchOffset, so it always reads relative to the batch being trained.
    LabeledData getData(int indexInBatch)
    {
        int actualIndex = dataSetIndexes[currentBatchOffset + indexInBatch];
        return getTrainingData(actualIndex);
    }
}
/// <summary>
/// Tests the network over a given test dataset. Returns the error ( sum(|a - y(x)|^2)/n ).
/// The out param will count the data that was correctly categorized using a given function.
/// </summary>
private static double TestNetwork(DeepNeuralNetwork nn, int testingDataSetSize, GetNextDataDelegate getNextData, out int correctCount, CheckIfCorrectDelegate checkCorrect)
{
    correctCount = 0;

    // Per-output-neuron accumulator of squared differences over the whole test set.
    Vector<double> squaredErrorSum = new DenseVector(nn.OutputLayer.GetNeuronCount());

    for (int sampleIndex = 0; sampleIndex < testingDataSetSize; ++sampleIndex)
    {
        LabeledData sample = getNextData(sampleIndex);
        Vector<double> output = nn.ProcessInput(sample.InputValues);

        if (checkCorrect(output.AsArray(), sample.OutputValues.AsArray()))
        {
            ++correctCount;
        }

        Vector<double> residual = sample.OutputValues - output;
        squaredErrorSum += residual.PointwiseMultiply(residual);
    }

    // Mean squared error per neuron, collapsed to a single scalar.
    return squaredErrorSum.Divide(testingDataSetSize).Average();
}
/// <summary>
/// Stochastic gradient descent algorithm. Convenience wrapper that forwards an in-memory
/// dataset to the lazy-loading implementation by indexing into the given arrays.
/// </summary>
public static void Sgd(DeepNeuralNetwork nn, LabeledData[] trainingData, int batchSize, int epochs, double learningRate, UpdateTrainingStatusDelegate statusUpdate, LabeledData[] testingData = null, CheckIfCorrectDelegate checkCorrect = null)
{
    // Per-epoch testing only runs when both the test set and the correctness check are supplied.
    bool runTests = testingData != null && checkCorrect != null;

    if (runTests)
    {
        SgdLazy(nn, trainingData.Length, index => trainingData[index], batchSize, epochs, learningRate, statusUpdate, testingData.Length, index => testingData[index], checkCorrect);
    }
    else
    {
        SgdLazy(nn, trainingData.Length, index => trainingData[index], batchSize, epochs, learningRate, statusUpdate);
    }
}
/// <summary>
/// Train the network using the given training set. The starting weights and biasses are
/// the ones present in the network when the call to this function is made.
/// </summary>
public static void TrainWithBatch(DeepNeuralNetwork nn, IEnumerable<LabeledData> batch, int batchSize, double learningRate)
{
    // Sum the per-sample gradients over the whole batch.
    var (gradientW, gradientB) = GenParamsZero(nn);
    foreach (LabeledData sample in batch)
    {
        var (sampleGradW, sampleGradB) = BackProp(nn, sample);
        for (int layerIndex = 0; layerIndex < nn.ComputedLayers.Length; ++layerIndex)
        {
            gradientW[layerIndex] += sampleGradW[layerIndex];
            gradientB[layerIndex] += sampleGradB[layerIndex];
        }
    }

    // Descent step: move against the averaged gradient, scaled by the learning rate.
    double step = learningRate / batchSize;
    for (int layerIndex = 0; layerIndex < nn.ComputedLayers.Length; ++layerIndex)
    {
        ComputedLayer layer = nn.ComputedLayers[layerIndex];
        layer.Weights -= step * gradientW[layerIndex];
        layer.Biasses -= step * gradientB[layerIndex];
    }
}
/// <summary>
/// Computes the gradient of the cost function w.r.t. every weight matrix and bias vector
/// of the network for a single training sample, via backpropagation.
/// Returns (nablaW, nablaB): one gradient matrix/vector per computed layer.
/// </summary>
private static (Matrix<double>[] nablaW, Vector<double>[] nablaB) BackProp(DeepNeuralNetwork nn, LabeledData trainingData)
{
    Matrix<double>[] nablaW;
    Vector<double>[] nablaB;
    (nablaW, nablaB) = GenParamsZero(nn);

    // activation before applying sigm function (the weighted input z = W*a + b, one per layer)
    Vector<double> z;
    List<Vector<double>> zs = new List<Vector<double>>();
    // activation vector; starts as the raw input, then holds each layer's sigmoid output
    Vector<double> a = trainingData.InputValues;
    // NOTE: activations includes the input layer, so activations has one more entry than zs
    // and activations[i] is the *input* to computed layer i.
    List<Vector<double>> activations = new List<Vector<double>>();
    activations.Add(a);

    //feedforward
    foreach (ComputedLayer layer in nn.ComputedLayers)
    {
        z = layer.Weights * a + layer.Biasses;
        zs.Add(z);
        a = Utils.Sigmoid(z);
        activations.Add(a);
    }

    //backward pass
    // Output layer: delta = dC/da ⊙ sigma'(z)  (elementwise).
    Vector<double> delta = CostDerivative(activations.Last(), trainingData.OutputValues).PointwiseMultiply(Utils.SigmoidPrime(zs.Last()));
    nablaB[nn.ComputedLayers.Length - 1] = delta;
    // Outer product of delta with the previous layer's activation gives the weight gradient.
    nablaW[nn.ComputedLayers.Length - 1] = delta.ToColumnMatrix() * activations[activations.Count - 2].ToRowMatrix();

    // Propagate delta backwards through the remaining layers using the transpose of the
    // next layer's weights.
    for (int i = nn.ComputedLayers.Length - 2; i >= 0; --i)
    {
        delta = nn.ComputedLayers[i + 1].Weights.TransposeThisAndMultiply(delta).PointwiseMultiply(Utils.SigmoidPrime(zs[i]));
        nablaB[i] = delta;
        nablaW[i] = delta.ToColumnMatrix() * activations[i].ToRowMatrix(); //note: activations[i] is actualy the activation of the previous layer since it counts the input layer as well
    }

    return (nablaW, nablaB);
}
/// <summary>
/// Creates matrices (for weights) and vectors (for biasses) initialized with 0 values
/// coresponding to every computed layer of the NN.
/// </summary>
private static (Matrix<double>[] nablaW, Vector<double>[] nablaB) GenParamsZero(DeepNeuralNetwork nn)
{
    int layerCount = nn.ComputedLayers.Length;
    var zeroWeights = new Matrix<double>[layerCount];
    var zeroBiasses = new Vector<double>[layerCount];

    for (int i = 0; i < layerCount; ++i)
    {
        // Mirror each layer's weight and bias dimensions with zero-filled dense storage.
        ComputedLayer layer = nn.ComputedLayers[i];
        zeroWeights[i] = new DenseMatrix(layer.Weights.RowCount, layer.Weights.ColumnCount);
        zeroBiasses[i] = new DenseVector(layer.Biasses.Count);
    }

    return (zeroWeights, zeroBiasses);
}
/// <summary>
/// Train the network using the given training set. The starting weights and biasses are
/// the ones present in the network when the call to this function is made.
/// This "lazy" version means that each training value pair will be read (using the function
/// given as a parameter) only when it has to be processed.
/// Recomended when the size of one training data is very large and loading the whole batch
/// would need too much memory.
/// Obs.: The lazy loading of the data should be ensured by the function given.
/// </summary>
public static void TrainWithBatchLazy(DeepNeuralNetwork nn, int batchSize, GetNextDataDelegate getNextData, double learningRate)
{
    // LazyBatch defers each sample load until TrainWithBatch enumerates it.
    TrainWithBatch(nn, new LazyBatch(getNextData, batchSize), batchSize, learningRate);
}