/// <summary>Trains the current network on the given training data
/// with the given learning rate using stochastic gradient descent.
/// Covers the training data in mini-batches of the given size for each
/// epoch; any leftover samples smaller than a full mini-batch are
/// skipped for that epoch.</summary>
/// <param name="trainingData">The training data to train on.</param>
/// <param name="learningRate">The constant rate of learning over all
/// epochs.</param>
/// <param name="nEpochs">The number of epochs to train over.</param>
/// <param name="miniBatchSize">The size of each mini-batch.</param>
/// <param name="log">Whether to write progress messages to the
/// console.</param>
/// <exception cref="ArgumentOutOfRangeException">Thrown when
/// <paramref name="nEpochs"/> is negative or
/// <paramref name="miniBatchSize"/> is not positive.</exception>
public void Train(TrainingData trainingData, double learningRate, int nEpochs, int miniBatchSize, bool log = false)
{
    if (nEpochs < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(nEpochs),
            "Number of epochs must be non-negative.");
    }
    if (miniBatchSize <= 0)
    {
        // Guards the SampleSize / miniBatchSize division below.
        throw new ArgumentOutOfRangeException(nameof(miniBatchSize),
            "Mini-batch size must be positive.");
    }

    // Never request a mini-batch larger than the data set itself.
    miniBatchSize = Math.Min(miniBatchSize, trainingData.SampleSize);
    for (int i = 0; i < nEpochs; i++)
    {
        // Randomise the sample order so each epoch sees different
        // mini-batch compositions.
        if (log)
        {
            Console.WriteLine(
                "Epoch " + (i + 1) + " of " + nEpochs
                + ": Shuffling data...");
        }
        trainingData.Shuffle(rng);

        /* Pick out as many mini-batches as we can to cover all
         * training data, processing each one as it is produced
         * rather than materialising them all up front. */
        int nMiniBatches = trainingData.SampleSize / miniBatchSize;
        for (int j = 0; j < nMiniBatches; j++)
        {
            if (log)
            {
                Console.WriteLine(
                    "Processing mini-batch " + (j + 1) + " of "
                    + nMiniBatches);
            }
            TrainingData miniBatch = trainingData.GetMiniBatch(
                j * miniBatchSize, miniBatchSize);
            stepGradientDescent(miniBatch, learningRate);
        }
    }
}
/// <summary>Performs one step of gradient descent according to the
/// current network's cost after training on the given mini-batch of
/// training data. Updates all weights and biases against the mean
/// cost gradient over the mini-batch, in the network's current
/// configuration.</summary>
/// <param name="miniBatch">The mini-batch of training data to use for
/// determining the cost of the current network.</param>
/// <param name="learningRate">The gradient descent step size.</param>
private void stepGradientDescent(TrainingData miniBatch, double learningRate)
{
    int sampleCount = miniBatch.SampleSize;
    int layerLinks = LayerCount - 1;

    // Zero-initialised accumulators shaped like the network's weight
    // and bias matrices.
    Matrix[] weightGradientSum = new Matrix[layerLinks];
    Matrix[] biasGradientSum = new Matrix[layerLinks];
    for (int l = 0; l < layerLinks; l++)
    {
        weightGradientSum[l] = new Matrix(
            weights[l].RowCount, weights[l].ColumnCount);
        biasGradientSum[l] = new Matrix(biases[l].RowCount, 1);
    }

    // Accumulate the cost gradient contributed by every training
    // example in the mini-batch.
    for (int s = 0; s < sampleCount; s++)
    {
        Matrix input = Matrix.FromArray(miniBatch.GetInput(s));
        Matrix target = Matrix.FromArray(
            miniBatch.GetExpectedOutput(s));
        Tuple<Matrix[], Matrix[]> gradient = getCostGradient(
            input, target);
        for (int l = 0; l < layerLinks; l++)
        {
            weightGradientSum[l] += gradient.Item1[l];
            biasGradientSum[l] += gradient.Item2[l];
        }
    }

    // Step downhill by the learning rate times the mean gradient.
    double scale = learningRate / sampleCount;
    for (int l = 0; l < layerLinks; l++)
    {
        weights[l] -= scale * weightGradientSum[l];
        biases[l] -= scale * biasGradientSum[l];
    }
}