Example #1
        /// <summary>Trains the current network on the given training data,
        /// with the given learning rate, using stochastic gradient descent.
        /// Covers the training data in mini-batches of the given size for each
        /// epoch.</summary>
        /// <param name="trainingData">The training data to train on.</param>
        /// <param name="learningRate">The constant rate of learning over all
        /// epochs.</param>
        /// <param name="nEpochs">The number of epochs to train over.</param>
        /// <param name="miniBatchSize">The size of each mini-batch.</param>
        /// <param name="log">Whether to log per-mini-batch progress to the
        /// console.</param>
        public void Train(TrainingData trainingData,
                          double learningRate, int nEpochs, int miniBatchSize, bool log = false)
        {
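            // Clamp the mini-batch size so it never exceeds the data set size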
            miniBatchSize = Math.Min(miniBatchSize, trainingData.SampleSize);

            for (int i = 0; i < nEpochs; i++)
            {
                // Randomise the training data
                Console.WriteLine("Epoch " + i + " of " + nEpochs + ": Shuffling data...");
                trainingData.Shuffle(rng);

                /* Pick out as many whole mini-batches as we can to cover the
                 * training data; integer division means any leftover samples
                 * (fewer than miniBatchSize) are skipped for this epoch */
                int            nMiniBatches = trainingData.SampleSize / miniBatchSize;
                TrainingData[] miniBatches  = new TrainingData[nMiniBatches];

                for (int j = 0; j < nMiniBatches; j++)
                {
                    miniBatches[j] = trainingData.GetMiniBatch(
                        j * miniBatchSize, miniBatchSize);
                }

                for (int j = 0; j < nMiniBatches; j++)
                {
                    TrainingData miniBatch = miniBatches[j];
                    if (log)
                    {
                        Console.WriteLine(
                            "Processing mini-batch " + j + " of " + nMiniBatches);
                    }
                    stepGradientDescent(miniBatch, learningRate);
                }
            }
        }
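For context, a call to Train might look like the sketch below. The Network constructor, the layer sizes, and the LoadTrainingData helper are illustrative assumptions; only the Train signature comes from the example above.

        // Hypothetical usage sketch: the Network constructor and the
        // LoadTrainingData helper are assumptions for illustration only.
        Network network = new Network(new int[] { 784, 30, 10 });
        TrainingData trainingData = LoadTrainingData(); // hypothetical helper

        // Train for 30 epochs with mini-batches of 10 samples and a
        // learning rate of 3.0, logging progress to the console.
        network.Train(trainingData, 3.0, 30, 10, log: true);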
Example #2
        /// <summary>Performs one step of gradient descent based on the
        /// current network's cost over the given mini-batch of training
        /// data. Updates all weights and biases according to the gradient
        /// of the cost function at the network's current
        /// configuration.</summary>
        /// <param name="miniBatch">The mini-batch of training data to use for
        /// determining the cost of the current network.</param>
        /// <param name="learningRate">The gradient descent step size.</param>
        private void stepGradientDescent(TrainingData miniBatch, double learningRate)
        {
            int miniBatchSize = miniBatch.SampleSize;

            Matrix[] weightsDirection = new Matrix[LayerCount - 1];
            Matrix[] biasesDirection  = new Matrix[LayerCount - 1];

            /* Initialise the direction matrices to be of the same dimensions
             * as the weights and biases */
            for (int i = 0; i < LayerCount - 1; i++)
            {
                weightsDirection[i] = new Matrix(weights[i].RowCount,
                                                 weights[i].ColumnCount);
                biasesDirection[i] = new Matrix(biases[i].RowCount, 1);
            }

            /* Sum the steps of weights and biases for each training example in
             * the mini-batch */
            for (int i = 0; i < miniBatchSize; i++)
            {
                Matrix input          = Matrix.FromArray(miniBatch.GetInput(i));
                Matrix expectedOutput = Matrix.FromArray(
                    miniBatch.GetExpectedOutput(i));

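                // Gradient of the cost with respect to every weight and bias,
                // computed for this single training example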
                Tuple<Matrix[], Matrix[]> direction = getCostGradient(
                    input, expectedOutput);

                for (int l = 0; l < LayerCount - 1; l++)
                {
                    weightsDirection[l] += direction.Item1[l];
                    biasesDirection[l]  += direction.Item2[l];
                }
            }

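            /* Apply the averaged step: each parameter moves by
             * -(learningRate / miniBatchSize) times its summed gradient */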
            for (int l = 0; l < LayerCount - 1; l++)
            {
                weights[l] -=
                    (learningRate / miniBatchSize) * weightsDirection[l];
                biases[l] -=
                    (learningRate / miniBatchSize) * biasesDirection[l];
            }
        }
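The final loop is the standard averaged mini-batch update: each weight and bias moves by -(learningRate / miniBatchSize) times the sum of the per-example gradients. Below is a minimal, self-contained sketch of that same update using plain arrays in place of the Matrix type; all names here are illustrative, not part of the class above.

using System;

class GradientStepSketch
{
    // Applies w[i] -= (learningRate / batchSize) * gradSum[i], the same
    // averaged update the loop above performs per layer with matrices.
    static void ApplyStep(double[] w, double[] gradSum,
                          double learningRate, int batchSize)
    {
        for (int i = 0; i < w.Length; i++)
        {
            w[i] -= (learningRate / batchSize) * gradSum[i];
        }
    }

    static void Main()
    {
        double[] weights = { 0.5, -0.3 };
        double[] gradSum = { 0.2, 0.4 }; // gradients summed over 4 examples
        ApplyStep(weights, gradSum, learningRate: 3.0, batchSize: 4);
        Console.WriteLine(string.Join(", ", weights));
        // -> 0.35, -0.6 (up to floating-point rounding)
    }
}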