/// <summary>
/// Applies the accumulated kernel gradients to the weights as a batch
/// </summary>
public override void Descend()
{
    //Calculate updates from the accumulated gradients
    Updates = new double[KernelSize, KernelSize];
    AvgUpdate = 0;
    for (int i = 0; i < KernelSize; i++)
    {
        for (int ii = 0; ii < KernelSize; ii++)
        {
            Updates[i, ii] = Gradients[i, ii] * (2d / NN.BatchSize);
            //Root mean square propagation
            if (NN.UseRMSProp)
            {
                RMSGrad[i, ii] = (RMSGrad[i, ii] * NN.RMSDecay) + ((1 - NN.RMSDecay) * (Updates[i, ii] * Updates[i, ii]));
                //NN.Infinitesimal keeps the division from producing NaN/Infinity when the running average is near zero
                Updates[i, ii] = Updates[i, ii] / (Math.Sqrt(RMSGrad[i, ii]) + NN.Infinitesimal);
            }
            Updates[i, ii] *= NN.LearningRate;
        }
    }
    //Gradient normalization
    if (NN.NormGradients)
    {
        Updates = Maths.Scale(NN.LearningRate, Maths.Normalize(Updates));
    }
    //Apply updates
    for (int i = 0; i < KernelSize; i++)
    {
        for (int ii = 0; ii < KernelSize; ii++)
        {
            Weights[i, ii] -= Updates[i, ii];
            AvgUpdate -= Updates[i, ii];
            //Weight clipping
            if (NN.UseClipping)
            {
                if (Weights[i, ii] > NN.ClipParameter) { Weights[i, ii] = NN.ClipParameter; }
                if (Weights[i, ii] < -NN.ClipParameter) { Weights[i, ii] = -NN.ClipParameter; }
            }
        }
    }
    //Reset gradients
    Gradients = new double[KernelSize, KernelSize];
}
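//The RMSProp steps above follow the standard update rule: the running square average
//v is decayed as v = rho*v + (1 - rho)*g^2, and the step becomes lr * g / (sqrt(v) + eps).
//A minimal scalar sketch of that rule, assuming the same semantics as the loops above
//(RmsPropSketch, rho, and epsilon are illustrative names, not part of this codebase):
public static class RmsPropSketch
{
    //Mutates the running square average and returns the scaled update for one weight
    public static double RmsPropStep(double gradient, ref double runningSquareAvg,
        double rho = 0.9, double learningRate = 0.001, double epsilon = 1e-8)
    {
        runningSquareAvg = (runningSquareAvg * rho) + ((1 - rho) * gradient * gradient);
        return learningRate * gradient / (Math.Sqrt(runningSquareAvg) + epsilon);
    }
}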
/// <summary>
/// Applies the accumulated gradients to the weights and biases as a batch;
/// the batch size, learning rate, RMS decay, and clip bound are read from the parent network (NN)
/// </summary>
public override void Descend()
{
    //Calculate updates from the accumulated gradients
    WUpdates = new double[Length, InputLength];
    BUpdates = new double[Length];
    for (int i = 0; i < Length; i++)
    {
        for (int ii = 0; ii < InputLength; ii++)
        {
            //Normal gradient descent update
            WUpdates[i, ii] = WeightGradient[i, ii] * (2d / NN.BatchSize);
            //Root mean square propagation
            if (NN.UseRMSProp)
            {
                WRMSGrad[i, ii] = (WRMSGrad[i, ii] * NN.RMSDecay) + ((1 - NN.RMSDecay) * (WUpdates[i, ii] * WUpdates[i, ii]));
                //NN.Infinitesimal keeps the division from producing NaN/Infinity when the running average is near zero
                WUpdates[i, ii] = WUpdates[i, ii] / (Math.Sqrt(WRMSGrad[i, ii]) + NN.Infinitesimal);
            }
            WUpdates[i, ii] *= NN.LearningRate;
        }
        //Normal gradient descent update
        BUpdates[i] = BiasGradient[i] * (2d / NN.BatchSize);
        //Root mean square propagation
        if (NN.UseRMSProp)
        {
            BRMSGrad[i] = (BRMSGrad[i] * NN.RMSDecay) + ((1 - NN.RMSDecay) * (BUpdates[i] * BUpdates[i]));
            BUpdates[i] = BUpdates[i] / (Math.Sqrt(BRMSGrad[i]) + NN.Infinitesimal);
        }
        BUpdates[i] *= NN.LearningRate;
    }
    //Gradient normalization
    if (NN.NormGradients)
    {
        WUpdates = Maths.Scale(NN.LearningRate, Maths.Normalize(WUpdates));
        BUpdates = Maths.Scale(NN.LearningRate, Maths.Normalize(BUpdates));
    }
    //Apply updates
    for (int i = 0; i < Length; i++)
    {
        for (int ii = 0; ii < InputLength; ii++)
        {
            //Update weight and track the average update
            Weights[i, ii] -= WUpdates[i, ii];
            AvgGradient -= WUpdates[i, ii];
            //Weight clipping
            if (NN.UseClipping)
            {
                if (Weights[i, ii] > NN.ClipParameter) { Weights[i, ii] = NN.ClipParameter; }
                if (Weights[i, ii] < -NN.ClipParameter) { Weights[i, ii] = -NN.ClipParameter; }
            }
        }
        Biases[i] -= BUpdates[i];
        //Bias clipping
        if (NN.UseClipping)
        {
            if (Biases[i] > NN.ClipParameter) { Biases[i] = NN.ClipParameter; }
            if (Biases[i] < -NN.ClipParameter) { Biases[i] = -NN.ClipParameter; }
        }
    }
    //Reset gradients
    WeightGradient = new double[Length, InputLength];
    BiasGradient = new double[Length];
}
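//The clipping branches in both methods implement a symmetric clamp to [-ClipParameter, +ClipParameter].
//On runtimes that provide Math.Clamp (.NET Core 2.0+ / .NET Standard 2.1), the same logic can be
//written in one call per element; a minimal sketch (ClippingSketch and ClampToRange are
//illustrative names, not part of this codebase):
public static class ClippingSketch
{
    //Clamps every element of a weight matrix into [-clip, +clip]
    public static void ClampToRange(double[,] weights, double clip)
    {
        for (int i = 0; i < weights.GetLength(0); i++)
        {
            for (int ii = 0; ii < weights.GetLength(1); ii++)
            {
                weights[i, ii] = Math.Clamp(weights[i, ii], -clip, clip);
            }
        }
    }
}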