/// <summary>
/// Applies the accumulated kernel gradients to the weights as one batch update,
/// with optional RMSProp scaling, gradient normalization, and weight clipping.
/// Resets <c>Gradients</c> afterwards.
/// </summary>
public override void Descend()
{
    //Calculate gradients
    Updates = new double[KernelSize, KernelSize];
    AvgUpdate = 0;
    for (int i = 0; i < KernelSize; i++)
    {
        for (int ii = 0; ii < KernelSize; ii++)
        {
            //Mean-squared-error derivative scale: 2/batchsize
            Updates[i, ii] = Gradients[i, ii] * (2d / NN.BatchSize);
            //Root mean square propagation
            if (NN.UseRMSProp)
            {
                RMSGrad[i, ii] = (RMSGrad[i, ii] * NN.RMSDecay) + ((1 - NN.RMSDecay) * (Updates[i, ii] * Updates[i, ii]));
                //Epsilon term restored: without it, a zero running RMS gradient
                //(e.g. on the first batch with a zero gradient) divides by zero and
                //poisons the kernel with NaN/Infinity.
                Updates[i, ii] = Updates[i, ii] / (Math.Sqrt(RMSGrad[i, ii]) + NN.Infinitesimal);
            }
            Updates[i, ii] *= NN.LearningRate;
        }
    }
    //Gradient normalization: rescale the whole update matrix to unit norm, then by the learning rate
    if (NN.NormGradients)
    {
        Updates = Maths.Scale(NN.LearningRate, Maths.Normalize(Updates));
    }
    //Apply updates
    for (int i = 0; i < KernelSize; i++)
    {
        for (int ii = 0; ii < KernelSize; ii++)
        {
            Weights[i, ii] -= Updates[i, ii];
            AvgUpdate -= Updates[i, ii];
            //Weight clipping to [-ClipParameter, ClipParameter]
            if (NN.UseClipping)
            {
                if (Weights[i, ii] > NN.ClipParameter) { Weights[i, ii] = NN.ClipParameter; }
                if (Weights[i, ii] < -NN.ClipParameter) { Weights[i, ii] = -NN.ClipParameter; }
            }
        }
    }
    //Reset accumulated gradients for the next batch
    Gradients = new double[KernelSize, KernelSize];
}
/// <summary>
/// Adds two batches of vectors together element-wise (residual/sum layer forward pass),
/// then optionally normalizes and applies the configured activation function.
/// </summary>
/// <param name="inputs1">First batch of input vectors</param>
/// <param name="inputs2">Second batch of input vectors</param>
public void Calculate(List<double[]> inputs1, List<double[]> inputs2)
{
    ZVals = new List<double[]>();
    if (inputs1.Count != inputs2.Count)
    {
        throw new Exception("List sizes do not match");
    }
    for (int batch = 0; batch < NN.BatchSize; batch++)
    {
        double[] a = inputs1[batch];
        double[] b = inputs2[batch];
        if (a.Length != b.Length)
        {
            throw new Exception("Array sizes do not match");
        }
        //Element-wise sum of the two input vectors
        var summed = new double[a.Length];
        for (int index = 0; index < summed.Length; index++)
        {
            summed[index] = a[index] + b[index];
        }
        ZVals.Add(summed);
    }
    //If normalizing, do so, but only if it won't return an all-zero matrix
    if (NN.NormOutputs && ZVals[0].Length > 1)
    {
        ZVals = Maths.Normalize(ZVals);
    }
    //Use the specified type of activation function
    switch (ActivationFunction)
    {
        case 0:
            Values = Maths.Tanh(ZVals);
            break;
        case 1:
            Values = Maths.ReLu(ZVals);
            break;
        default:
            Values = ZVals;
            break;
    }
}
/// <summary>
/// Pooling-layer forward pass: pools each sample in the batch, then optionally
/// normalizes and applies the configured activation function.
/// </summary>
/// <param name="inputs">Batch of flattened input matrices</param>
/// <param name="output">Forwarded to the pooling routine</param>
public override void Calculate(List<double[]> inputs, bool output)
{
    ZVals = new List<double[]>();
    for (int sample = 0; sample < NN.BatchSize; sample++)
    {
        var pooled = Pool(Maths.Convert(inputs[sample]), output);
        ZVals.Add(Maths.Convert(pooled));
    }
    //If normalizing, do so, but only if it won't return an all-zero matrix
    if (NN.NormOutputs && ZVals[0].Length > 1)
    {
        ZVals = Maths.Normalize(ZVals);
    }
    //Use the specified type of activation function
    switch (ActivationFunction)
    {
        case 0:
            Values = Maths.Tanh(ZVals);
            break;
        case 1:
            Values = Maths.ReLu(ZVals);
            break;
        default:
            Values = ZVals;
            break;
    }
}
/// <summary>
/// Convolution-layer forward pass: convolves (or fully convolves, depending on
/// <c>DownOrUp</c>) the kernel with each padded input in the batch, then optionally
/// normalizes and applies the configured activation function.
/// </summary>
/// <param name="inputs">Batch of flattened input matrices</param>
/// <param name="isoutput">Unused here; kept for the shared layer interface</param>
public override void Calculate(List<double[]> inputs, bool isoutput)
{
    ZVals = new List<double[]>();
    for (int batch = 0; batch < NN.BatchSize; batch++)
    {
        var padded = Pad(Maths.Convert(inputs[batch]));
        double[,] convolved;
        if (DownOrUp)
        {
            //Downsampling direction: standard convolution
            convolved = Convolve(Weights, padded);
        }
        else
        {
            //Upsampling direction: full convolution
            convolved = FullConvolve(Weights, padded);
        }
        ZVals.Add(Maths.Convert(convolved));
    }
    //If normalizing, do so, but only if it won't return an all-zero matrix
    if (NN.NormOutputs && ZVals[0].Length > 1)
    {
        ZVals = Maths.Normalize(ZVals);
    }
    //Use the specified type of activation function
    switch (ActivationFunction)
    {
        case 0:
            Values = Maths.Tanh(ZVals);
            break;
        case 1:
            Values = Maths.ReLu(ZVals);
            break;
        default:
            Values = ZVals;
            break;
    }
}
/// <summary>
/// Fully-connected-layer forward pass: computes (weights * inputs) + biases for each
/// sample in the batch, then optionally normalizes and applies the configured
/// activation function. Biases are skipped when this is the output layer.
/// </summary>
/// <param name="inputs">Batch of input vectors</param>
/// <param name="output">Whether this layer is the network's output layer</param>
public override void Calculate(List<double[]> inputs, bool output)
{
    ZVals = new List<double[]>();
    for (int batch = 0; batch < NN.BatchSize; batch++)
    {
        double[] sample = inputs[batch];
        var neurons = new double[Length];
        for (int neuron = 0; neuron < Length; neuron++)
        {
            //Dot product of this neuron's weight row with the input vector
            double sum = 0;
            for (int input = 0; input < InputLength; input++)
            {
                sum += Weights[neuron, input] * sample[input];
            }
            //Output layers don't use biases
            if (!output)
            {
                sum += Biases[neuron];
            }
            neurons[neuron] = sum;
        }
        ZVals.Add(neurons);
    }
    //If normalizing, do so, but only if it won't return an all-zero matrix
    if (NN.NormOutputs && ZVals[0].Length > 1)
    {
        ZVals = Maths.Normalize(ZVals);
    }
    //Use the specified type of activation function
    switch (ActivationFunction)
    {
        case 0:
            Values = Maths.Tanh(ZVals);
            break;
        case 1:
            Values = Maths.ReLu(ZVals);
            break;
        default:
            Values = ZVals;
            break;
    }
}
/// <summary>
/// Test code to use the critic as a classifier. Repeatedly draws a batch of each digit
/// 0-9, scores the critic's outputs against one-hot targets, backpropagates, and
/// reports RMS error / accuracy to the form until <c>Training</c> goes false.
/// </summary>
/// <param name="Critic">The network to train as a classifier</param>
/// <param name="gradientnorm">Unused here; kept for the caller's signature</param>
/// <param name="imgspeed">How many iterations between UI image refreshes</param>
/// <param name="activeform">The form to report progress to</param>
public static void TestTrain(NN Critic, bool gradientnorm, int imgspeed, Form1 activeform)
{
    int formupdateiterator = 0;
    while (Training)
    {
        double score = 0;
        double perccorrect = 0;
        List<List<double[]>> nums = new List<List<double[]>>();
        List<int> labels = new List<int>();
        Random r = new Random();
        //Load a normalized batch of samples for each digit 0-9
        for (int i = 0; i < 10; i++)
        {
            var temp = new List<double[]>();
            for (int j = 0; j < BatchSize; j++)
            {
                temp.Add(Maths.Normalize(IO.FindNextNumber(i)));
            }
            nums.Add(temp);
        }
        //Foreach number
        for (int i = 0; i < 10; i++)
        {
            Critic.Calculate(nums[i]);
            //Foreach sample in the batch
            for (int j = 0; j < BatchSize; j++)
            {
                double max = -99;
                int guess = -1;
                //Foreach output neuron: accumulate squared error vs the one-hot
                //target and track the argmax as the network's guess
                for (int k = 0; k < 10; k++)
                {
                    var value = Critic.Layers[Critic.NumLayers - 1].Values[j][k];
                    score += Math.Pow(value - (k == i ? 1d : 0d), 2);
                    if (value > max) { max = value; guess = k; }
                }
                perccorrect += guess == i ? 1d : 0d;
                labels.Add(guess);
            }
            Critic.CalcGradients(nums[i], null, i, true);
        }
        score /= (10 * BatchSize);
        perccorrect /= (10 * BatchSize);
        score = Math.Sqrt(score);
        Critic.Update();
        //Report values to the front end
        if (Clear)
        {
            Critic.Trials = 0;
            Critic.Error = 0;
            Critic.PercCorrect = 0;
            Clear = false;
        }
        Critic.Trials++;
        //Incremental running mean: new = old * (n-1)/n + sample / n.
        //(The previous weights n/(n+1) and 1/n did not sum to 1, biasing the average.)
        Critic.Error = (Critic.Error * ((Critic.Trials - 1d) / Critic.Trials)) + (score / Critic.Trials);
        Critic.PercCorrect = (Critic.PercCorrect * ((Critic.Trials - 1d) / Critic.Trials)) + (perccorrect / Critic.Trials);
        //Update image (if applicable)
        if (formupdateiterator >= imgspeed)
        {
            int index = r.Next(0, 10);
            var values = Form1.Rescale(Maths.Convert(nums[index][0]));
            var image = new int[28, 28];
            //Convert values to a 2d array (transposed for display)
            for (int i = 0; i < 28; i++)
            {
                for (int ii = 0; ii < 28; ii++)
                {
                    image[ii, i] = (int)values[i, ii];
                }
            }
            activeform.Invoke((Action)delegate
            {
                activeform.image = image;
                activeform.CScore = Critic.Error.ToString();
                activeform.CPerc = Critic.PercCorrect.ToString();
                //labels is appended digit-major (digit i, sample j at i * BatchSize + j),
                //so the guess for digit 'index', sample 0 lives at index * BatchSize.
                //(labels[index] previously showed digit 0's guesses instead.)
                activeform.Label = labels[index * BatchSize].ToString();
                if (Critic.Error > Form1.Cutoff) { Training = false; }
                if (IO.Reset)
                {
                    IO.Reset = false;
                    activeform.Epoch++;
                }
            });
            formupdateiterator = 0;
        }
        formupdateiterator++;
    }
    activeform.Invoke((Action)delegate
    {
        //Notify of being done training
        activeform.DoneTraining = true;
        //Reset errors
        activeform.CScore = null;
        activeform.GScore = null;
    });
}
/// <summary>
/// Applies the accumulated weight and bias gradients as one batch update,
/// with optional RMSProp scaling, gradient normalization, and clipping.
/// Resets <c>WeightGradient</c> and <c>BiasGradient</c> afterwards.
/// </summary>
public override void Descend()
{
    //Calculate gradients
    WUpdates = new double[Length, InputLength];
    BUpdates = new double[Length];
    for (int i = 0; i < Length; i++)
    {
        for (int ii = 0; ii < InputLength; ii++)
        {
            //Normal gradient descent update (MSE derivative scale: 2/batchsize)
            WUpdates[i, ii] = WeightGradient[i, ii] * (2d / NN.BatchSize);
            //Root mean square propagation
            if (NN.UseRMSProp)
            {
                WRMSGrad[i, ii] = (WRMSGrad[i, ii] * NN.RMSDecay) + ((1 - NN.RMSDecay) * (WUpdates[i, ii] * WUpdates[i, ii]));
                //Epsilon term restored: a zero running RMS gradient would otherwise
                //divide by zero and poison the weights with NaN/Infinity.
                WUpdates[i, ii] = WUpdates[i, ii] / (Math.Sqrt(WRMSGrad[i, ii]) + NN.Infinitesimal);
            }
            WUpdates[i, ii] *= NN.LearningRate;
        }
        //Normal gradient descent update
        BUpdates[i] = BiasGradient[i] * (2d / NN.BatchSize);
        //Root mean square propagation
        if (NN.UseRMSProp)
        {
            BRMSGrad[i] = (BRMSGrad[i] * NN.RMSDecay) + ((1 - NN.RMSDecay) * (BUpdates[i] * BUpdates[i]));
            //Same epsilon guard for the bias denominator
            BUpdates[i] = BUpdates[i] / (Math.Sqrt(BRMSGrad[i]) + NN.Infinitesimal);
        }
        BUpdates[i] *= NN.LearningRate;
    }
    //Gradient normalization: rescale updates to unit norm, then by the learning rate
    if (NN.NormGradients)
    {
        WUpdates = Maths.Scale(NN.LearningRate, Maths.Normalize(WUpdates));
        BUpdates = Maths.Scale(NN.LearningRate, Maths.Normalize(BUpdates));
    }
    //Apply updates
    for (int i = 0; i < Length; i++)
    {
        for (int ii = 0; ii < InputLength; ii++)
        {
            //Update weight and average
            Weights[i, ii] -= WUpdates[i, ii];
            AvgGradient -= WUpdates[i, ii];
            //Weight clipping to [-ClipParameter, ClipParameter]
            if (NN.UseClipping)
            {
                if (Weights[i, ii] > NN.ClipParameter) { Weights[i, ii] = NN.ClipParameter; }
                if (Weights[i, ii] < -NN.ClipParameter) { Weights[i, ii] = -NN.ClipParameter; }
            }
        }
        Biases[i] -= BUpdates[i];
        //Bias clipping
        if (NN.UseClipping)
        {
            if (Biases[i] > NN.ClipParameter) { Biases[i] = NN.ClipParameter; }
            if (Biases[i] < -NN.ClipParameter) { Biases[i] = -NN.ClipParameter; }
        }
    }
    //Reset gradients
    WeightGradient = new double[Length, InputLength];
    BiasGradient = new double[Length];
}
/// <summary>
/// Computes the error signal of the layer, also gradients if applicable
/// </summary>
/// <param name="inputs">Previous layer's values</param>
/// <param name="outputlayer">The next layer downstream (null when this is the output layer)</param>
/// <param name="loss">The loss of the layer</param>
/// <param name="calcgradients">Whether or not to calculate gradients in the layer</param>
public void Backprop(List <double[]> inputs, Layer outputlayer, double loss, bool calcgradients)
{
    //Reset errors
    Errors = new List <double[]>();
    //Calculate errors
    if (outputlayer is null)
    {
        //Output layer: error is the derivative of squared error, 2 * (value - target).
        //NOTE(review): 'loss' is used as a scalar target for every neuron here; the
        //leftover comment below suggests a one-hot target (i == loss ? 1 : 0) was
        //intended at some point — confirm against CalcGradients' call site.
        for (int j = 0; j < inputs.Count; j++)
        {
            Errors.Add(new double[Length]);
            for (int i = 0; i < Length; i++)
            {
                //(i == loss ? 1d : 0d)
                Errors[j][i] = 2d * (Values[j][i] - loss);
            }
        }
    }
    else
    {
        //Hidden layer: start from zeroed error vectors, then accumulate per output-layer type
        for (int i = 0; i < inputs.Count; i++)
        {
            Errors.Add(new double[outputlayer.InputLength]);
        }
        if (outputlayer is SumLayer)
        {
            //Errors with respect to the output of the convolution
            //dl/do
            //A sum layer passes each downstream error through to every input element
            for (int i = 0; i < outputlayer.ZVals.Count; i++)
            {
                for (int k = 0; k < outputlayer.Length; k++)
                {
                    for (int j = 0; j < outputlayer.InputLength; j++)
                    {
                        Errors[i][j] += outputlayer.Errors[i][k];
                    }
                }
            }
        }
        //Apply tanhderriv, if applicable, to the output's zvals
        //(activation derivative evaluated at the downstream layer's pre-activations)
        var outputZVals = outputlayer.ZVals;
        if (outputlayer.ActivationFunction == 0)
        {
            outputZVals = Maths.TanhDerriv(outputlayer.ZVals);
        }
        if (outputlayer.ActivationFunction == 1)
        {
            outputZVals = Maths.ReLuDerriv(outputlayer.ZVals);
        }
        if (outputlayer is FullyConnectedLayer)
        {
            var FCLOutput = outputlayer as FullyConnectedLayer;
            //Chain rule: error_j = sum_k W[k,j] * f'(z_k) * error_k
            for (int i = 0; i < outputlayer.ZVals.Count; i++)
            {
                for (int k = 0; k < FCLOutput.Length; k++)
                {
                    for (int j = 0; j < FCLOutput.InputLength; j++)
                    {
                        Errors[i][j] += FCLOutput.Weights[k, j] * outputZVals[i][k] * FCLOutput.Errors[i][k];
                    }
                }
            }
        }
        if (outputlayer is ConvolutionLayer)
        {
            var CLOutput = outputlayer as ConvolutionLayer;
            //Backprop through a convolution is the opposite convolution of the errors
            //with the same kernel (full <-> standard depending on direction), unpadded
            for (int i = 0; i < outputlayer.ZVals.Count; i++)
            {
                if ((outputlayer as ConvolutionLayer).DownOrUp)
                {
                    Errors[i] = Maths.Convert(CLOutput.UnPad(CLOutput.FullConvolve(CLOutput.Weights, Maths.Convert(CLOutput.Errors[i]))));
                }
                else
                {
                    Errors[i] = Maths.Convert(CLOutput.UnPad(CLOutput.Convolve(CLOutput.Weights, Maths.Convert(CLOutput.Errors[i]))));
                }
            }
            //Errors = Maths.Convert(CLOutput.UnPad(CLOutput.FullConvolve(CLOutput.Weights, Maths.Convert(CLOutput.Errors))));
        }
        if (outputlayer is PoolingLayer)
        {
            var PLOutput = outputlayer as PoolingLayer;
            for (int b = 0; b < NN.BatchSize; b++)
            {
                if (PLOutput.DownOrUp)
                {
                    //Downsampling pool: route each downstream error back to the input
                    //position whose (flattened) pooling weight is nonzero — i.e. the
                    //element that was selected by the pool
                    int iterator = 0;
                    var wets = Maths.Convert(PLOutput.Weights);
                    for (int i = 0; i < Length; i++)
                    {
                        if (wets[i] == 0)
                        {
                            continue;
                        }
                        Errors[b][i] = PLOutput.Errors[b][iterator];
                        iterator++;
                    }
                }
                else
                {
                    //Sum the errors
                    //Upsampling pool: each input cell fans out to a PoolSize x PoolSize
                    //region, so its error is the sum over that region
                    double[,] outputerrors = Maths.Convert(PLOutput.Errors[b]);
                    int oel = outputerrors.GetLength(0);
                    int oew = outputerrors.GetLength(1);
                    double[,] errors = new double[oel / PLOutput.PoolSize, oew / PLOutput.PoolSize];
                    for (int i = 0; i < oel; i++)
                    {
                        for (int ii = 0; ii < oew; ii++)
                        {
                            errors[i / PLOutput.PoolSize, ii / PLOutput.PoolSize] += outputerrors[i, ii];
                        }
                    }
                    Errors[b] = Maths.Convert(errors);
                }
            }
        }
    }
    //Normalize errors (if applicable)
    if (NN.NormErrors && Errors[0].Length > 1)
    {
        Errors = Maths.Normalize(Errors);
    }
    if (calcgradients)
    {
        //Pooling and sum layers have no trainable parameters, so no gradients to compute
        if (this is FullyConnectedLayer)
        {
            (this as FullyConnectedLayer).CalcGradients(inputs, outputlayer);
        }
        if (this is ConvolutionLayer)
        {
            (this as ConvolutionLayer).CalcGradients(inputs, outputlayer);
        }
        if (this is PoolingLayer)
        {
            return;
        }
        if (this is SumLayer)
        {
            return;
        }
    }
}