Example #1
 public override void Descend()
 {
     //Calculate the weight updates from the accumulated gradients
     Updates   = new double[KernelSize, KernelSize];
     AvgUpdate = 0;
     for (int i = 0; i < KernelSize; i++)
     {
         for (int ii = 0; ii < KernelSize; ii++)
         {
             Updates[i, ii] = Gradients[i, ii] * (2d / NN.BatchSize);
             //Root mean square propagation
             if (NN.UseRMSProp)
             {
                 RMSGrad[i, ii] = (RMSGrad[i, ii] * NN.RMSDecay) + ((1 - NN.RMSDecay) * (Updates[i, ii] * Updates[i, ii]));
                 //NN.Infinitesimal keeps the division from producing NaN when RMSGrad is zero
                 Updates[i, ii] = Updates[i, ii] / (Math.Sqrt(RMSGrad[i, ii]) + NN.Infinitesimal);
             }
             Updates[i, ii] *= NN.LearningRate;
         }
     }
     //Gradient normalization
     if (NN.NormGradients)
     {
         Updates = Maths.Scale(NN.LearningRate, Maths.Normalize(Updates));
     }
     //Apply updates
     for (int i = 0; i < KernelSize; i++)
     {
         for (int ii = 0; ii < KernelSize; ii++)
         {
             Weights[i, ii] -= Updates[i, ii];
             AvgUpdate      -= Updates[i, ii];
             //Weight clipping
             if (NN.UseClipping)
             {
                 if (Weights[i, ii] > NN.ClipParameter)
                 {
                     Weights[i, ii] = NN.ClipParameter;
                 }
                 if (Weights[i, ii] < -NN.ClipParameter)
                 {
                     Weights[i, ii] = -NN.ClipParameter;
                 }
             }
         }
     }
     //Reset the accumulated gradients for the next batch
     Gradients = new double[KernelSize, KernelSize];
 }
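The RMSProp arithmetic above is easier to see on a single weight. Below is a minimal standalone sketch of the same update rule; the constants stand in for NN.RMSDecay, NN.LearningRate, and NN.Infinitesimal and are illustrative, not the project's actual values.

 using System;

 static class RMSPropSketch
 {
     //Illustrative stand-ins for NN.RMSDecay, NN.LearningRate, NN.Infinitesimal
     const double RMSDecay     = 0.9;
     const double LearningRate = 0.001;
     const double Epsilon      = 1e-8;

     //One RMSProp step for a single weight: a decaying average of squared
     //gradients rescales the raw gradient, giving each weight its own step size
     static double Step(double gradient, ref double rmsGrad)
     {
         rmsGrad = (rmsGrad * RMSDecay) + ((1 - RMSDecay) * gradient * gradient);
         return LearningRate * gradient / (Math.Sqrt(rmsGrad) + Epsilon);
     }

     static void Main()
     {
         double rmsGrad = 0, weight = 1.0;
         //Against a constant gradient, the step size shrinks as rmsGrad grows
         for (int i = 0; i < 3; i++)
         {
             weight -= Step(0.5, ref rmsGrad);
             Console.WriteLine($"step {i}: weight = {weight}");
         }
     }
 }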
Example #2
        /// <summary>
        /// Adds two batches of vectors together element-wise
        /// </summary>
        /// <param name="inputs1">First input batch</param>
        /// <param name="inputs2">Second input batch</param>
        public void Calculate(List <double[]> inputs1, List <double[]> inputs2)
        {
            ZVals = new List <double[]>();
            if (inputs1.Count != inputs2.Count)
            {
                throw new Exception("List sizes do not match");
            }
            for (int b = 0; b < NN.BatchSize; b++)
            {
                if (inputs1[b].Length != inputs2[b].Length)
                {
                    throw new Exception("Array sizes do not match");
                }

                double[] output = new double[inputs1[b].Length];
                for (int i = 0; i < inputs1[b].Length; i++)
                {
                    output[i] = inputs1[b][i] + inputs2[b][i];
                }
                ZVals.Add(output);
            }
            //If normalizing, do so, but only if it won't return an all-zero matrix
            if (NN.NormOutputs && ZVals[0].Length > 1)
            {
                ZVals = Maths.Normalize(ZVals);
            }
            //Use the specified type of activation function
            if (ActivationFunction == 0)
            {
                Values = Maths.Tanh(ZVals); return;
            }
            if (ActivationFunction == 1)
            {
                Values = Maths.ReLu(ZVals); return;
            }
            Values = ZVals;
        }
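For context, a hypothetical call site for this layer might look like the following; the parameterless SumLayer constructor and NN.BatchSize == 2 are assumptions for illustration only.

        //Hypothetical usage: element-wise sum of two batches (assumes NN.BatchSize == 2)
        var sum = new SumLayer();
        var a   = new List<double[]> { new[] { 1.0, 2.0 }, new[] { 3.0, 4.0 } };
        var b   = new List<double[]> { new[] { 0.5, 0.5 }, new[] { 1.0, 1.0 } };
        sum.Calculate(a, b);
        //Before normalization/activation, sum.ZVals is { [1.5, 2.5], [4.0, 5.0] }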
Example #3
 public override void Calculate(List <double[]> inputs, bool output)
 {
     ZVals = new List <double[]>();
     for (int i = 0; i < NN.BatchSize; i++)
     {
         ZVals.Add(Maths.Convert(Pool(Maths.Convert(inputs[i]), output)));
     }
     //If normalizing, do so, but only if it won't return an all-zero matrix
     if (NN.NormOutputs && ZVals[0].Length > 1)
     {
         ZVals = Maths.Normalize(ZVals);
     }
     //Use the specified type of activation function
     if (ActivationFunction == 0)
     {
         Values = Maths.Tanh(ZVals); return;
     }
     if (ActivationFunction == 1)
     {
         Values = Maths.ReLu(ZVals); return;
     }
     Values = ZVals;
 }
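The Pool helper isn't shown in this example. As a rough sketch, a max-pool that records which inputs won (the mask that Example #8's backprop appears to read back out of PLOutput.Weights) might look like this; the PoolSize parameter and mask layout are assumptions, not the project's actual code.

 //Sketch: 2d max-pool that also records a 0/1 mask of the winning inputs
 double[,] MaxPool(double[,] input, int poolSize, out double[,] mask)
 {
     int outL = input.GetLength(0) / poolSize, outW = input.GetLength(1) / poolSize;
     var output = new double[outL, outW];
     mask = new double[input.GetLength(0), input.GetLength(1)];
     for (int i = 0; i < outL; i++)
     {
         for (int ii = 0; ii < outW; ii++)
         {
             double max = double.NegativeInfinity; int mi = 0, mii = 0;
             for (int p = 0; p < poolSize; p++)
             {
                 for (int pp = 0; pp < poolSize; pp++)
                 {
                     double v = input[(i * poolSize) + p, (ii * poolSize) + pp];
                     if (v > max) { max = v; mi = (i * poolSize) + p; mii = (ii * poolSize) + pp; }
                 }
             }
             output[i, ii] = max;
             mask[mi, mii] = 1; //Only the winning input receives the backpropagated error
         }
     }
     return output;
 }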
Example #4
 /// <summary>
 /// Convolves the kernel with the padded input matrix
 /// (a full convolution is used instead when the layer upsamples)
 /// </summary>
 /// <param name="inputs">The input batch</param>
 /// <param name="isoutput">Whether to use hyperbolic tangent on the output</param>
 public override void Calculate(List <double[]> inputs, bool isoutput)
 {
     ZVals = new List <double[]>();
     for (int b = 0; b < NN.BatchSize; b++)
     {
         ZVals.Add(Maths.Convert(DownOrUp ? Convolve(Weights, Pad(Maths.Convert(inputs[b]))) : FullConvolve(Weights, Pad(Maths.Convert(inputs[b])))));
     }
     //If normalizing, do so, but only if it won't return an all-zero matrix
     if (NN.NormOutputs && ZVals[0].Length > 1)
     {
         ZVals = Maths.Normalize(ZVals);
     }
     //Use the specified type of activation function
     if (ActivationFunction == 0)
     {
         Values = Maths.Tanh(ZVals); return;
     }
     if (ActivationFunction == 1)
     {
         Values = Maths.ReLu(ZVals); return;
     }
     Values = ZVals;
 }
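Convolve and FullConvolve aren't shown here. For orientation, a "valid" convolution (strictly, cross-correlation) shrinks the input by kernelSize - 1 in each dimension, while a "full" convolution lets the kernel overhang the input and grows it instead, which is presumably why the two branches above differ. A minimal sketch of the valid case, with stride 1 assumed:

 //Sketch of a stride-1 "valid" convolution: each output cell is the sum of
 //elementwise products between the kernel and the input window under it
 double[,] ConvolveValid(double[,] kernel, double[,] input)
 {
     int kSize = kernel.GetLength(0);
     int outL  = input.GetLength(0) - kSize + 1;
     int outW  = input.GetLength(1) - kSize + 1;
     var output = new double[outL, outW];
     for (int i = 0; i < outL; i++)
     {
         for (int ii = 0; ii < outW; ii++)
         {
             double sum = 0;
             for (int k = 0; k < kSize; k++)
             {
                 for (int kk = 0; kk < kSize; kk++)
                 {
                     sum += kernel[k, kk] * input[i + k, ii + kk];
                 }
             }
             output[i, ii] = sum;
         }
     }
     return output;
 }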
Example #5
 public override void Calculate(List <double[]> inputs, bool output)
 {
     ZVals = new List <double[]>();
     for (int b = 0; b < NN.BatchSize; b++)
     {
         var vals = new double[Length];
         for (int k = 0; k < Length; k++)
         {
             //Values = (weights * inputs) + biases
             for (int j = 0; j < InputLength; j++)
             {
                 vals[k] += Weights[k, j] * inputs[b][j];
             }
             //Output layers don't use biases
             if (!output)
             {
                 vals[k] += Biases[k];
             }
         }
         ZVals.Add(vals);
     }
     //If normalizing, do so, but only if it won't return an all-zero matrix
     if (NN.NormOutputs && ZVals[0].Length > 1)
     {
         ZVals = Maths.Normalize(ZVals);
     }
     //Use the specified type of activation function
     if (ActivationFunction == 0)
     {
         Values = Maths.Tanh(ZVals); return;
     }
     if (ActivationFunction == 1)
     {
         Values = Maths.ReLu(ZVals); return;
     }
     Values = ZVals;
 }
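Per batch entry this is a plain affine transform, vals = W·x + b. A tiny standalone check with illustrative sizes (Length = 2, InputLength = 3):

 //vals = W·x + b for one sample
 var W = new double[2, 3] { { 1, 0, 0 }, { 0, 1, 1 } };
 var b = new double[] { 0.5, -0.5 };
 var x = new double[] { 1.0, 2.0, 3.0 };
 var vals = new double[2];
 for (int k = 0; k < 2; k++)
 {
     for (int j = 0; j < 3; j++)
     {
         vals[k] += W[k, j] * x[j];
     }
     vals[k] += b[k];
 }
 //vals is { 1.5, 4.5 }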
Example #6
        /// <summary>
        /// Test code to use the critic as a classifier
        /// </summary>
        public static void TestTrain(NN Critic, bool gradientnorm, int imgspeed, Form1 activeform)
        {
            int formupdateiterator = 0;

            //Test code to generate a new layer with predefined qualities

            //List<Layer> layers = new List<Layer>() { new ConvolutionLayer(4, 784) { DownOrUp = true, Stride = 1 }.Init(false), new ConvolutionLayer(3, 625){ DownOrUp = true, Stride = 1 }.Init(false),
            //    new ConvolutionLayer(2, 529){ DownOrUp = true, Stride = 1 }.Init(false), new FullyConnectedLayer(100, 484).Init(false), new FullyConnectedLayer(10, 100).Init(true) };
            //List<bool> tans = new List<bool>() { true, true, true, true, true};
            //List<bool> bns = new List<bool>() { false, false, false, false, false };
            //List<bool> ress = new List<bool>() { false, false, false, false, false };
            //NN Critic = new NN().Init(layers, tans, ress, bns);

            while (Training)
            {
                double mean                  = 0;
                double stddev                = 0;
                double score                 = 0;
                double perccorrect           = 0;
                List <List <double[]> > nums = new List <List <double[]> >();
                List <int> labels            = new List <int>();
                Random     r                 = new Random();
                for (int i = 0; i < 10; i++)
                {
                    var temp = new List <double[]>();
                    for (int j = 0; j < BatchSize; j++)
                    {
                        temp.Add(Maths.Normalize(IO.FindNextNumber(i)));
                        //var tmpmean = Maths.CalcMean(temp[j]);
                        //mean += tmpmean;
                        //stddev += Maths.CalcStdDev(temp[j], tmpmean);
                    }
                    nums.Add(temp);
                }

                //Batch normalization
                //mean /= 10 * batchsize; stddev /= 10 * batchsize;
                //for (int i = 0; i < 10; i++)
                //{
                //    nums[i] = Maths.BatchNormalize(nums[i], mean, stddev);
                //}

                //Foreach number
                for (int i = 0; i < 10; i++)
                {
                    Critic.Calculate(nums[i]);
                    //Foreach sample in the batch
                    for (int j = 0; j < BatchSize; j++)
                    {
                        double max   = double.NegativeInfinity;
                        int    guess = -1;
                        //Foreach output neuron
                        for (int k = 0; k < 10; k++)
                        {
                            var value = Critic.Layers[Critic.NumLayers - 1].Values[j][k];
                            score += Math.Pow(value - (k == i ? 1d : 0d), 2);
                            if (value > max)
                            {
                                max = value; guess = k;
                            }
                        }
                        perccorrect += guess == i ? 1d : 0d;
                        labels.Add(guess);
                    }
                    Critic.CalcGradients(nums[i], null, i, true);
                }

                score       /= (10 * BatchSize);
                perccorrect /= (10 * BatchSize);
                score        = Math.Sqrt(score);

                Critic.Update();

                //Report values to the front end
                if (Clear)
                {
                    Critic.Trials = 0; Critic.Error = 0; Critic.PercCorrect = 0; Clear = false;
                }

                Critic.Trials++;
                //Incremental running mean: weight the old mean by (n - 1)/n and the new sample by 1/n
                Critic.Error       = (Critic.Error * ((Critic.Trials - 1d) / Critic.Trials)) + (score * (1d / Critic.Trials));
                Critic.PercCorrect = (Critic.PercCorrect * ((Critic.Trials - 1d) / Critic.Trials)) + (perccorrect * (1d / Critic.Trials));

                //Update image (if applicable)
                if (formupdateiterator >= imgspeed)
                {
                    //Maths.Rescale(list8[0], mean8, stddev8);
                    int index  = r.Next(0, 10);
                    var values = Form1.Rescale(Maths.Convert(nums[index][0]));
                    var image  = new int[28, 28];
                    //Convert values to a 2d image (note the transpose)
                    for (int i = 0; i < 28; i++)
                    {
                        for (int ii = 0; ii < 28; ii++)
                        {
                            image[ii, i] = (int)values[i, ii];
                        }
                    }
                    activeform.Invoke((Action) delegate
                    {
                        activeform.image  = image;
                        activeform.CScore = Critic.Error.ToString();
                        activeform.CPerc  = Critic.PercCorrect.ToString();
                        //Critic.Layers[Critic.NumLayers - 1].Values[0][index].ToString();
                        activeform.Label = labels[index].ToString();
                        if (Critic.Error > Form1.Cutoff)
                        {
                            Training = false;
                        }
                        if (IO.Reset)
                        {
                            IO.Reset = false;
                            activeform.Epoch++;
                        }
                    });
                    formupdateiterator = 0;
                }
                formupdateiterator++;
            }
            activeform.Invoke((Action) delegate
            {
                //Notify of being done training
                activeform.DoneTraining = true;
                //Reset errors
                activeform.CScore = null;
                activeform.GScore = null;
            });
        }
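The Error and PercCorrect bookkeeping above maintains an incremental running mean. The identity it relies on, in isolation:

        //Incremental running mean: mean_n = mean_(n-1) * (n - 1)/n + x_n / n
        double runningMean = 0;
        int n = 0;
        foreach (double x in new[] { 1.0, 2.0, 3.0, 4.0 })
        {
            n++;
            runningMean = (runningMean * ((n - 1d) / n)) + (x / n);
        }
        //runningMean is 2.5, the same as summing once and dividing at the end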
Example #7
        /// <summary>
        /// Applies the accumulated gradients to the weights and biases as a batch,
        /// using the batch size, clip parameter, and RMS decay configured on the NN
        /// </summary>
        public override void Descend()
        {
            //Calculate gradients
            WUpdates = new double[Length, InputLength];
            BUpdates = new double[Length];

            for (int i = 0; i < Length; i++)
            {
                for (int ii = 0; ii < InputLength; ii++)
                {
                    //Normal gradient descent update
                    WUpdates[i, ii] = WeightGradient[i, ii] * (2d / NN.BatchSize);
                    //Root mean square propagation
                    if (NN.UseRMSProp)
                    {
                        WRMSGrad[i, ii] = (WRMSGrad[i, ii] * NN.RMSDecay) + ((1 - NN.RMSDecay) * (WUpdates[i, ii] * WUpdates[i, ii]));
                        //NN.Infinitesimal keeps the division from producing NaN when WRMSGrad is zero
                        WUpdates[i, ii] = WUpdates[i, ii] / (Math.Sqrt(WRMSGrad[i, ii]) + NN.Infinitesimal);
                    }
                    WUpdates[i, ii] *= NN.LearningRate;
                }
                //Normal gradient descent update
                BUpdates[i] = BiasGradient[i] * (2d / NN.BatchSize);
                //Root mean square propagation
                if (NN.UseRMSProp)
                {
                    BRMSGrad[i] = (BRMSGrad[i] * NN.RMSDecay) + ((1 - NN.RMSDecay) * (BUpdates[i] * BUpdates[i]));
                    //NN.Infinitesimal keeps the division from producing NaN when BRMSGrad is zero
                    BUpdates[i] = BUpdates[i] / (Math.Sqrt(BRMSGrad[i]) + NN.Infinitesimal);
                }
                BUpdates[i] *= NN.LearningRate;
            }
            //Gradient normalization
            if (NN.NormGradients)
            {
                WUpdates = Maths.Scale(NN.LearningRate, Maths.Normalize(WUpdates));
                BUpdates = Maths.Scale(NN.LearningRate, Maths.Normalize(BUpdates));
            }
            //Apply updates
            for (int i = 0; i < Length; i++)
            {
                for (int ii = 0; ii < InputLength; ii++)
                {
                    //Update weight and average
                    Weights[i, ii] -= WUpdates[i, ii];
                    AvgGradient    -= WUpdates[i, ii];
                    //Weight clipping
                    if (NN.UseClipping)
                    {
                        if (Weights[i, ii] > NN.ClipParameter)
                        {
                            Weights[i, ii] = NN.ClipParameter;
                        }
                        if (Weights[i, ii] < -NN.ClipParameter)
                        {
                            Weights[i, ii] = -NN.ClipParameter;
                        }
                    }
                }
                Biases[i] -= BUpdates[i];
                //Bias clipping
                if (NN.UseClipping)
                {
                    if (Biases[i] > NN.ClipParameter)
                    {
                        Biases[i] = NN.ClipParameter;
                    }
                    if (Biases[i] < -NN.ClipParameter)
                    {
                        Biases[i] = -NN.ClipParameter;
                    }
                }
            }

            //Reset gradients
            WeightGradient = new double[Length, InputLength];
            BiasGradient   = new double[Length];
        }
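Incidentally, each clipping block above is a clamp; if the project targets a .NET version with Math.Clamp (.NET Core 2.0+, an assumption about the target framework), the weight case collapses to one line:

            //Equivalent to the two-sided clipping check above
            Weights[i, ii] = Math.Clamp(Weights[i, ii], -NN.ClipParameter, NN.ClipParameter);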
Example #8
        /// <summary>
        /// Computes the error signal of the layer, and its gradients if applicable
        /// </summary>
        /// <param name="inputs">Previous layer's values</param>
        /// <param name="outputlayer">The next layer, or null if this layer is the output layer</param>
        /// <param name="loss">The loss of the layer</param>
        /// <param name="calcgradients">Whether or not to calculate gradients in the layer</param>
        public void Backprop(List <double[]> inputs, Layer outputlayer, double loss, bool calcgradients)
        {
            //Reset errors
            Errors = new List <double[]>();

            //Calculate errors
            if (outputlayer is null)
            {
                for (int j = 0; j < inputs.Count; j++)
                {
                    Errors.Add(new double[Length]);
                    for (int i = 0; i < Length; i++)
                    {
                        //(i == loss ? 1d : 0d)
                        Errors[j][i] = 2d * (Values[j][i] - loss);
                    }
                }
            }
            else
            {
                for (int i = 0; i < inputs.Count; i++)
                {
                    Errors.Add(new double[outputlayer.InputLength]);
                }
                if (outputlayer is SumLayer)
                {
                    //Errors with respect to this layer's output
                    //dl/do
                    for (int i = 0; i < outputlayer.ZVals.Count; i++)
                    {
                        for (int k = 0; k < outputlayer.Length; k++)
                        {
                            for (int j = 0; j < outputlayer.InputLength; j++)
                            {
                                Errors[i][j] += outputlayer.Errors[i][k];
                            }
                        }
                    }
                }

                //Apply the activation function's derivative (TanhDerriv or ReLuDerriv), if applicable, to the output layer's z-values
                var outputZVals = outputlayer.ZVals;
                if (outputlayer.ActivationFunction == 0)
                {
                    outputZVals = Maths.TanhDerriv(outputlayer.ZVals);
                }
                if (outputlayer.ActivationFunction == 1)
                {
                    outputZVals = Maths.ReLuDerriv(outputlayer.ZVals);
                }

                if (outputlayer is FullyConnectedLayer)
                {
                    var FCLOutput = outputlayer as FullyConnectedLayer;
                    for (int i = 0; i < outputlayer.ZVals.Count; i++)
                    {
                        for (int k = 0; k < FCLOutput.Length; k++)
                        {
                            for (int j = 0; j < FCLOutput.InputLength; j++)
                            {
                                Errors[i][j] += FCLOutput.Weights[k, j] * outputZVals[i][k] * FCLOutput.Errors[i][k];
                            }
                        }
                    }
                }
                if (outputlayer is ConvolutionLayer)
                {
                    var CLOutput = outputlayer as ConvolutionLayer;
                    for (int i = 0; i < outputlayer.ZVals.Count; i++)
                    {
                        if (CLOutput.DownOrUp)
                        {
                            Errors[i] = Maths.Convert(CLOutput.UnPad(CLOutput.FullConvolve(CLOutput.Weights, Maths.Convert(CLOutput.Errors[i]))));
                        }
                        else
                        {
                            Errors[i] = Maths.Convert(CLOutput.UnPad(CLOutput.Convolve(CLOutput.Weights, Maths.Convert(CLOutput.Errors[i]))));
                        }
                    }

                    //Errors = Maths.Convert(CLOutput.UnPad(CLOutput.FullConvolve(CLOutput.Weights, Maths.Convert(CLOutput.Errors))));
                }
                if (outputlayer is PoolingLayer)
                {
                    var PLOutput = outputlayer as PoolingLayer;
                    for (int b = 0; b < NN.BatchSize; b++)
                    {
                        if (PLOutput.DownOrUp)
                        {
                            int iterator = 0;
                            var wets     = Maths.Convert(PLOutput.Weights);
                            for (int i = 0; i < Length; i++)
                            {
                                if (wets[i] == 0)
                                {
                                    continue;
                                }
                                Errors[b][i] = PLOutput.Errors[b][iterator];
                                iterator++;
                            }
                        }
                        else
                        {
                            //Sum the errors
                            double[,] outputerrors = Maths.Convert(PLOutput.Errors[b]);
                            int oel = outputerrors.GetLength(0);
                            int oew = outputerrors.GetLength(1);
                            double[,] errors = new double[oel / PLOutput.PoolSize, oew / PLOutput.PoolSize];
                            for (int i = 0; i < oel; i++)
                            {
                                for (int ii = 0; ii < oew; ii++)
                                {
                                    errors[i / PLOutput.PoolSize, ii / PLOutput.PoolSize] += outputerrors[i, ii];
                                }
                            }
                            Errors[b] = Maths.Convert(errors);
                        }
                    }
                }
            }
            //Normalize errors (if applicable)
            if (NN.NormErrors && Errors[0].Length > 1)
            {
                Errors = Maths.Normalize(Errors);
            }
            if (calcgradients)
            {
                if (this is FullyConnectedLayer)
                {
                    (this as FullyConnectedLayer).CalcGradients(inputs, outputlayer);
                }
                if (this is ConvolutionLayer)
                {
                    (this as ConvolutionLayer).CalcGradients(inputs, outputlayer);
                }
                if (this is PoolingLayer)
                {
                    return;
                }
                if (this is SumLayer)
                {
                    return;
                }
            }
        }
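The fully connected branch above is the standard chain rule: each upstream error flows back through the weight that produced it, scaled by the activation derivative. A single-sample sketch of that rule, with illustrative names:

 //inputErrors[j] = sum over k of W[k, j] * f'(z[k]) * outputErrors[k]
 double[] BackpropFullyConnected(double[,] weights, double[] zDerivs, double[] outputErrors)
 {
     int outLen = weights.GetLength(0);
     int inLen  = weights.GetLength(1);
     var inputErrors = new double[inLen];
     for (int k = 0; k < outLen; k++)
     {
         for (int j = 0; j < inLen; j++)
         {
             inputErrors[j] += weights[k, j] * zDerivs[k] * outputErrors[k];
         }
     }
     return inputErrors;
 }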