Example #1
 public override void Execute(MyAbstractWeightLayer layer)
 {
     if (layer.Connection == ConnectionType.FULLY_CONNECTED)
     {
         m_RMSPropUpdateKernel.SetupExecution(layer.Neurons);
         m_RMSPropUpdateKernel.Run(
             layer.Input,
             layer.Delta,
             layer.Weights,
             layer.PreviousWeightDelta,
             layer.Bias,
             layer.PreviousBiasDelta,
             Owner.RMS.TrainingRate,
             Owner.RMS.Momentum,
             Owner.L1,
             Owner.L2,
             layer.DropoutMask,
             layer.Input.Count,
             layer.Neurons,
             layer.MeanSquareWeight,
             layer.MeanSquareBias,
             Owner.RMS.SmoothingFactor
             );
     }
     else
     {
         MyLog.ERROR.WriteLine("No method provided to RMS propagate a " + layer.Connection + " connected MyAbstractWeightLayer in " + Owner);
     }
 }
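Example #1 is the RMSProp weight-update task for fully connected layers. The kernel body is not included above, so the following is only a sketch of the standard RMSProp-with-momentum step that the argument list (TrainingRate, Momentum, SmoothingFactor, MeanSquareWeight, PreviousWeightDelta) suggests; the gradient g, the small stabilizer \epsilon, and the exact handling of L1/L2 and the dropout mask are assumptions:

    m_t = \rho \, m_{t-1} + (1 - \rho) \, g_t^2                          (\rho = SmoothingFactor)
    \Delta w_t = \mu \, \Delta w_{t-1} - \frac{\eta \, g_t}{\sqrt{m_t + \epsilon}}   (\eta = TrainingRate, \mu = Momentum)
    w_t = w_{t-1} + \Delta w_t

MeanSquareWeight/MeanSquareBias presumably hold m_t, and PreviousWeightDelta/PreviousBiasDelta hold \Delta w_{t-1}, for the weights and biases respectively.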
Example #2
        public override void Execute(MyAbstractWeightLayer layer)
        {
            if (layer.Connection == ConnectionType.FULLY_CONNECTED)
            {
                ComputeWeightGradientSum(layer);

                m_adadeltaUpdateKernel.SetupExecution(layer.Weights.Count);
                m_adadeltaUpdateKernel.Run(
                    layer.Input,
                    layer.Delta,
                    layer.WeightGradient,
                    layer.BiasGradient,
                    layer.Weights,
                    layer.Bias,
                    Owner.L1,
                    Owner.L2,
                    layer.DropoutMask,
                    layer.Neurons,
                    Owner.BatchSize,
                    layer.Weights.Count,
                    layer.MeanSquareWeight, layer.PreviousWeightDelta, layer.MeanSquareBias, layer.PreviousBiasDelta,
                    Owner.Adadelta.Ro, Owner.Adadelta.Epsilon
                    );
            }
            else if (layer.Connection == ConnectionType.GAUSSIAN)
            {
                // Gaussian hidden layer just propagates delta, no weight updates
            }
            else if (layer.Connection == ConnectionType.CONVOLUTION && layer is MyConvolutionLayer)
            {
                MyConvolutionLayer convLayer = (MyConvolutionLayer)layer;
                m_convAdadeltaUpdateKernel.SetupExecution(convLayer.Weights.Count);
                m_convAdadeltaUpdateKernel.Run(
                    convLayer.Weights,
                    convLayer.Bias,
                    convLayer.Delta,
                    convLayer.PaddedImage,
                    convLayer.InputWidth + convLayer.ZeroPadding + convLayer.ZeroPadding,
                    (convLayer.InputWidth + convLayer.ZeroPadding + convLayer.ZeroPadding) *
                    (convLayer.InputHeight + convLayer.ZeroPadding + convLayer.ZeroPadding),
                    convLayer.FilterWidth,
                    convLayer.FilterWidth * convLayer.FilterHeight,
                    convLayer.FilterWidth * convLayer.FilterHeight * convLayer.InputDepth,
                    convLayer.OutputWidth, convLayer.OutputHeight, convLayer.OutputWidth * convLayer.OutputHeight,
                    convLayer.HorizontalStride, convLayer.VerticalStride,
                    convLayer.L1Term, convLayer.L2Term,
                    convLayer.MeanSquareWeight, convLayer.PreviousWeightDelta, convLayer.MeanSquareBias,
                    convLayer.PreviousBiasDelta,
                    Owner.Adadelta.Ro, Owner.Adadelta.Epsilon,
                    Owner.BatchSize,
                    convLayer.Weights.Count
                    // should be equal to FilterWidth * FilterHeight * FilterCount * InputDepth
                    );
            }
            else
            {
                MyLog.ERROR.WriteLine("No method provided to Adadelta propagate a " + layer.Connection +
                                      " connected MyAbstractWeightLayer in " + Owner);
            }
        }
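Example #2 is the Adadelta counterpart. Again the kernel sources are not shown, so this is just the textbook Adadelta rule (with \rho = Owner.Adadelta.Ro and \epsilon = Owner.Adadelta.Epsilon) that the parameter names suggest the kernels implement; which buffer stores which running average is an assumption based on the names MeanSquareWeight and PreviousWeightDelta:

    E[g^2]_t = \rho \, E[g^2]_{t-1} + (1 - \rho) \, g_t^2
    \Delta w_t = - \frac{\sqrt{E[\Delta w^2]_{t-1} + \epsilon}}{\sqrt{E[g^2]_t + \epsilon}} \, g_t
    E[\Delta w^2]_t = \rho \, E[\Delta w^2]_{t-1} + (1 - \rho) \, \Delta w_t^2
    w_t = w_{t-1} + \Delta w_t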
Example #3
        public override void Execute()
        {
            Owner.Delta.Fill(0.0f);
            // the number of neurons in the ensemble is the same as in each input layer
            m_deltaKernel.SetConstantVariable<float>("Lambda", Lambda);

            int inputLayerCount = Owner.InputConnections.Count(x => x.From is MyAbstractWeightLayer);

            foreach (MyConnection connection in Owner.InputConnections)
            {
                if (connection.From is MyAbstractLayer)
                {
                    MyAbstractLayer prevLayer = connection.From as MyAbstractLayer;

                    if (prevLayer is MyAbstractWeightLayer)
                    {
                        MyAbstractWeightLayer prevWeightLayer = prevLayer as MyAbstractWeightLayer;

                        m_deltaKernel.Run(
                            (int)prevLayer.ActivationFunction,
                            prevWeightLayer.NeuronInput,
                            prevLayer.Output,
                            Owner.Output,
                            Owner.Neurons,
                            prevLayer.Delta,
                            Owner.Delta,
                            inputLayerCount
                            );
                    }
                    prevLayer.Delta.SafeCopyToHost();
                    Owner.Delta.SafeCopyToHost();
                }
            }
            Owner.Delta.SafeCopyToHost();
        }
Example #4
        public void ComputeWeightGradientSum(MyAbstractWeightLayer layer)
        {
            if (Owner.BatchSize == 1) // cuBLAS tends to be slower when BatchSize is 1; in that case the gradient is computed inside the weight-update kernels
                return;

            // WeightGradient = Delta x Transpose(Input)
            MyCublasFactory.Instance.Gemm(Operation.NonTranspose, Operation.Transpose,
                layer.Neurons, layer.Input.Count / Owner.BatchSize, Owner.BatchSize, 1.0f,
                layer.Delta.GetDevice(layer), layer.Neurons,
                layer.Input.GetDevice(layer), layer.Input.Count / Owner.BatchSize,
                0.0f, layer.WeightGradient.GetDevice(layer), layer.Neurons
                );
            
            // BiasGradient = Delta x Transpose(BiasInput); BiasInput is a vector of ones
            MyCublasFactory.Instance.Gemm(Operation.NonTranspose, Operation.Transpose,
                layer.Neurons, 1, Owner.BatchSize, 1.0f,
                layer.Delta.GetDevice(layer), layer.Neurons,
                layer.BiasInput.GetDevice(layer), 1,
                0.0f, layer.BiasGradient.GetDevice(layer), layer.Neurons
                );
        }
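The two Gemm calls in Example #4 simply express the batch-summed gradients as matrix products. With cuBLAS's column-major layout and the leading dimensions passed here, Delta is a (Neurons x BatchSize) matrix and Input a (Input.Count/BatchSize x BatchSize) matrix, so

    WeightGradient = Delta \cdot Input^{T}        (Neurons x InputsPerSample)
    BiasGradient   = Delta \cdot BiasInput^{T}    (Neurons x 1)

i.e. each entry accumulates its gradient contributions over the whole batch; multiplying by the all-ones BiasInput vector just sums each neuron's deltas.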
Example #5
        public override void Init(int nGPU) { } // no kernel initialization needed for this task

        public override void Execute()
        {
            float maxRelDiff = 0.0f;
            float maxAbsDiff = 0.0f;
            int maxDiffLayer = 0;
            int maxDiffWeight = 0;
            float maxDiffWeightValue = 0.0f;
            float maxDiffStepSize = 0.0f;
            float maxDiffAnalyticalGrad = 0.0f;
            float maxDiffNumericalGrad = 0.0f;

            float sampleProbability = 1.0f / Owner.TotalWeights;
            for (int s = 0; s < SamplesPerTimestep; s++)
            {
                // dice roll
                float diceRoll = (float)Rand.NextDouble();

                // convert the dice roll to the index of the weight to sample
                int w = (int)System.Math.Floor(diceRoll / sampleProbability);
                if (w >= Owner.TotalWeights)
                {
                    if (w > Owner.TotalWeights)
                        MyLog.ERROR.Write("w > Owner.TotalWeights"); // just for testing, this should never hit
                    w = Owner.TotalWeights - 1; // just to make it safe, but it should never hit
                }

                // loop through the layers
                MyAbstractLayer layer = Owner.FirstTopologicalLayer;
                while (layer != null)
                {
                    // check for weights
                    if (layer is MyAbstractWeightLayer)
                    {
                        MyAbstractWeightLayer weightLayer = (layer as MyAbstractWeightLayer);
                        if (weightLayer.Weights.Count <= w)
                            w -= weightLayer.Weights.Count;
                        else
                        {
                            weightLayer.Weights.SafeCopyToHost(w, 1); // copy this weight to host
                            float originalWeight = weightLayer.Weights.Host[w]; // save weight
                            float stepSize = System.Math.Abs(originalWeight) * RelativeStepSize; // set stepSize

                            // get errorPlus
                            weightLayer.Weights.Host[w] = originalWeight + stepSize; // increase weight
                            weightLayer.Weights.SafeCopyToDevice(w, 1); // back to device
                            Owner.FeedForward(); // forward the network
                            float errorPlus = Owner.GetError();

                            // get errorMinus
                            weightLayer.Weights.Host[w] = originalWeight - stepSize; // decrease weight
                            weightLayer.Weights.SafeCopyToDevice(w, 1); // back to device
                            Owner.FeedForward(); // forward the network
                            float errorMinus = Owner.GetError();

                            // reset to original
                            weightLayer.Weights.Host[w] = originalWeight; // back to where we started
                            weightLayer.Weights.SafeCopyToDevice(w, 1); // back to device
                            Owner.FeedForward(); // forward the network
                            Owner.GetError(); // this sets the original error

                            // numerical gradient
                            float numericalGradient = (errorPlus - errorMinus) / (2 * stepSize);

                            if (numericalGradient == 0)
                            {
                                MyLog.DEBUG.WriteLine("t: " + SimulationStep + " id: " + weightLayer.Id + " w" + w + ": " + weightLayer.Weights.Host[w] + " step: " + stepSize + " numerical gradient is 0.");
                                break; // continue to next sample
                            }

                            // analytical gradient
                            int n = w % weightLayer.Neurons;
                            int i = (w - n) / weightLayer.Neurons;
                            weightLayer.Delta.SafeCopyToHost(n, 1); // copy delta to host
                            weightLayer.Input.SafeCopyToHost(i, 1); // copy input to host
                            weightLayer.DropoutMask.SafeCopyToHost(n, 1); // copy dropoutmask to host
                            //weightLayer.Weights.SafeCopyToHost(w, 1); // already present at host due to resetting to original
                            if (weightLayer.DropoutMask.Host[n] > 0)
                                break;
                            float analyticalGradient = weightLayer.Delta.Host[n] * weightLayer.Input.Host[i] + Owner.L1 * (weightLayer.Weights.Host[w] < 0.0f ? -1.0f : 1.0f) + Owner.L2 * weightLayer.Weights.Host[w];
                            float relativeDiff = 0.0f;
                            float absoluteDiff = 0.0f;
                            if (analyticalGradient == 0)
                            {
                                MyLog.DEBUG.WriteLine("t: " + SimulationStep + " id: " + weightLayer.Id + " w" + w + ": " + weightLayer.Weights.Host[w] + " step: " + stepSize + " analytical gradient is 0.");
                                break; // continue to next sample
                            }
                            absoluteDiff = System.Math.Abs(numericalGradient - analyticalGradient);
                            relativeDiff = absoluteDiff / (System.Math.Abs(numericalGradient) + System.Math.Abs(analyticalGradient));
                            if (relativeDiff > maxRelDiff && absoluteDiff > ThresholdAbsolute)
                            {
                                maxAbsDiff = absoluteDiff;
                                maxRelDiff = relativeDiff;
                                maxDiffLayer = weightLayer.Id;
                                maxDiffWeight = w;
                                maxDiffWeightValue = weightLayer.Weights.Host[w];
                                maxDiffStepSize = stepSize;
                                maxDiffAnalyticalGrad = analyticalGradient;
                                maxDiffNumericalGrad = numericalGradient;
                            }
                            MyLog.DEBUG.WriteLine("t: " + SimulationStep + " id: " + weightLayer.Id + " w" + w + ": " + weightLayer.Weights.Host[w] + " step: " + stepSize + " AG: " + analyticalGradient + " NG: " + numericalGradient + " diff: " + relativeDiff);
                            break; // continue to next sample
                        }
                    }
                    layer = layer.NextTopologicalLayer;

                    // catch unmatched dice-rolls
                    if (layer == null)
                        MyLog.ERROR.Write("GradientCheck task: Weight w " + w + " not found within " + Owner.TotalWeights + " total weights"); // just for testing, this should never hit
                }
            }
            // handle the relativeDiff we just found
            if (maxRelDiff > ThresholdRelative && maxRelDiff > ThresholdAbsolute)
            {
                MyLog.INFO.WriteLine("Gradient threshold exceeded on SimulationStep: " + SimulationStep);
                MyLog.INFO.WriteLine("Max analytical vs numerical relative gradient difference found in layer id " + maxDiffLayer + " for weight " + maxDiffWeight + ": " + maxDiffWeightValue + " with Step size: " + maxDiffStepSize);
                MyLog.INFO.WriteLine("Analytical gradient: " + maxDiffAnalyticalGrad + " Numerical gradient: " + maxDiffNumericalGrad + " Relative difference: " + maxRelDiff);
                MyLog.INFO.WriteLine();
            }
        }
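For reference, Example #5 compares, for each sampled weight w, the central-difference estimate of the gradient against the analytic one exactly as computed in the code above:

    NG = \frac{E(w + h) - E(w - h)}{2h},  \quad h = |w| \cdot RelativeStepSize
    AG = \delta_n \, x_i + L1 \cdot \mathrm{sign}(w) + L2 \cdot w
    relativeDiff = \frac{|NG - AG|}{|NG| + |AG|}

where \delta_n is the delta of the neuron the weight feeds and x_i the corresponding input. The largest difference found during the timestep is only reported when it exceeds ThresholdRelative (candidates are recorded only if their absolute difference also exceeds ThresholdAbsolute).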
Example #6
        public override void Execute(MyAbstractWeightLayer layer)
        {
            if (layer.Connection == ConnectionType.FULLY_CONNECTED)
            {
                ComputeWeightGradientSum(layer);
                
                m_SGDupdateKernel.SetupExecution(layer.Weights.Count);
                m_SGDupdateKernel.Run(
                    layer.Input,
                    layer.Delta,
                    layer.WeightGradient,
                    layer.BiasGradient,
                    layer.Weights,
                    layer.PreviousWeightDelta,
                    layer.Bias,
                    layer.PreviousBiasDelta,
                    Owner.SGD.TrainingRate,
                    Owner.SGD.Momentum,
                    Owner.L1,
                    Owner.L2,
                    layer.DropoutMask,
                    layer.Neurons,
                    Owner.BatchSize,
                    layer.Weights.Count
                    );

            }
            else if (layer.Connection == ConnectionType.GAUSSIAN)
            {
                // Gaussian hidden layer just propagates delta, no weight updates
            }
            else if (layer.Connection == ConnectionType.PARTIAL_UPDATE && layer is IPartialUpdateLayer)
            {
                // Update some but not all of the weights
                IPartialUpdateLayer partialUpdateLayer = layer as IPartialUpdateLayer;

                m_partialSGDupdateKernel.SetupExecution(layer.Weights.Count);
                m_partialSGDupdateKernel.Run(
                    layer.Input,
                    layer.Delta,
                    layer.Weights,
                    layer.PreviousWeightDelta,
                    layer.Bias,
                    layer.PreviousBiasDelta,
                    Owner.SGD.TrainingRate,
                    Owner.SGD.Momentum,
                    Owner.L1,
                    Owner.L2,
                    layer.DropoutMask,
                    layer.Neurons,
                    layer.Weights.Count,
                    partialUpdateLayer.SuppressUpdatesAt(),
                    partialUpdateLayer.SuppressUpdatesCount()
                );
            }
            else if (layer.Connection == ConnectionType.CONVOLUTION && layer is MyConvolutionLayer)
            {
                MyConvolutionLayer convLayer = (MyConvolutionLayer) layer;
                m_convSGDupdateKernel.SetupExecution(convLayer.Weights.Count);
                m_convSGDupdateKernel.Run(
                    Owner.SGD.TrainingRate, Owner.SGD.Momentum,
                    convLayer.Weights,
                    convLayer.Bias, convLayer.PreviousBiasDelta,
                    convLayer.Delta, convLayer.PreviousWeightDelta,
                    convLayer.PaddedImage,
                    convLayer.InputWidth + convLayer.ZeroPadding + convLayer.ZeroPadding,
                    (convLayer.InputWidth + convLayer.ZeroPadding + convLayer.ZeroPadding)*
                    (convLayer.InputHeight + convLayer.ZeroPadding + convLayer.ZeroPadding),
                    convLayer.FilterWidth,
                    convLayer.FilterWidth*convLayer.FilterHeight,
                    convLayer.FilterWidth*convLayer.FilterHeight*convLayer.InputDepth,
                    convLayer.OutputWidth, convLayer.OutputHeight, convLayer.OutputWidth*convLayer.OutputHeight,
                    convLayer.HorizontalStride, convLayer.VerticalStride,
                    convLayer.L1Term, convLayer.L2Term,
                    Owner.BatchSize,
                    convLayer.Weights.Count
                    // should be equal to FilterWidth * FilterHeight * FilterCount * InputDepth
                    );
            }
            else
            {
                MyLog.ERROR.WriteLine("No method provided to SGD propagate a " + layer.Connection +
                                        " connected MyAbstractWeightLayer in " + Owner);
            }
        }
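Example #6 is the plain SGD-with-momentum task. The kernels are not listed, so only as a hint: their arguments match the usual momentum update with L1/L2 regularization (g is the per-weight gradient, \eta = TrainingRate, \mu = Momentum); the exact order of operations inside the kernel is an assumption:

    \Delta w_t = \mu \, \Delta w_{t-1} - \eta \left( g_t + L1 \cdot \mathrm{sign}(w) + L2 \cdot w \right)
    w_t = w_{t-1} + \Delta w_t

The PARTIAL_UPDATE branch runs the same update but, judging from the extra SuppressUpdatesAt()/SuppressUpdatesCount() arguments, presumably leaves the indicated range of weights untouched.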
Example #7
 public virtual void Execute(MyAbstractWeightLayer layer)
 {
     MyLog.ERROR.WriteLine("No method provided to backpropagate MyAbstractWeightLayer " + layer + " in " + Owner);
 }
Example #8
        public override void Execute()
        {
            MyNode node = Owner.Input.Owner;

            if (node is MyAbstractLayer)
            {
                MyAbstractLayer previousLayer = node as MyAbstractLayer;

                // Reset delta
                previousLayer.Delta.Fill(0);

                // Disable backprop when in generative mode
                if (!Owner.Generate.IsIncomingRised())
                {
                    // Set locations for mean deltas
                    CUdeviceptr meanDeltas = previousLayer.Delta.GetDevicePtr(Owner, 0);
                    // Set locations for sigma deltas
                    CUdeviceptr sigmaDeltas = previousLayer.Delta.GetDevicePtr(Owner, previousLayer.Delta.Count / 2);
                    // Determine input to previous layer
                    CUdeviceptr prevInputPtr = MyAbstractLayer.DetermineInput(previousLayer);
                    // Set locations for sigmas (taken from the previous layer or from the sigma constants)
                    CUdeviceptr sigmas;
                    if (Owner.UseSigmaConstant)
                    {
                        sigmas = Owner.SigmaConstants.GetDevicePtr(Owner);
                    }
                    else
                    {
                        sigmas = Owner.Input.GetDevicePtr(Owner, Owner.Input.Count / 2);
                    }

                    m_samplingDeltaKernel.Run(
                        Convert.ToInt32(Owner.UseSigmaConstant),
                        (int)previousLayer.ActivationFunction,
                        prevInputPtr,
                        sigmas,
                        meanDeltas,
                        sigmaDeltas,
                        Owner.Delta,
                        Owner.RandomNormal,
                        Owner.Neurons
                        );

                    // Regularization needs weights to compute gradients
                    if (Regularize && previousLayer != null && previousLayer is MyAbstractWeightLayer)
                    {
                        MyAbstractWeightLayer previousWeightLayer = previousLayer as MyAbstractWeightLayer;

                        // Try to regularize loss: mean^2 + sigma^2 - log(sigma^2)
                        // In other words regularize means to 0 and sigmas to 1
                        int weightCount = previousWeightLayer.Weights.Count;
                        m_regularizationDeltaKernel.SetConstantVariable<float>("RegularizationCoefficient", RegularizationCoefficient);
                        m_regularizationDeltaKernel.SetupExecution(weightCount);
                        m_regularizationDeltaKernel.Run(
                            Convert.ToInt32(Owner.UseSigmaConstant),
                            (int)previousLayer.ActivationFunction,
                            prevInputPtr,
                            previousLayer.Input,
                            previousWeightLayer.Weights,
                            previousLayer.Output.Count,
                            meanDeltas,
                            sigmaDeltas
                            );
                    }
                }
            }
        }
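The regularization comment in Example #8 ("mean^2 + sigma^2 - log(sigma^2)") matches, up to the constant and the factor of one half, the KL divergence of a diagonal Gaussian from the standard normal commonly used in variational autoencoders:

    KL\big(\mathcal{N}(\mu, \sigma^2) \,\|\, \mathcal{N}(0, 1)\big) = \tfrac{1}{2}\left(\mu^2 + \sigma^2 - \log \sigma^2 - 1\right)

Minimizing it drives the means toward 0 and the sigmas toward 1, which is exactly what the comment describes; whether the kernel keeps the 1/2 and the constant term is not visible here.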