protected void EstimateWeightSecondDerivative(RectangularStep upstream, RectangularStep downstream, int downstreamX, int downstreamY)
{
    double weight2ndDerivative = 0;
    int downstreamIndex = downstreamY * downstream.Width + downstreamX;
    int upstreamIndex = (downstreamY * Height * upstream.Width) + downstreamX * Width;
    double downstreamSecondDerivative = downstream.ErrorDerivative[downstreamIndex];
    double upstreamSecondDerivative = Weight * Weight * downstreamSecondDerivative;

    // This loop is equivalent to the sigma in Equation 19 of Gradient-Based Learning Applied to Document Recognition.
    for (int y = 0; y < Height; y++)
    {
        for (int x = 0; x < Width; x++)
        {
            double upstreamState = upstream.Output[upstreamIndex];

            // Here we calculate (d^2)Ej/(dWij)^2 by multiplying the 2nd derivative of E with respect to the sum of inputs, Aj,
            // by the state of Oi, the upstream unit, squared. Refer to Equation 25 in the document.
            // The summing happening here is described by Equation 23.
            weight2ndDerivative += downstreamSecondDerivative * upstreamState * upstreamState;

            // This implements the last sigma of Equation 27.
            // It propagates error second derivatives back to the previous layer, but they still need to be multiplied
            // by the second derivative of the activation function at the previous layer.
            upstream.ErrorDerivative[upstreamIndex] = upstreamSecondDerivative;

            upstreamIndex += 1;
        }
        upstreamIndex += upstream.Width - Width;
    }

    WeightStepSize += weight2ndDerivative;
}
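// A minimal, standalone sketch (not part of the class above) of the curvature estimate that
// EstimateWeightSecondDerivative accumulates: h_w = sum_j (d^2E/dA_j^2) * O_j^2, i.e. Equation 25
// summed over every position the shared weight is applied at (Equation 23). The method and array
// names are assumptions for illustration only; they do not exist in this code base.
static double SharedWeightCurvatureSketch(double[] upstreamStates, double[] downstreamSecondDerivatives)
{
    double curvature = 0.0;
    for (int j = 0; j < downstreamSecondDerivatives.Length; j++)
    {
        // Each term is the downstream unit's (d^2)E/(dA_j)^2 scaled by the squared state of the
        // upstream unit this weight connects it to.
        curvature += downstreamSecondDerivatives[j] * upstreamStates[j] * upstreamStates[j];
    }
    return curvature;
}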
protected void PropogateError(RectangularStep upstream, RectangularStep downstream, int weightX, int weightY, int mapNumber)
{
    int weightIndex = mapNumber * Width * Height + weightY * Width + weightX;
    double weight = Weight[weightIndex];
    int downstreamIndex = 0;
    double weightError = 0.0;
    int upstreamIndex = (weightY * upstream.Width) + weightX;
    double weightStepSize = WeightStepSize[weightIndex];

    // This loop is equivalent to the sigma in Equation 19 of Gradient-Based Learning Applied to Document Recognition.
    for (int y = 0; y < downstream.Height; y++)
    {
        for (int x = 0; x < downstream.Width; x++)
        {
            double upstreamState = upstream.Output[upstreamIndex];
            double downstreamErrorDerivative = downstream.ErrorDerivative[downstreamIndex];

            // Accumulate the input's error gradient: summed over all downstream outputs,
            // dE/dA_j multiplied by dA_j/dO_i (i.e. the weight), giving dE/dO_i.
            double inputError = downstreamErrorDerivative * weight;
            upstream.ErrorDerivative[upstreamIndex] += inputError;

            // Accumulate the error's first derivative with respect to this weight: dE/dA_j * O_i.
            double weightErrorGradient = downstreamErrorDerivative * upstreamState;
            weightError += weightErrorGradient;

            downstreamIndex += 1;
            upstreamIndex += 1;
        }
        upstreamIndex += Width - 1; // Equal to: upstream.Width - downstream.Width
    }

    double deltaWeight = weightError * weightStepSize;
    Weight[weightIndex] -= deltaWeight;
}
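// A standalone sketch (not part of the class above) of the two accumulations PropogateError performs
// for one shared kernel weight, with the 2-D window indexing abstracted away into parallel flat arrays:
// upstreamStates[j] is the upstream unit this weight multiplies when producing downstream unit j.
// The method and array names are assumptions for illustration only.
static double SharedWeightBackpropSketch(double[] upstreamStates, double[] upstreamErrorDerivatives,
                                         double[] downstreamErrorDerivatives, double weight)
{
    double weightGradient = 0.0;
    for (int j = 0; j < downstreamErrorDerivatives.Length; j++)
    {
        // dE/dO_i += w * dE/dA_j: error flowing back to the upstream unit through this weight.
        upstreamErrorDerivatives[j] += weight * downstreamErrorDerivatives[j];

        // dE/dw += dE/dA_j * O_i: the weight is shared, so its gradient sums over every position it was used at.
        weightGradient += downstreamErrorDerivatives[j] * upstreamStates[j];
    }
    return weightGradient; // The caller would then apply something like: weight -= stepSize * weightGradient.
}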
protected override void EstimateBiasSecondDerivative(RectangularStep downstream)
{
    for (int i = 0; i < downstream.Length; i++)
    {
        // Accumulate the sum of the second derivatives of the error with respect to the bias weight.
        // Note that the bias is implemented as an always-on neuron with a single shared weight to the
        // output neurons, so the squared-state factor is 1.0 * 1.0.
        BiasStepSize += downstream.ErrorDerivative[i] * 1.0 * 1.0;
    }
}
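// Equivalent view of the loop above (illustration only): because the bias acts as a unit whose output
// is always 1.0, the squared-state factor drops out and the curvature estimate is simply the sum of
// the downstream second derivatives. The helper name is an assumption, not part of this code base.
static double BiasCurvatureSketch(double[] downstreamSecondDerivatives)
{
    double curvature = 0.0;
    foreach (double d2 in downstreamSecondDerivatives)
    {
        curvature += d2; // * 1.0 * 1.0 for the always-on bias input
    }
    return curvature;
}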
protected override void TrainCore(Step downstream)
{
    RectangularStep step = (RectangularStep)downstream;
    Debug.Assert(MapCount == downstream.Upstream.Count);

    for (int i = 0; i < MapCount; i++)
    {
        PropogateError(step, i);
    }
}
protected override void PropogateError(RectangularStep downstream, int mapNumber)
{
    RectangularStep upstream = downstream.Upstream[mapNumber];

    for (int y = 0; y < Height; y++)
    {
        for (int x = 0; x < Width; x++)
        {
            PropogateError(upstream, downstream, x, y, mapNumber);
        }
    }
}
protected override void PropogateUnitSecondDerivatives(RectangularStep downstream, int mapNumber)
{
    RectangularStep upstream = downstream.Upstream[mapNumber];

    for (int y = 0; y < Height; y++)
    {
        for (int x = 0; x < Width; x++)
        {
            PropogateUnitSecondDerivatives(upstream, downstream, x, y, mapNumber);
        }
    }
}
protected override void PreTrainCore(Step downstream)
{
    RectangularStep step = (RectangularStep)downstream;
    Debug.Assert(MapCount == downstream.Upstream.Count);

    for (int i = 0; i < MapCount; i++)
    {
        PropogateUnitSecondDerivatives(step, i);
    }

    EstimateBiasSecondDerivative(step);
}
protected override void PropogateUnitSecondDerivatives(RectangularStep downstream, int mapNumber)
{
    RectangularStep upstream = downstream.Upstream[mapNumber];

    for (int y = 0; y < downstream.Height; y++)
    {
        for (int x = 0; x < downstream.Width; x++)
        {
            EstimateWeightSecondDerivative(upstream, downstream, x, y);
        }
    }
}
protected override void PropogateError(RectangularStep downstream, int mapNumber)
{
    RectangularStep upstream = downstream.Upstream[mapNumber];

    for (int y = 0; y < downstream.Height; y++)
    {
        for (int x = 0; x < downstream.Width; x++)
        {
            PropogateError(downstream, upstream, x, y);
        }
    }
}
protected override void PropogateForward(RectangularStep downstream, int mapNumber)
{
    RectangularStep upstream = downstream.Upstream[mapNumber];
    int index = 0;

    for (int y = 0; y < downstream.Height; y++)
    {
        for (int x = 0; x < downstream.Width; x++)
        {
            // Each downstream unit pools a non-overlapping Width x Height window of the upstream map.
            downstream.WeightedInputs[index++] += PropogateForward(upstream, x * Width, y * Height, mapNumber);
        }
    }
}
protected double PropogateForward(RectangularStep upstream, int upstreamX, int upstreamY, int mapNumber)
{
    Debug.Assert(upstreamX + Width <= upstream.Width);   // Check we are staying within the width limit of the step.
    Debug.Assert(upstreamY + Height <= upstream.Height); // Check we are staying within the height limit of the step.

    double result = Bias;
    int upstreamIndex = (upstreamY * upstream.Width) + upstreamX;

    for (int y = 0; y < Height; y++)
    {
        for (int x = 0; x < Width; x++)
        {
            result += upstream.Output[upstreamIndex] * Weight;
            upstreamIndex += 1;
        }
        upstreamIndex += upstream.Width - Width;
    }

    return result;
}
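// A self-contained sketch of the flat-array window walk used by PropogateForward above (illustration
// only; the method and parameter names are assumptions). A window anchored at (upstreamX, upstreamY)
// is read out of a row-major array: incrementing by 1 moves along a row, and adding
// (mapWidth - windowWidth) at the end of each row jumps to the start of the same window on the next row.
static double WindowSumSketch(double[] map, int mapWidth, int windowWidth, int windowHeight,
                              int upstreamX, int upstreamY)
{
    double sum = 0.0;
    int index = upstreamY * mapWidth + upstreamX;
    for (int y = 0; y < windowHeight; y++)
    {
        for (int x = 0; x < windowWidth; x++)
        {
            sum += map[index];
            index += 1;
        }
        index += mapWidth - windowWidth; // skip the rest of the row, landing under the window's left edge
    }
    return sum;
}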
protected void PropogateError(RectangularStep downstream, RectangularStep upstream, int downstreamX, int downstreamY)
{
    int downstreamIndex = downstreamY * downstream.Width + downstreamX;
    int upstreamIndex = (downstreamY * Height * upstream.Width) + downstreamX * Width;
    double downstreamErrorDerivative = downstream.ErrorDerivative[downstreamIndex];
    double upstreamError = Weight * downstreamErrorDerivative;
    double weightError = 0.0;

    for (int y = 0; y < Height; y++)
    {
        for (int x = 0; x < Width; x++)
        {
            // The subsampling windows do not overlap, so each upstream unit receives error from exactly
            // one downstream unit and the derivative can be assigned rather than accumulated.
            upstream.ErrorDerivative[upstreamIndex] = upstreamError;

            // Accumulate the shared weight's gradient: dE/dA_j * O_i, summed over the window.
            double weightErrorGradient = downstreamErrorDerivative * upstream.Output[upstreamIndex];
            weightError += weightErrorGradient;

            upstreamIndex += 1;
        }
        upstreamIndex += upstream.Width - Width;
    }

    Weight -= weightError * WeightStepSize;
}
protected void PropogateUnitSecondDerivatives(RectangularStep upstream, RectangularStep downstream, int weightX, int weightY, int mapNumber)
{
    double weight2ndDerivative = 0;
    int weightIndex = mapNumber * Width * Height + weightY * Width + weightX;
    double weight = Weight[weightIndex];
    int downstreamIndex = 0;
    int upstreamIndex = (weightY * upstream.Width) + weightX;

    // This loop is equivalent to the sigma in Equation 19 of Gradient-Based Learning Applied to Document Recognition.
    for (int y = 0; y < downstream.Height; y++)
    {
        for (int x = 0; x < downstream.Width; x++)
        {
            double upstreamInput = upstream.Output[upstreamIndex];

            // (d^2)E/(dAj)^2, where Aj is the sum of inputs to this downstream unit.
            double downstreamError2ndDerivative = downstream.ErrorDerivative[downstreamIndex];

            // Here we calculate (d^2)Ej/(dWij)^2 by multiplying the 2nd derivative of E with respect to the sum of inputs, Aj,
            // by the state of Oi, the upstream unit, squared. Refer to Equation 25 in the document.
            // The summing happening here is described by Equation 23.
            weight2ndDerivative += downstreamError2ndDerivative * upstreamInput * upstreamInput;

            // This implements the last sigma of Equation 27.
            // It propagates error second derivatives back to the previous layer, but they still need to be multiplied
            // by the second derivative of the activation function at the previous layer.
            upstream.ErrorDerivative[upstreamIndex] += weight * weight * downstreamError2ndDerivative;

            downstreamIndex += 1;
            upstreamIndex += 1;
        }
        upstreamIndex += Width - 1; // Equal to: upstream.Width - downstream.Width
    }

    WeightStepSize[weightIndex] += weight2ndDerivative;
}
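// Not shown in this excerpt: how the curvature accumulated into WeightStepSize becomes the per-weight
// learning rate that PropogateError multiplies into the update. In the paper's stochastic diagonal
// Levenberg-Marquardt recipe the conversion is eta_k = epsilon / (mu + h_kk), where h_kk is the
// (averaged) curvature estimate gathered above. A sketch of that conversion, with epsilon and mu as
// assumed hyper-parameter names:
static double CurvatureToStepSizeSketch(double accumulatedCurvature, int sampleCount,
                                        double epsilon, double mu)
{
    double averagedCurvature = accumulatedCurvature / sampleCount; // average over the pre-training samples
    return epsilon / (mu + averagedCurvature);                     // larger curvature => smaller step
}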
protected abstract void PropogateForward(RectangularStep step, int mapNumber);
protected abstract void EstimateBiasSecondDerivative(RectangularStep downstream);
protected abstract void PropogateUnitSecondDerivatives(RectangularStep downstream, int mapNumber);
public SubsamplingStep(RectangularStep upstream, int subsamplingSize) : this(new[] { upstream }, subsamplingSize) { }
public ConvolutionStep(RectangularStep upstream, int convolutionSize) : this(new[] { upstream }, convolutionSize, convolutionSize) { }
protected abstract void PropogateError(RectangularStep downstream, int mapNumber);
public SubsamplingStep(RectangularStep upstream, int subsamplingWidth, int subsamplingHeight) : this(new[] { upstream }, subsamplingWidth, subsamplingHeight) { }
public ConvolutionStep(RectangularStep upstream, int convolutionWidth, int convolutionHeight) : this(new[] { upstream }, convolutionWidth, convolutionHeight) { }
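// A guess at how these layers chain together, LeNet-style, using only the constructors shown in this
// listing. It assumes ConvolutionStep and SubsamplingStep are themselves RectangularSteps (their base
// class is not visible here) and that GetInputPlane() is a hypothetical helper supplying an
// already-constructed RectangularStep, e.g. a 32x32 input plane.
RectangularStep input = GetInputPlane();
var c1 = new ConvolutionStep(input, 5); // 5x5 convolution kernels
var s2 = new SubsamplingStep(c1, 2);    // 2x2 non-overlapping subsampling windows
var c3 = new ConvolutionStep(s2, 5);    // a further 5x5 convolution over the pooled map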
public StepSnapshot(RectangularStep step) : this(step, step.Width) { }