/// <summary>
/// Update all error derivatives, and accumulate the appropriate weight deltas,
/// given a reference to the gradient object of the next layer.
/// </summary>
/// <param name="nextLayerGradient">Gradient object of the next layer.</param>
/// <param name="regularisers">Regularisers whose loss derivatives are added to the weight deltas.</param>
public override void Backpropagate(FeedForwardLayer.Gradient nextLayerGradient,
    List<IRegulariser> regularisers)
{
    throw new NotImplementedException();
}
/// <summary>
/// Update all error derivatives, and accumulate the appropriate weight deltas,
/// given a reference to the gradient object of the next layer.
/// </summary>
/// <param name="nextLayerGradient">Gradient object of the next layer.</param>
/// <param name="regularisers">Regularisers whose loss derivatives are added to the weight deltas.</param>
public override void Backpropagate(FeedForwardLayer.Gradient nextLayerGradient,
    List<IRegulariser> regularisers)
{
    // Error derivative with respect to each pre-activation: the output error
    // derivatives folded through the activation's Jacobian
    Vector preActivationErrorDerivatives = new Vector(thisLayer.Outputs);

    for (int i = 0; i < thisLayer.Outputs; i++)
    {
        // Calculate output error derivatives; since this is a hidden layer, these
        // are equal to the input error derivatives of the next layer
        outputErrorDerivatives[i] = nextLayerGradient.inputErrorDerivatives[i];
    }

    denseReference.Activation.Peek(denseReference.PreActivation);

    // Calculate the pre-activation error derivatives
    if (denseReference.Activation.IsInterdependent())
    {
        // Interdependent activations (e.g. softmax): every output depends on every
        // pre-activation, so each Jacobian entry must be weighted by the error
        // derivative of the output it belongs to before summing
        for (int i = 0; i < thisLayer.Outputs; i++)     // i -> pre-activation index
        {
            double errorDerivative = 0;
            for (int j = 0; j < thisLayer.Outputs; j++) // j -> output index
            {
                errorDerivative += denseReference.Activation.Derivative(i, j)
                    * outputErrorDerivatives[j];
            }
            preActivationErrorDerivatives[i] = errorDerivative;
        }
    }
    else
    {
        // Elementwise activations: the Jacobian is diagonal, so only the matching
        // output contributes
        for (int i = 0; i < thisLayer.Outputs; i++)
        {
            preActivationErrorDerivatives[i] =
                denseReference.Activation.Derivative(i, i) * outputErrorDerivatives[i];
        }
    }

    for (int i = 0; i < thisLayer.Inputs; i++)
    {
        // Calculate input error derivatives: the derivative of each pre-activation
        // wrt. this input (the connecting weight) times that pre-activation's error
        // derivative, summed over all output neurons in this layer
        double inputErrorDerivative = 0;
        for (int j = 0; j < thisLayer.Outputs; j++)
        {
            // Add 1 to the input index so that the bias column is skipped
            inputErrorDerivative += denseReference.Weights[j, i + 1]
                * preActivationErrorDerivatives[j];
        }
        inputErrorDerivatives[i] = inputErrorDerivative;
    }

    // Cycle through each weight
    for (int i = 0; i < denseReference.Weights.Rows; i++)        // i -> output neurons
    {
        for (int j = 0; j < denseReference.Weights.Columns; j++) // j -> input neurons
        {
            double regulariserDelta = 0;
            foreach (IRegulariser regulariser in regularisers)
            {
                regulariserDelta += regulariser.LossDerivative(
                    denseReference.Weights[i, j]);
            }

            // Accumulate: the error derivative of this pre-activation times the
            // input feeding the weight, plus any regularisation term
            weightDeltas[i, j] += preActivationErrorDerivatives[i] * thisLayer.Input[j, 0]
                + regulariserDelta;
        }
    }
}
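// ---------------------------------------------------------------------------
// Illustrative sketch, not part of this layer's API: a self-contained version
// of the elementwise-activation case above, handy as a reference when checking
// gradients by hand. The method name and all parameter names here are
// hypothetical; unlike the real layer, this sketch omits the bias column
// (which is why Backpropagate above offsets the input index by 1). The
// relations it implements, for y = f(Wx):
//
//   z_i          = sum_j W[i, j] * x_j          (pre-activation)
//   y_i          = f(z_i)                       (output)
//   delta_i      = (dL/dy_i) * f'(z_i)          (pre-activation error derivative)
//   dL/dx_j      = sum_i W[i, j] * delta_i      (input error derivative)
//   dL/dW[i, j] += delta_i * x_j                (weight delta)
//
private static double[] BackpropagateSketch(
    double[,] weights,      // [outputs, inputs], no bias column
    double[] input,         // x
    double[] outputError,   // dL/dy
    double[] fPrime,        // f'(z), evaluated per output
    double[,] weightDeltas) // accumulated dL/dW, updated in place
{
    int outputs = weights.GetLength(0);
    int inputs = weights.GetLength(1);
    double[] inputError = new double[inputs];

    for (int i = 0; i < outputs; i++)
    {
        double delta = outputError[i] * fPrime[i];   // delta_i
        for (int j = 0; j < inputs; j++)
        {
            inputError[j] += weights[i, j] * delta;  // dL/dx_j
            weightDeltas[i, j] += delta * input[j];  // dL/dW[i, j]
        }
    }
    return inputError;
}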