internal override void PropagateBackward(NNDetailedBackpropagationData backpropagationData)
{
    float[] dE_dz = new float[Neurons];

    for (int oIdx = 0; oIdx < Neurons; oIdx++)
    {
        float dE_do;

        if (NextLayer == null)
        {
            // Output of this neuron
            float o = backpropagationData.FeedForwardData[this].OutputData[oIdx];

            // On the last layer, the gradient of the error with respect to the neuron's output
            // is the gradient of the error function evaluated at that output
            dE_do = backpropagationData.ErrorGradient(o, oIdx);
        }
        else
        {
            // On any other layer, the gradient of the error with respect to the neuron's output
            // is the sum, over every neuron in the next layer, of the weight connecting this
            // neuron to that neuron multiplied by that neuron's gradient of the error with
            // respect to its raw output
            dE_do = NextLayer.CalculateErrorByOutputGradient(backpropagationData, oIdx);
        }

        // The gradient of the error with respect to the raw output is the gradient of the error
        // with respect to the output multiplied by the gradient of the output with respect to
        // the raw output (do/dz). This layer's activation is f(x) = x, so df(x)/dx = 1 and
        // dE/dz is simply dE/do.
        dE_dz[oIdx] = dE_do;
    }

    backpropagationData.dE_dz[this] = dE_dz;

    // This layer has no weights to update
    backpropagationData.UpdatedWeights[this] = new float[0];
}
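Both branches above defer to NextLayer.CalculateErrorByOutputGradient, which is only described in the comments. As a rough illustration of that sum, a fully-connected layer could implement it as below; this is a minimal sketch, assuming the layer has already stored its own dE_dz (as every PropagateBackward here does) and reusing the ToWeightIndex(inputIndex, outputIndex) layout of the fully-connected layer shown further down.

internal float CalculateErrorByOutputGradient(NNDetailedBackpropagationData backpropagationData, int inputNeuronIndex)
{
    // Sketch only, not necessarily the actual implementation:
    // dE/do for an input neuron is the sum over all neurons j in this layer
    // of w_ij * dE/dz_j, using the dE_dz stored by this layer's backward pass.
    float[] dE_dz = backpropagationData.dE_dz[this];
    float dE_do = 0f;

    for (int oIdx = 0; oIdx < Neurons; oIdx++)
    {
        // The pre-update weights must be used here, which is why the new weights
        // are staged in backpropagationData.UpdatedWeights instead of being
        // applied immediately
        dE_do += this.weights[ToWeightIndex(inputNeuronIndex, oIdx)] * dE_dz[oIdx];
    }

    return dE_do;
}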
internal override void PropagateBackward(NNDetailedBackpropagationData backpropagationData)
{
    float[] lastRawOutput = backpropagationData.FeedForwardData[this].RawOutputData;
    float[] dE_dz = new float[Neurons];
    float[] newWeights = new float[this.weights.Length];

    // For each filter
    for (int fIdx = 0; fIdx < FilterCount; fIdx++)
    {
        float dE_db = 0;

        // Gradient accumulator for this filter's weights; it holds a single filter's
        // worth of weights, so it is indexed with filter index 0
        float[] dE_dw = new float[FilterSize * FilterSize * PreviousLayer.Depth];

        for (int yIdx = 0; yIdx < Height; yIdx++)
        {
            for (int xIdx = 0; xIdx < Width; xIdx++)
            {
                int oIdx = ConvertToNeuronIndex(xIdx, yIdx, fIdx);

                // The gradient of the output with respect to the raw output is the gradient
                // of the activation function evaluated at the raw output
                float do_dz = Activation.Gradient(lastRawOutput[oIdx], lastRawOutput);

                float dE_do;
                if (NextLayer == null)
                {
                    float o = backpropagationData.FeedForwardData[this].OutputData[oIdx];
                    dE_do = backpropagationData.ErrorGradient(o, oIdx);
                }
                else
                {
                    dE_do = NextLayer.CalculateErrorByOutputGradient(backpropagationData, oIdx);
                }

                float dE_dz_tmp = dE_do * do_dz;
                dE_dz[oIdx] = dE_dz_tmp;

                // Every output position this filter produced contributes to the filter's
                // weight gradients, so accumulate over all (xIdx, yIdx)
                for (int fzIdx = 0; fzIdx < PreviousLayer.Depth; fzIdx++)
                {
                    for (int fyIdx = 0; fyIdx < FilterSize; fyIdx++)
                    {
                        for (int fxIdx = 0; fxIdx < FilterSize; fxIdx++)
                        {
                            // The gradient of the raw output with respect to a weight is the
                            // previous layer's output the weight was applied to
                            float dz_dw = backpropagationData.FeedForwardData[PreviousLayer].OutputData[
                                PreviousLayer.ConvertToNeuronIndex(xIdx * Stride + fxIdx, yIdx * Stride + fyIdx, fzIdx)];
                            dE_dw[ToWeightIndex(fxIdx, fyIdx, fzIdx, 0)] += dE_dz_tmp * dz_dw;
                        }
                    }
                }

                dE_db += dE_dz_tmp; // dz/db = 1 for the bias
            }
        }

        for (int fzIdx = 0; fzIdx < PreviousLayer.Depth; fzIdx++)
        {
            for (int fyIdx = 0; fyIdx < FilterSize; fyIdx++)
            {
                for (int fxIdx = 0; fxIdx < FilterSize; fxIdx++)
                {
                    int weightIndex = ToWeightIndex(fxIdx, fyIdx, fzIdx, fIdx);
                    newWeights[weightIndex] = backpropagationData.CalculateNewWeight(
                        this.weights[weightIndex], dE_dw[ToWeightIndex(fxIdx, fyIdx, fzIdx, 0)], this, weightIndex);
                }
            }
        }

        // CalculateNewBias returns the updated bias, so assign rather than add
        this.biases[fIdx] = backpropagationData.CalculateNewBias(this.biases[fIdx], dE_db, this, fIdx);
    }

    backpropagationData.dE_dz[this] = dE_dz;
    backpropagationData.UpdatedWeights[this] = newWeights;
}
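The indexing above hinges on ConvertToNeuronIndex and ToWeightIndex agreeing on a memory layout, neither of which is shown. One plausible row-major arrangement (x fastest, then y, then depth/filter) is sketched below; the actual layout used by this code is an assumption here.

internal int ConvertToNeuronIndex(int xIdx, int yIdx, int fIdx)
{
    // Assumed layout: one Width x Height plane per filter, row-major within a plane
    return (fIdx * Height + yIdx) * Width + xIdx;
}

internal int ToWeightIndex(int fxIdx, int fyIdx, int fzIdx, int fIdx)
{
    // Assumed layout: one FilterSize x FilterSize plane per input-depth slice,
    // with PreviousLayer.Depth slices per filter
    return ((fIdx * PreviousLayer.Depth + fzIdx) * FilterSize + fyIdx) * FilterSize + fxIdx;
}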
internal override void PropagateBackward(NNDetailedBackpropagationData backpropagationData)
{
    float[] lastRawOutput = backpropagationData.FeedForwardData[this].RawOutputData;
    float[] dE_dz = new float[Neurons];
    float[] newWeights = new float[Neurons * PreviousLayer.Neurons];

    for (int oIdx = 0; oIdx < Neurons; oIdx++)
    {
        // The gradient of the error with respect to a specific weight is the gradient of the
        // error with respect to the output, multiplied by the gradient of the output with
        // respect to the raw output, multiplied by the gradient of the raw output with respect
        // to that weight:
        // dE/dw_ij = dE/do_j * do_j/dz_j * dz_j/dw_ij

        // Calculate the neuron-specific values.
        // The gradient of the output with respect to the raw output is the gradient of the
        // activation function evaluated at the raw output
        float do_dz = Activation.Gradient(lastRawOutput[oIdx], lastRawOutput);

        float dE_do;
        if (NextLayer == null)
        {
            // Output of this neuron
            float o = backpropagationData.FeedForwardData[this].OutputData[oIdx];

            // On the last layer, the gradient of the error with respect to the neuron's output
            // is the gradient of the error function evaluated at that output
            dE_do = backpropagationData.ErrorGradient(o, oIdx);
        }
        else
        {
            // On any other layer, the gradient of the error with respect to the neuron's output
            // is the sum, over every neuron in the next layer, of the weight connecting this
            // neuron to that neuron multiplied by that neuron's gradient of the error with
            // respect to its raw output
            dE_do = NextLayer.CalculateErrorByOutputGradient(backpropagationData, oIdx);
        }

        // The gradient of the error with respect to the raw output is the gradient of the error
        // with respect to the output multiplied by the gradient of the output with respect to
        // the raw output
        dE_dz[oIdx] = dE_do * do_dz;

        // Calculate the weight-specific values
        for (int iIdx = 0; iIdx < PreviousLayer.Neurons; iIdx++)
        {
            // The gradient of the raw output with respect to a specific weight is the previous
            // layer's output of the connected neuron
            float dz_dw = backpropagationData.FeedForwardData[PreviousLayer].OutputData[iIdx];
            float dE_dw = dE_do * do_dz * dz_dw;

            // CalculateNewWeight applies the learning rate to the error gradient and returns
            // the updated weight
            int weightIndex = ToWeightIndex(iIdx, oIdx);
            newWeights[weightIndex] = backpropagationData.CalculateNewWeight(this.weights[weightIndex], dE_dw, this, weightIndex);
        }

        // Update the bias: dz/db = 1, so dE/db = dE/do * do/dz.
        // The bias may be updated before backpropagation has finished because it does not
        // directly affect the gradients of the other layers.
        float dE_db = dE_do * do_dz;
        this.biases[oIdx] = backpropagationData.CalculateNewBias(this.biases[oIdx], dE_db, this, oIdx);
    }

    backpropagationData.dE_dz[this] = dE_dz;
    backpropagationData.UpdatedWeights[this] = newWeights;
}
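All three methods delegate the actual update rule to CalculateNewWeight and CalculateNewBias. Since their return values are assigned rather than added, the contract assumed throughout is that they return the updated parameter, not a delta. A minimal plain-SGD sketch of that contract follows; the real implementation may add momentum, weight decay, and so on, and both LearningRate and the NNLayer parameter type are assumptions, not names taken from the original code.

internal float CalculateNewWeight(float currentWeight, float dE_dw, NNLayer layer, int weightIndex)
{
    // Sketch only: vanilla gradient descent, returning the updated weight
    return currentWeight - LearningRate * dE_dw;
}

internal float CalculateNewBias(float currentBias, float dE_db, NNLayer layer, int neuronIndex)
{
    // Sketch only: vanilla gradient descent, returning the updated bias
    return currentBias - LearningRate * dE_db;
}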