private double[] Backpropagate(int[] expected)
{
    // calculate the delta of the output layer
    layers.Last().Delta = costFunction.Delta(expected, layers.Last().Outputs, layers.Last().Activation);

    // calculate the sum of the last layer errors for the bias gradient
    layers.Last().ErrorsSum = layers.Last().ErrorsSum.Add(layers.Last().Delta);

    // calculate weight gradients of the output layer
    var derivativeByWeight = layers.Last().Delta.Outer(layers.SecondToLast().Outputs);
    layers.Last().WeightGradients = layers.Last().WeightGradients.Add(derivativeByWeight);

    // calculate errors and weight gradients of other layers
    for (var i = layers.Count - 2; i > 0; i--)
    {
        var partialDerivative = layers[i].Activation.PartialDerivative(layers[i].Outputs);

        // calculate the layer error
        layers[i].Delta = Elementwise.Multiply(
            layers[i + 1].Weights.Transpose().Dot(layers[i + 1].Delta),
            partialDerivative);

        // calculate the sum of the layer errors for the bias gradient
        layers[i].ErrorsSum = layers[i].ErrorsSum.Add(layers[i].Delta);

        // calculate weight gradients of the current layer
        derivativeByWeight = layers[i].Delta.Outer(layers[i - 1].Outputs);
        layers[i].WeightGradients = layers[i].WeightGradients.Add(derivativeByWeight);
    }

    return (double[])layers.Last().Outputs.Clone();
}
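Backpropagate only accumulates gradients into WeightGradients and ErrorsSum; applying them to the parameters is a separate step. Below is a minimal sketch of that update, assuming hypothetical names (ApplyGradients, Biases, learningRate, batchSize) and the same Accord.NET-style Add/Subtract/Multiply extension methods used above; the actual layer API may differ.

// A minimal sketch of the parameter update, not part of the snippet above:
// ApplyGradients, Biases, learningRate and batchSize are assumed names.
// WeightGradients and ErrorsSum are the accumulators that Backpropagate
// fills in, summed over the examples of one batch.
private void ApplyGradients(double learningRate, int batchSize)
{
    var scale = learningRate / batchSize;

    // layer 0 is the input layer and has no trainable parameters
    for (var i = 1; i < layers.Count; i++)
    {
        // w := w - (eta / m) * accumulated dC/dw
        layers[i].Weights = layers[i].Weights.Subtract(layers[i].WeightGradients.Multiply(scale));

        // b := b - (eta / m) * accumulated dC/db
        layers[i].Biases = layers[i].Biases.Subtract(layers[i].ErrorsSum.Multiply(scale));

        // reset the accumulators for the next batch
        layers[i].WeightGradients = layers[i].WeightGradients.Multiply(0.0);
        layers[i].ErrorsSum = layers[i].ErrorsSum.Multiply(0.0);
    }
}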
private void Backprop(
    Tuple<Matrix<float>, Matrix<float>> inputOutput,
    out Matrix<float>[] nabla_b,
    out Matrix<float>[] nabla_w)
{
    nabla_b = CloneWithZeroes(mBiases);
    nabla_w = CloneWithZeroes(mWeights);

    // feedforward
    var activation = inputOutput.Item1;
    var activations = new List<Matrix<float>>() { activation }; // layer-by-layer activations
    var zs = new List<Matrix<float>>();                         // layer-by-layer z vectors

    for (int i = 0; i < mNumberOfLayers - 1; i++)
    {
        var z = mWeights[i] * activation + mBiases[i];
        zs.Add(z);
        activation = z.Sigmoid();
        activations.Add(activation);
    }

    // backward pass
    var delta = mCostFunction.Delta(zs[zs.Count - 1], activations[activations.Count - 1], inputOutput.Item2);
    nabla_b[nabla_b.Length - 1] = delta;
    nabla_w[nabla_w.Length - 1] = delta * activations[activations.Count - 2].Transpose();

    for (int i = 2; i < mNumberOfLayers; i++)
    {
        var z = zs[zs.Count - i];
        var sp = z.SigmoidPrime();
        delta = (mWeights[mWeights.Length - i + 1].Transpose() * delta).HadamardProduct(sp);
        nabla_b[nabla_b.Length - i] = delta;
        nabla_w[nabla_w.Length - i] = delta * activations[activations.Count - i - 1].Transpose();
    }
}
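Despite the different matrix libraries (Accord.NET-style extensions in the first routine, Math.NET-style Matrix<float> in the second), both implement the same four backpropagation equations. Writing $W^{l}$ and $b^{l}$ for the weights and biases into layer $l$, $a^{l} = \sigma(z^{l})$ for its activations, $L$ for the output layer, and $\odot$ for the Hadamard (elementwise) product:

\[
\begin{aligned}
\delta^{L} &= \nabla_{a} C \odot \sigma'(z^{L}), \\
\delta^{l} &= \left( (W^{l+1})^{\mathsf{T}} \, \delta^{l+1} \right) \odot \sigma'(z^{l}), \\
\frac{\partial C}{\partial b^{l}} &= \delta^{l}, \\
\frac{\partial C}{\partial W^{l}} &= \delta^{l} \, (a^{l-1})^{\mathsf{T}}.
\end{aligned}
\]

Delta and delta correspond to $\delta^{l}$; ErrorsSum and nabla_b carry $\partial C / \partial b^{l}$; WeightGradients and nabla_w hold the outer products $\delta^{l} (a^{l-1})^{\mathsf{T}}$; and Elementwise.Multiply and HadamardProduct are both $\odot$. One visible design difference: the first version evaluates PartialDerivative on the stored layer outputs (for a sigmoid, $\sigma'(z) = a(1-a)$, so the activations suffice), while the second keeps the z vectors from the forward pass and recomputes SigmoidPrime from them.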