Example #1
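
Two C# takes on the backpropagation pass follow. The first, Backpropagate, accumulates each layer's error sum and weight gradients on the layer objects themselves (the Elementwise.Multiply, Outer, and Dot helpers suggest Accord.NET-style matrix extensions). The second, Backprop, closely mirrors Michael Nielsen's well-known Python backprop routine, returning the per-example bias and weight gradients through the out parameters nabla_b and nabla_w.
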
        private double[] Backpropagate(int[] expected)
        {
            // calculate the delta of the output layer
            layers.Last().Delta = costFunction.Delta(expected, layers.Last().Outputs, layers.Last().Activation);

            // calculate the sum of the last layer errors for the bias gradient
            layers.Last().ErrorsSum = layers.Last().ErrorsSum.Add(layers.Last().Delta);
            var derivativeByWeight  = layers.Last().Delta.Outer(layers.SecondToLast().Outputs);

            // calculate weight gradients of the output layer
            layers.Last().WeightGradients = layers.Last().WeightGradients.Add(derivativeByWeight);

            // calculate errors and weight gradients of the hidden layers
            // (layer 0 is the input layer, so the loop stops before it)
            for (var i = layers.Count - 2; i > 0; i--)
            {
                var partialDerivative = layers[i].Activation.PartialDerivative(layers[i].Outputs);

                // calculate the layer error
                layers[i].Delta = Elementwise.Multiply(
                    layers[i + 1].Weights.Transpose().Dot(layers[i + 1].Delta), partialDerivative);

                // calculate the sum of the layer errors for the bias gradient
                layers[i].ErrorsSum = layers[i].ErrorsSum.Add(layers[i].Delta);

                // calculate weight gradients of the current layer
                derivativeByWeight        = layers[i].Delta.Outer(layers[i - 1].Outputs);
                layers[i].WeightGradients = layers[i].WeightGradients.Add(derivativeByWeight);
            }

            return (double[])layers.Last().Outputs.Clone();
        }
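
The Backpropagate snippet delegates the output-layer error to costFunction.Delta(expected, outputs, activation), whose implementation is not shown. The sketch below is an assumption inferred from the call sites, not the original source: a quadratic cost yields delta = (a - y) multiplied elementwise by the activation derivative, while cross-entropy paired with a sigmoid output collapses to delta = a - y.

// minimal sketch of the cost-function contract assumed above; the interface
// shapes are inferred from the call sites and are not part of the original
public interface IActivation
{
    double[] PartialDerivative(double[] outputs);
}

public interface ICostFunction
{
    double[] Delta(int[] expected, double[] outputs, IActivation activation);
}

public class QuadraticCost : ICostFunction
{
    // delta = (a - y) scaled elementwise by the activation derivative
    public double[] Delta(int[] expected, double[] outputs, IActivation activation)
    {
        var derivative = activation.PartialDerivative(outputs);
        var delta      = new double[outputs.Length];
        for (var i = 0; i < outputs.Length; i++)
            delta[i] = (outputs[i] - expected[i]) * derivative[i];
        return delta;
    }
}

public class CrossEntropyCost : ICostFunction
{
    // with a sigmoid output layer the derivative term cancels, so delta = a - y
    public double[] Delta(int[] expected, double[] outputs, IActivation activation)
    {
        var delta = new double[outputs.Length];
        for (var i = 0; i < outputs.Length; i++)
            delta[i] = outputs[i] - expected[i];
        return delta;
    }
}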

        private void Backprop(
            Tuple<Matrix<float>, Matrix<float>> inputOutput,
            out Matrix<float>[] nabla_b,
            out Matrix<float>[] nabla_w)
        {
            nabla_b = CloneWithZeroes(mBiases);
            nabla_w = CloneWithZeroes(mWeights);

            // feedforward
            var activation  = inputOutput.Item1;
            var activations = new List<Matrix<float>>()
            {
                activation
            };                                                              // layer-by-layer activations
            var zs = new List<Matrix<float>>();                             // layer-by-layer z vectors

            for (int i = 0; i < mNumberOfLayers - 1; i++)
            {
                var z = mWeights[i] * activation + mBiases[i];
                zs.Add(z);

                activation = z.Sigmoid();
                activations.Add(activation);
            }

            // backward pass
            var delta = mCostFunction.Delta(zs[zs.Count - 1], activations[activations.Count - 1], inputOutput.Item2);

            nabla_b[nabla_b.Length - 1] = delta;
            nabla_w[nabla_w.Length - 1] = delta * activations[activations.Count - 2].Transpose();

            // walk the hidden layers from back to front: for i = 2 the indices below
            // address the second-to-last layer, and mWeights[mWeights.Length - i + 1]
            // is the weight matrix of the layer that follows the current one
            for (int i = 2; i < mNumberOfLayers; i++)
            {
                var z  = zs[zs.Count - i];
                var sp = z.SigmoidPrime();
                delta = (mWeights[mWeights.Length - i + 1].Transpose() * delta).HadamardProduct(sp);
                nabla_b[nabla_b.Length - i] = delta;
                nabla_w[nabla_w.Length - i] = delta * activations[activations.Count - i - 1].Transpose();
            }
        }
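
Backprop leans on four helpers that are not shown: Sigmoid, SigmoidPrime, HadamardProduct, and CloneWithZeroes. A possible implementation on top of MathNet.Numerics (whose Matrix<float> matches the signatures used above) is sketched below; the original helpers are not visible, so treat these as assumptions.

using System;
using MathNet.Numerics.LinearAlgebra;

// hypothetical versions of the helpers Backprop relies on
internal static class BackpropHelpers
{
    // elementwise logistic function: sigma(z) = 1 / (1 + e^-z)
    public static Matrix<float> Sigmoid(this Matrix<float> z) =>
        z.Map(v => 1f / (1f + (float)Math.Exp(-v)));

    // sigma'(z) = sigma(z) * (1 - sigma(z)), applied elementwise
    public static Matrix<float> SigmoidPrime(this Matrix<float> z) =>
        z.Sigmoid().Map(s => s * (1f - s));

    // elementwise (Hadamard) product; MathNet exposes this as PointwiseMultiply
    public static Matrix<float> HadamardProduct(this Matrix<float> a, Matrix<float> b) =>
        a.PointwiseMultiply(b);

    // zero matrices shaped like the source array, used to initialise
    // the gradient accumulators nabla_b and nabla_w
    public static Matrix<float>[] CloneWithZeroes(Matrix<float>[] source)
    {
        var zeroes = new Matrix<float>[source.Length];
        for (var i = 0; i < source.Length; i++)
            zeroes[i] = Matrix<float>.Build.Dense(source[i].RowCount, source[i].ColumnCount);
        return zeroes;
    }
}

Keeping HadamardProduct as a thin alias over PointwiseMultiply preserves a one-to-one reading against the numpy routine this method appears to translate.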