Example #1
    /// <summary>
    /// Back propagates the second derivatives of the error.
    /// </summary>
    /// <param name="errorList">The error list of the current layer.</param>
    /// <param name="previousErrorList">The error list of the previous layer.</param>
    public void BackPropagateSecondDerivatives(ErrorsList errorList, ErrorsList previousErrorList)
    {
        // nomenclature (repeated from NeuronalNetwork class)
        // NOTE: even though we are addressing SECOND derivatives (and not first derivatives),
        // we use nearly the same notation as if there were first derivatives, since otherwise the
        // ASCII look would be confusing.  We add a single "2", as in "d2Err_wrt_dXn",
        // to give a gentle emphasis that we are using second derivatives
        //
        // Err is output error of the entire neuronal network
        // Xn is the output vector on the n-th layer
        // Xnm1 is the output vector of the previous layer
        // Wn is the vector of weights of the n-th layer
        // Yn is the activation value of the n-th layer, i.e., the weighted sum of inputs BEFORE the squashing function is applied
        // F is the squashing function: Xn = F(Yn)
        // F' is the derivative of the squashing function
        //   Conveniently, for F = tanh, then F'(Yn) = 1 - Xn^2, i.e., the derivative can be calculated from the output, without knowledge of the input
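        //
        // For the diagonal (Levenberg-Marquardt) Hessian approximation computed here, the second
        // derivatives are propagated with the same chain rule as the first derivatives, except that
        // each multiplicative factor is squared and the cross terms are dropped; that is why the
        // squares of F'(Yn), Xnm1 and Wn appear in the steps below.
        // NOTE: SigmoidFunction.DeSigmoid(x) is assumed to return F' evaluated from the output x of
        // the squashing function (for tanh, 1 - x^2)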
        int    ii, jj;
        uint   kk;
        double output;
        double tempValue;

        var neuronsErrorList = new ErrorsList(this.Neurons.Count);
        var weightsErrorList = new double[this.Weights.Count];

        for (ii = 0; ii < this.Weights.Count; ii++)
        {
            weightsErrorList[ii] = 0.0;
        }

        // Calculate d2Err_wrt_dYn = (F'(Yn))^2 * dErr_wrt_Xn (where dErr_wrt_Xn is actually a second derivative)
        for (ii = 0; ii < this.Neurons.Count; ii++)
        {
            output    = this.Neurons[ii].Output;
            tempValue = SigmoidFunction.DeSigmoid(output);
            neuronsErrorList.Add(errorList[ii] * tempValue * tempValue);
        }

        // Calculate d2Err_wrt_Wn = (Xnm1)^2 * d2Err_wrt_Yn (where d2Err_wrt_Yn is actually a second derivative)
        // For each neuron in this layer, go through the list of connections from the prior layer, and
        // update the differential for the corresponding weight
        ii = 0;

        foreach (var neuron in this.Neurons)
        {
            foreach (var connection in neuron.Connections)
            {
                try
                {
                    if (this.previousLayer is null)
                    {
                        continue;
                    }

                    // A NeuronIndex of uint.MaxValue (0xffffffff) signifies the phantom bias neuron,
                    // which has a constant output of 1.0
                    kk     = connection.NeuronIndex;
                    output = kk == 0xffffffff ? 1.0 : this.previousLayer.Neurons[(int)kk].Output;
                    weightsErrorList[connection.WeightIndex] = neuronsErrorList[ii] * output * output;
                }
                catch (Exception)
                {
                    // ignored
                }
            }

            ii++;
        }

        // Calculate d2Err_wrt_Xnm1 = (Wn)^2 * d2Err_wrt_dYn (where d2Err_wrt_dYn is a second derivative, not a first).
        // d2Err_wrt_Xnm1 is needed as the input value of
        // d2Err_wrt_Xn for back propagation of second derivatives for the next (i.e., previous spatially) layer
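        // Note: previousErrorList is only accumulated into ("+=") here, so the caller is expected to
        // pass it in pre-sized and zero-initialized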
        // For each neuron in this layer
        ii = 0;

        foreach (var neuron in this.Neurons)
        {
            foreach (var connection in neuron.Connections)
            {
                try
                {
                    kk = connection.NeuronIndex;

                    // We exclude uint.MaxValue (0xffffffff), which signifies the phantom bias neuron with
                    // a constant output of "1", since we cannot train the bias neuron
                    if (kk == 0xffffffff)
                    {
                        continue;
                    }

                    var index = (int)kk;
                    tempValue = this.Weights[(int)connection.WeightIndex].Value;
                    previousErrorList[index] += neuronsErrorList[ii] * tempValue * tempValue;
                }
                catch (Exception)
                {
                    return;
                }
            }

            // ii tracks the neuron iterator
            ii++;
        }

        // Finally, update the diagonal Hessians for the weights of this layer using d2Err_wrt_dW.
        // By design, this function (and its iteration over many patterns, approx. 500 of them) is
        // called while a single thread has locked the neuronal network, so there is no possibility
        // that another thread might change the value of the Hessian; a plain read-modify-write of
        // each diagonal Hessian is therefore sufficient here
        for (jj = 0; jj < this.Weights.Count; jj++)
        {
            var oldValue = this.Weights[jj].DiagonalHessian;
            var newValue = oldValue + weightsErrorList[jj];
            this.Weights[jj].DiagonalHessian = newValue;
        }
    }
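The diagonal Hessians accumulated above are consumed in Example #2 below, where each weight update is scaled by etaLearningRate / (DiagonalHessian + Micron).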
Example #2
    /// <summary>
    /// Back propagates the neuronal network layer.
    /// </summary>
    /// <param name="errorList">The error list.</param>
    /// <param name="previousErrorList">The previous error list.</param>
    /// <param name="thisLayerOutput">The values of this layer.</param>
    /// <param name="previousLayerOutput">The values of the previous layer.</param>
    /// <param name="etaLearningRate">The ETA learning rate.</param>
    public void BackPropagate(ErrorsList errorList, ErrorsList previousErrorList, NeuronalNetworkNeuronOutputs? thisLayerOutput, NeuronalNetworkNeuronOutputs? previousLayerOutput, double etaLearningRate)
    {
        // nomenclature (repeated from NeuronalNetwork class):
        //
        // Err is output error of the entire neuronal network
        // Xn is the output vector on the n-th layer
        // Xnm1 is the output vector of the previous layer
        // Wn is the vector of weights of the n-th layer
        // Yn is the activation value of the n-th layer, i.e., the weighted sum of inputs BEFORE the squashing function is applied
        // F is the squashing function: Xn = F(Yn)
        // F' is the derivative of the squashing function
        //   Conveniently, for F = tanh, then F'(Yn) = 1 - Xn^2, i.e., the derivative can be calculated from the output, without knowledge of the input
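        //
        // errorList carries dErr_wrt_dXn for this layer (supplied by the caller), and
        // previousErrorList receives dErr_wrt_dXnm1, which becomes the errorList passed to the
        // previous layer's back propagation call.
        // When both thisLayerOutput and previousLayerOutput are supplied, the memorized outputs of
        // the forward pass are used instead of the live neuron values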
        try
        {
            int    ii, jj;
            uint   kk;
            double output;
            var    neuronsErrorList = new ErrorsList(this.Neurons.Count);
            var    weightsErrorList = new double[this.Weights.Count];

            for (ii = 0; ii < this.Weights.Count; ii++)
            {
                weightsErrorList[ii] = 0.0;
            }

            var memorized = thisLayerOutput != null && previousLayerOutput != null;

            // Calculate dErr_wrt_dYn = F'(Yn) * dErr_wrt_Xn
            for (ii = 0; ii < this.Neurons.Count; ii++)
            {
                // Fall back to the live neuron outputs when no memorized snapshot was supplied
                output = memorized && thisLayerOutput != null
                             ? thisLayerOutput[ii]
                             : this.Neurons[ii].Output;
                neuronsErrorList.Add(SigmoidFunction.DeSigmoid(output) * errorList[ii]);
            }

            // Calculate dErr_wrt_Wn = Xnm1 * dErr_wrt_Yn
            // For each neuron in this layer, go through the list of connections from the prior layer, and
            // update the differential for the corresponding weight
            ii = 0;

            foreach (var neuron in this.Neurons)
            {
                foreach (var connection in neuron.Connections)
                {
                    kk = connection.NeuronIndex;
                    if (kk == 0xffffffff)
                    {
                        // This is the bias weight
                        output = 1.0;
                    }
                    else
                    {
                        if (this.previousLayer is null)
                        {
                            continue;
                        }

                        // Fall back to the live outputs of the previous layer when no snapshot was supplied
                        output = memorized && previousLayerOutput != null
                                     ? previousLayerOutput[(int)kk]
                                     : this.previousLayer.Neurons[(int)kk].Output;
                    }

                    weightsErrorList[connection.WeightIndex] += neuronsErrorList[ii] * output;
                }

                ii++;
            }

            // Calculate dErr_wrt_Xnm1 = Wn * dErr_wrt_dYn, which is needed as the input value of
            // dErr_wrt_Xn for back propagation of the next (i.e., previous) layer
            // For each neuron in this layer
            ii = 0;

            foreach (var neuron in this.Neurons)
            {
                foreach (var connection in neuron.Connections)
                {
                    kk = connection.NeuronIndex;

                    // We exclude uint.MaxValue (0xffffffff), which signifies the phantom bias neuron with
                    // a constant output of "1", since we cannot train the bias neuron
                    if (kk == 0xffffffff)
                    {
                        continue;
                    }

                    var index = (int)kk;
                    previousErrorList[index] += neuronsErrorList[ii] * this.Weights[(int)connection.WeightIndex].Value;
                }

                // ii tracks the neuron iterator
                ii++;
            }

            // Finally, update the weights of this layer using dErr_wrt_dW and the learning rate eta.
            // Micron is the Levenberg-Marquardt safety term added to the diagonal Hessian so that the
            // divisor below stays bounded away from zero
            const double Micron = 0.10;

            for (jj = 0; jj < this.Weights.Count; ++jj)
            {
                var divisor = this.Weights[jj].DiagonalHessian + Micron;

                // A range check on the divisor is unnecessary here, since the value of the Hessian has
                // been verified when it was created, so as to ensure that it is strictly
                // zero-positive. Thus, it is impossible for the diagonal Hessian to be less than zero,
                // and it is impossible for the divisor to be less than Micron
                var epsilon  = etaLearningRate / divisor;
                var oldValue = this.Weights[jj].Value;
                var newValue = oldValue - (epsilon * weightsErrorList[jj]);

                // By design, a single thread has locked the neuronal network during back propagation
                // (see BackPropagateSecondDerivatives), so a plain write of the updated weight value
                // is sufficient here
                this.Weights[jj].Value = newValue;
            }
        }
        catch (Exception)
        {
            // ignored
        }
    }
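
For context, here is a sketch of how the two methods above are typically driven from the network level: the second-derivative pass (Example #1) is run backwards through the layers over a sample of patterns to accumulate the diagonal Hessians, after which the regular pass (Example #2) applies weight updates scaled by etaLearningRate / (DiagonalHessian + Micron). The layer type name NeuronalNetworkLayer, the layers collection, and the zero-filling of the ErrorsList are assumptions made for illustration; only BackPropagate, BackPropagateSecondDerivatives, ErrorsList, and the Neurons collection appear in the examples above.

    // Hypothetical driver for a single training pattern. Type and member names other than
    // BackPropagate, BackPropagateSecondDerivatives, ErrorsList and Neurons are assumed for illustration.
    public static void BackPropagatePattern(IReadOnlyList<NeuronalNetworkLayer> layers, ErrorsList outputError, double etaLearningRate)
    {
        // dErr_wrt_dXn of the last layer: the derivative of the loss with respect to the network outputs
        var currentError = outputError;

        // Walk the layers backwards; each call fills the error list of the previous layer,
        // which then becomes the errorList of the next (spatially previous) call
        for (var layerIndex = layers.Count - 1; layerIndex > 0; layerIndex--)
        {
            var previousNeuronCount = layers[layerIndex - 1].Neurons.Count;
            var previousError       = new ErrorsList(previousNeuronCount);

            // previousErrorList is accumulated into with "+=", so it must start out zero-filled
            for (var i = 0; i < previousNeuronCount; i++)
            {
                previousError.Add(0.0);
            }

            // Passing null snapshots makes the layer fall back to its live neuron outputs
            layers[layerIndex].BackPropagate(currentError, previousError, null, null, etaLearningRate);
            currentError = previousError;
        }

        // An analogous backwards walk calling BackPropagateSecondDerivatives (over approx. 500 patterns,
        // per the comment in Example #1) is performed beforehand to accumulate the DiagonalHessian
        // values that scale the weight updates above.
    }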