Example #1
        public void Calculate(double[] inputVector, int iCount,
                              double[] outputVector /* =NULL */, int oCount /* =0 */,
                              NNNeuronOutputsList pNeuronOutputs /* =NULL */)
        {
            var lit = m_Layers.First();

            // the first layer is the input layer: directly set the outputs of all of its
            // neurons to the input vector
            if (m_Layers.Count > 1)
            {
                int count = 0;
                // bail out if the supplied input count does not match the size of the input layer
                if (iCount != lit.m_Neurons.Count)
                {
                    return;
                }
                foreach (var nit in lit.m_Neurons)
                {
                    if (count < iCount)
                    {
                        nit.output = inputVector[count];
                        count++;
                    }
                }
            }
            // calculate the outputs of the remaining layers
            for (int i = 1; i < m_Layers.Count; i++)
            {
                m_Layers[i].Calculate();
            }

            // load up output vector with results

            if (outputVector != null)
            {
                lit = m_Layers[m_Layers.Count - 1];

                for (int ii = 0; ii < oCount; ii++)
                {
                    outputVector[ii] = lit.m_Neurons[ii].output;
                }
            }

            // load up neuron output values with results
            if (pNeuronOutputs != null)
            {
                // the caller is expected to re-use this list, so clear any results from a previous call
                pNeuronOutputs.Clear();
                // pre-allocate capacity: one entry per layer
                pNeuronOutputs.Capacity = m_Layers.Count;
                foreach (NNLayer nnlit in m_Layers)
                {
                    var layerOut = new NNNeuronOutputs(nnlit.m_Neurons.Count);
                    for (int ii = 0; ii < nnlit.m_Neurons.Count; ++ii)
                    {
                        layerOut.Add(nnlit.m_Neurons[ii].output);
                    }
                    pNeuronOutputs.Add(layerOut);
                }
            }
        }
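
A minimal usage sketch for the forward pass above. The class name NeuralNetwork, the 29x29 (841-element) input, and the 10-class output are assumptions for illustration; only Calculate's signature comes from the code itself.

        // Hypothetical caller (assumed names/sizes): forward-propagate one pattern and read back the results.
        public void ForwardPassExample(NeuralNetwork net)
        {
            double[] input  = new double[841];    // e.g. a 29x29 grayscale patch, scaled to [-1, +1]
            double[] output = new double[10];     // one value per output class
            var layerOutputs = new NNNeuronOutputsList();

            net.Calculate(input, input.Length, output, output.Length, layerOutputs);

            // "output" now holds the final layer's activations; "layerOutputs" holds every layer's
            // outputs, which Backpropagate (Example #2) can reuse as thisLayerOutput/prevLayerOutput.
        }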
Example #2
        /////////////
        public void Backpropagate(DErrorsList dErr_wrt_dXn /* in */,
                                  DErrorsList dErr_wrt_dXnm1 /* out */,
                                  NNNeuronOutputs thisLayerOutput, // memorized values of this layer's output
                                  NNNeuronOutputs prevLayerOutput, // memorized values of previous layer's output
                                  double etaLearningRate)
        {
            // nomenclature (repeated from NeuralNetwork class):
            //
            // Err is output error of the entire neural net
            // Xn is the output vector on the n-th layer
            // Xnm1 is the output vector of the previous layer
            // Wn is the vector of weights of the n-th layer
            // Yn is the activation value of the n-th layer, i.e., the weighted sum of inputs BEFORE the squashing function is applied
            // F is the squashing function: Xn = F(Yn)
            // F' is the derivative of the squashing function
            //   Conveniently, for F = tanh, then F'(Yn) = 1 - Xn^2, i.e., the derivative can be calculated from the output, without knowledge of the input
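            //
            // Worked chain rule for a single weight w in Wn, connecting neuron k of the previous layer
            // to neuron i of this layer (using only the symbols defined above):
            //    dErr/dw = dErr/dXn[i] * dXn[i]/dYn[i] * dYn[i]/dw
            //            = dErr_wrt_dXn[i] * F'(Yn[i]) * Xnm1[k]
            // The first two factors are what gets stored in dErr_wrt_dYn below, and their product with
            // Xnm1[k] is what gets accumulated into dErr_wrt_dWn.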
            try
            {
                int         ii, jj;
                uint        kk;
                int         nIndex;
                double      output;
                DErrorsList dErr_wrt_dYn = new DErrorsList(m_Neurons.Count);
                //
                //	std::vector< double > dErr_wrt_dWn( m_Weights.size(), 0.0 );  // important to initialize to zero
                //////////////////////////////////////////////////
                //
                ///// DESIGN TRADEOFF: REVIEW !!
                // We would prefer (for ease of coding) to use an STL vector for the array "dErr_wrt_dWn", which holds the
                // differential of the current pattern's error with respect to the weights in the layer.  However, for layers
                // with many weights, such as fully-connected layers, the STL vector class's allocator is remarkably stupid
                // when allocating large memory chunks, and causes a remarkable number of page faults, with a consequent
                // slowing of the application's overall execution time.

                // To fix this, I tried using a plain-old C array, by new'ing the needed space from the heap, and
                // delete[]'ing it at the end of the function.  However, this caused the same number of page-fault
                // errors, and did not improve performance.

                // So I tried a plain-old C array allocated on the stack (i.e., not the heap).  Of course I could not
                // write a statement like
                //    double dErr_wrt_dWn[ m_Weights.size() ];
                // since the compiler insists upon a compile-time known constant value for the size of the array.
                // To avoid this requirement, I used the _alloca function to allocate memory on the stack.
                // The downside of this is excessive stack usage, and there might be stack overflow problems.  That's why
                // this comment is labeled "REVIEW".
                // In this C# port a managed double[] is used instead; the CLR zero-initializes it, so the explicit
                // clearing loop below is redundant but harmless.
                double[] dErr_wrt_dWn = new double[m_Weights.Count];
                for (ii = 0; ii < m_Weights.Count; ii++)
                {
                    dErr_wrt_dWn[ii] = 0.0;
                }

                bool bMemorized = (thisLayerOutput != null) && (prevLayerOutput != null);
                // calculate dErr_wrt_dYn = F'(Yn) * dErr_wrt_Xn

                for (ii = 0; ii < m_Neurons.Count; ii++)
                {
                    if (bMemorized)
                    {
                        output = thisLayerOutput[ii];
                    }
                    else
                    {
                        output = m_Neurons[ii].output;
                    }

                    dErr_wrt_dYn.Add(m_sigmoid.DSIGMOID(output) * dErr_wrt_dXn[ii]);
                }

                // calculate dErr_wrt_Wn = Xnm1 * dErr_wrt_Yn
                // For each neuron in this layer, go through the list of connections from the prior layer, and
                // update the differential for the corresponding weight

                ii = 0;
                foreach (NNNeuron nit in m_Neurons)
                {
                    foreach (NNConnection cit in nit.m_Connections)
                    {
                        kk = cit.NeuronIndex;
                        if (kk == 0xffffffff)
                        {
                            output = 1.0;  // phantom bias neuron: its output is the constant 1.0
                        }
                        else
                        {
                            if (bMemorized)
                            {
                                output = prevLayerOutput[(int)kk];
                            }
                            else
                            {
                                output = m_pPrevLayer.m_Neurons[(int)kk].output;
                            }
                        }
                        dErr_wrt_dWn[cit.WeightIndex] += dErr_wrt_dYn[ii] * output;
                    }

                    ii++;
                }
                // calculate dErr_wrt_Xnm1 = Wn * dErr_wrt_dYn, which is needed as the input value of
                // dErr_wrt_Xn for backpropagation of the next (i.e., previous) layer
                // For each neuron in this layer

                ii = 0;
                foreach (NNNeuron nit in m_Neurons)
                {
                    foreach (NNConnection cit in nit.m_Connections)
                    {
                        kk = cit.NeuronIndex;
                        if (kk != 0xffffffff)
                        {
                            // we exclude ULONG_MAX, which signifies the phantom bias neuron with
                            // constant output of "1", since we cannot train the bias neuron

                            nIndex = (int)kk;
                            dErr_wrt_dXnm1[nIndex] += dErr_wrt_dYn[ii] * m_Weights[(int)cit.WeightIndex].value;
                        }
                    }

                    ii++;  // ii tracks the neuron iterator
                }
                // finally, update the weights of this layer's neurons using dErr_wrt_dW and the learning rate eta.
                // Use an atomic compare-and-exchange operation, because another thread might be running its own
                // backpropagation pass and might shift the weights slightly in the meantime.
                const double dMicron = 0.10;
                double       epsilon, divisor;
                double       oldValue;
                double       newValue;
                for (jj = 0; jj < m_Weights.Count; ++jj)
                {
                    divisor = m_Weights[jj].diagHessian + dMicron;

                    // the following code has been rendered unnecessary, since the value of the Hessian has already been
                    // verified, when it was created, to be strictly non-negative.  Thus it is impossible for the
                    // diagHessian to be less than zero, and impossible for the divisor to be less than dMicron.

                    /*
                     * if ( divisor < dMicron )
                     * {
                     * // it should not be possible to reach here, since everything in the second derivative equations
                     * // is strictly non-negative, and thus "divisor" should definitely be as large as MICRON.
                     *
                     * ASSERT( divisor >= dMicron );
                     * divisor = 1.0 ;  // this will limit the size of the update to the same as the size of the global eta
                     * }
                     */
                    epsilon  = etaLearningRate / divisor;
                    oldValue = m_Weights[jj].value;
                    newValue = oldValue - epsilon * dErr_wrt_dWn[jj];
                    while (oldValue != Interlocked.CompareExchange(
                               ref m_Weights[jj].value, newValue, oldValue))
                    {
                        // another thread must have modified the weight.

                        // Obtain its new value, adjust it, and try again

                        oldValue = m_Weights[jj].value;
                        newValue = oldValue - epsilon * dErr_wrt_dWn[jj];
                    }
                }
            }
            catch (Exception)
            {
                // swallow any exception and abandon this backpropagation pass
                return;
            }
        }
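
Example #2 calls m_sigmoid.DSIGMOID(output), relying on the identity noted in its nomenclature block: for F = tanh, F'(Yn) = 1 - Xn^2, so the derivative can be computed from the memorized output alone. A minimal sketch of such a helper follows; the class name SigmoidFunction and the plain-tanh form are assumptions for illustration (implementations in this family of networks often use a scaled tanh instead), not the article's actual sigmoid class.

        // Hypothetical squashing-function helper matching the DSIGMOID call in Example #2
        // (class name and plain-tanh form assumed; requires "using System;" for Math.Tanh)
        public class SigmoidFunction
        {
            // F(Yn): squashing function applied to a neuron's weighted sum of inputs
            public double SIGMOID(double x)
            {
                return Math.Tanh(x);
            }

            // F'(Yn) written in terms of the output Xn = F(Yn): for tanh, F'(Yn) = 1 - Xn^2,
            // so only the memorized output is needed, never the pre-activation Yn
            public double DSIGMOID(double outputXn)
            {
                return 1.0 - outputXn * outputXn;
            }
        }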