/// <summary>
/// Back-propagates the second derivatives through this layer.
/// </summary>
/// <param name="errorList">The error list of the current layer.</param>
/// <param name="previousErrorList">The error list of the previous layer.</param>
public void BackPropagateSecondDerivatives(ErrorsList errorList, ErrorsList previousErrorList)
{
    // Nomenclature (repeated from the NeuronalNetwork class).
    // NOTE: even though we are addressing SECOND derivatives (and not first derivatives),
    // we use nearly the same notation as if they were first derivatives, since otherwise the
    // ASCII look would be confusing. We add one "2" but not two "2"s, as in "d2Err_wrt_dXn",
    // to give a gentle emphasis that we are using second derivatives.
    //
    // Err is the output error of the entire neuronal network.
    // Xn is the output vector of the n-th layer.
    // Xnm1 is the output vector of the previous layer.
    // Wn is the vector of weights of the n-th layer.
    // Yn is the activation value of the n-th layer, i.e. the weighted sum of inputs BEFORE the squashing function is applied.
    // F is the squashing function: Xn = F(Yn).
    // F' is the derivative of the squashing function.
    // Conveniently, for F = tanh, F'(Yn) = 1 - Xn^2, i.e. the derivative can be calculated
    // from the output, without knowledge of the input.
    int ii, jj;
    uint kk;
    double output;
    double tempValue;

    var neuronsErrorList = new ErrorsList(this.Neurons.Count);
    var weightsErrorList = new double[this.Weights.Count];

    for (ii = 0; ii < this.Weights.Count; ii++)
    {
        weightsErrorList[ii] = 0.0;
    }

    // Calculate d2Err_wrt_dYn = (F'(Yn))^2 * d2Err_wrt_dXn
    // (errorList holds d2Err_wrt_dXn, which is itself a second derivative).
    for (ii = 0; ii < this.Neurons.Count; ii++)
    {
        output = this.Neurons[ii].Output;
        tempValue = SigmoidFunction.DeSigmoid(output);
        neuronsErrorList.Add(errorList[ii] * tempValue * tempValue);
    }

    // Calculate d2Err_wrt_dWn = (Xnm1)^2 * d2Err_wrt_dYn.
    // For each neuron in this layer, go through the list of connections from the prior layer
    // and accumulate the differential for the corresponding weight. Note the "+=": a weight
    // may be shared by several connections, so its contributions must be summed, not overwritten.
    ii = 0;

    foreach (var neuron in this.Neurons)
    {
        foreach (var connection in neuron.Connections)
        {
            try
            {
                if (this.previousLayer is null)
                {
                    continue;
                }

                kk = connection.NeuronIndex;
                output = kk == 0xffffffff ? 1.0 : this.previousLayer.Neurons[(int)kk].Output;
                weightsErrorList[connection.WeightIndex] += neuronsErrorList[ii] * output * output;
            }
            catch (Exception)
            {
                // ignored
            }
        }

        ii++;
    }

    // Calculate d2Err_wrt_dXnm1 = (Wn)^2 * d2Err_wrt_dYn. d2Err_wrt_dXnm1 is needed as the
    // d2Err_wrt_dXn input for the back propagation of second derivatives in the next
    // (i.e. spatially previous) layer.
    // For each neuron in this layer:
    ii = 0;

    foreach (var neuron in this.Neurons)
    {
        foreach (var connection in neuron.Connections)
        {
            try
            {
                kk = connection.NeuronIndex;

                // We exclude 0xffffffff (uint.MaxValue), which signifies the phantom bias neuron
                // with a constant output of "1", since we cannot train the bias neuron.
                if (kk == 0xffffffff)
                {
                    continue;
                }

                var index = (int)kk;
                tempValue = this.Weights[(int)connection.WeightIndex].Value;
                previousErrorList[index] += neuronsErrorList[ii] * tempValue * tempValue;
            }
            catch (Exception)
            {
                return;
            }
        }

        // ii tracks the neuron iterator.
        ii++;
    }
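    // Note on the squared factors above: the exact chain rule for the second derivative is
    //     d2Err_wrt_dYn = (F'(Yn))^2 * d2Err_wrt_dXn + F''(Yn) * dErr_wrt_dXn,
    // and this method keeps only the first term. This is the diagonal, Gauss-Newton-style
    // approximation used for stochastic diagonal Levenberg-Marquardt (cf. LeCun et al.,
    // "Efficient BackProp"). The dropped F'' term can be negative; keeping only the squared
    // term guarantees a non-negative Hessian estimate, which the weight update in
    // BackPropagate relies on when it divides by (DiagonalHessian + Micron).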
    // Finally, accumulate the diagonal Hessians for the weights of this layer using d2Err_wrt_dWn.
    // By design, this function (and its iteration over many, approximately 500, patterns) is called
    // while a single thread has locked the neuronal network, so there is no possibility that another
    // thread might change the value of a Hessian in the meantime; a plain read-modify-write is
    // therefore sufficient here.
    for (jj = 0; jj < this.Weights.Count; jj++)
    {
        var oldValue = this.Weights[jj].DiagonalHessian;
        var newValue = oldValue + weightsErrorList[jj];
        this.Weights[jj].DiagonalHessian = newValue;
    }
}
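// Illustrative aside: the nomenclature comments above state F = tanh with F'(Yn) = 1 - Xn^2.
// A helper consistent with that claim could look like the sketch below. This is a hypothetical
// stand-in for intuition only; the repository's actual SigmoidFunction.DeSigmoid may differ
// (e.g. it may use a scaled tanh), so treat the exact constants and names as assumptions.
private static class TanhSquashingSketch
{
    // Xn = F(Yn) = tanh(Yn).
    public static double Squash(double activation) => Math.Tanh(activation);

    // F'(Yn) expressed purely in terms of the output Xn = tanh(Yn):
    // d/dYn tanh(Yn) = 1 - tanh(Yn)^2 = 1 - Xn^2.
    public static double SquashDerivativeFromOutput(double output) => 1.0 - (output * output);
}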
/// <summary>
/// Back-propagates the error through this neuronal network layer.
/// </summary>
/// <param name="errorList">The error list.</param>
/// <param name="previousErrorList">The previous error list.</param>
/// <param name="thisLayerOutput">The output values of this layer.</param>
/// <param name="previousLayerOutput">The output values of the previous layer.</param>
/// <param name="etaLearningRate">The eta learning rate.</param>
public void BackPropagate(ErrorsList errorList, ErrorsList previousErrorList, NeuronalNetworkNeuronOutputs? thisLayerOutput, NeuronalNetworkNeuronOutputs? previousLayerOutput, double etaLearningRate)
{
    // Nomenclature (repeated from the NeuronalNetwork class):
    //
    // Err is the output error of the entire neuronal network.
    // Xn is the output vector of the n-th layer.
    // Xnm1 is the output vector of the previous layer.
    // Wn is the vector of weights of the n-th layer.
    // Yn is the activation value of the n-th layer, i.e. the weighted sum of inputs BEFORE the squashing function is applied.
    // F is the squashing function: Xn = F(Yn).
    // F' is the derivative of the squashing function.
    // Conveniently, for F = tanh, F'(Yn) = 1 - Xn^2, i.e. the derivative can be calculated
    // from the output, without knowledge of the input.
    try
    {
        int ii, jj;
        uint kk;
        double output;

        var neuronsErrorList = new ErrorsList(this.Neurons.Count);
        var weightsErrorList = new double[this.Weights.Count];

        for (ii = 0; ii < this.Weights.Count; ii++)
        {
            weightsErrorList[ii] = 0.0;
        }

        var memorized = thisLayerOutput != null && previousLayerOutput != null;

        // Calculate dErr_wrt_dYn = F'(Yn) * dErr_wrt_dXn. When memorized outputs were supplied,
        // use them; otherwise fall back to the outputs currently stored in the neurons.
        for (ii = 0; ii < this.Neurons.Count; ii++)
        {
            output = memorized && thisLayerOutput != null ? thisLayerOutput[ii] : this.Neurons[ii].Output;
            neuronsErrorList.Add(SigmoidFunction.DeSigmoid(output) * errorList[ii]);
        }

        // Calculate dErr_wrt_dWn = Xnm1 * dErr_wrt_dYn.
        // For each neuron in this layer, go through the list of connections from the prior layer
        // and update the differential for the corresponding weight.
        ii = 0;

        foreach (var neuron in this.Neurons)
        {
            foreach (var connection in neuron.Connections)
            {
                kk = connection.NeuronIndex;

                if (kk == 0xffffffff)
                {
                    // This is the bias weight; the phantom bias neuron has a constant output of "1".
                    output = 1.0;
                }
                else
                {
                    if (this.previousLayer is null)
                    {
                        continue;
                    }

                    output = memorized && previousLayerOutput != null
                        ? previousLayerOutput[(int)kk]
                        : this.previousLayer.Neurons[(int)kk].Output;
                }

                weightsErrorList[connection.WeightIndex] += neuronsErrorList[ii] * output;
            }

            ii++;
        }

        // Calculate dErr_wrt_dXnm1 = Wn * dErr_wrt_dYn, which is needed as the dErr_wrt_dXn
        // input for the back propagation of the next (i.e. spatially previous) layer.
        // For each neuron in this layer:
        ii = 0;

        foreach (var neuron in this.Neurons)
        {
            foreach (var connection in neuron.Connections)
            {
                kk = connection.NeuronIndex;

                // We exclude 0xffffffff (uint.MaxValue), which signifies the phantom bias neuron
                // with a constant output of "1", since we cannot train the bias neuron.
                if (kk == 0xffffffff)
                {
                    continue;
                }

                var index = (int)kk;
                previousErrorList[index] += neuronsErrorList[ii] * this.Weights[(int)connection.WeightIndex].Value;
            }

            // ii tracks the neuron iterator.
            ii++;
        }

        // Finally, update the weights of this layer using dErr_wrt_dWn and the learning rate eta.
        // The per-weight step size is epsilon = eta / (diagonal Hessian + micron). The diagonal
        // Hessian was verified to be strictly non-negative when it was accumulated (see
        // BackPropagateSecondDerivatives), so the divisor can never fall below micron. The network
        // is locked by a single training thread while this runs, so a plain read-modify-write of
        // each weight is sufficient.
        const double Micron = 0.10;

        for (jj = 0; jj < this.Weights.Count; ++jj)
        {
            var divisor = this.Weights[jj].DiagonalHessian + Micron;
            var epsilon = etaLearningRate / divisor;

            this.Weights[jj].Value -= epsilon * weightsErrorList[jj];
        }
    }
    catch (Exception)
    {
        // ignored
    }
}
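// Illustrative aside: the weight update above implements the stochastic diagonal
// Levenberg-Marquardt step size, epsilon = eta / (diagonalHessian + micron). The small,
// self-contained sketch below (hypothetical names, same constants) shows the effect:
// accumulated curvature shrinks the step in sharp directions, while micron caps the
// step in flat directions at eta / micron.
private static class StepSizeSketch
{
    private const double Micron = 0.10;

    public static double Epsilon(double etaLearningRate, double diagonalHessian) =>
        etaLearningRate / (diagonalHessian + Micron);

    public static void Demo()
    {
        // High curvature => small, cautious step.
        Console.WriteLine(Epsilon(0.001, 2.0)); // ~0.000476
        // Zero curvature => step bounded by eta / micron.
        Console.WriteLine(Epsilon(0.001, 0.0)); // 0.01
    }
}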