private void CalculateGradientForSingleTrainingExample(Network network, IErrorFunction errorFunction, ref List<List<NeuronData>> intermediateResults, float[] trainingInput, float[] trainingDesiredOutput)
{
    List<float[]> activations = new List<float[]>();
    List<float[]> zValues = new List<float[]>();

    // Forward pass: record each layer's activations and weighted inputs (z values)
    // for use in backpropagation. Don't flush the working cache.
    network.Compute(this, trainingInput, ref activations, ref zValues, false);

    // Backward pass, starting at the output layer...
    var lastLayerGradient = intermediateResults.Last();
    List<float> delta_k_holder = new List<float>();
    CalculateOutputLayerGradient(network, errorFunction, ref lastLayerGradient, ref delta_k_holder, activations, trainingInput, zValues, trainingDesiredOutput);

    // ...then propagate the deltas back through the hidden layers. The first
    // hidden layer's "previous activations" are the raw training inputs.
    for (int i = network.layers.Count - 2; i >= 0; --i)
    {
        var layerGradient = intermediateResults[i];
        CalculateHiddenLayerGradient(network, i, ref layerGradient, ref delta_k_holder, i == 0 ? trainingInput : activations[i - 1], zValues);
    }
}
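// Since the gradient code below accumulates with +=, per-example gradients from a
// minibatch simply add up in intermediateResults. A sketch of what a driver loop
// might look like, assuming intermediateResults has been zeroed beforehand; the
// method name AccumulateMinibatchGradient and the trainingInputs/trainingOutputs
// parameters are illustrative, not part of the code above.
private void AccumulateMinibatchGradient(Network network, IErrorFunction errorFunction, ref List<List<NeuronData>> intermediateResults, List<float[]> trainingInputs, List<float[]> trainingOutputs)
{
    // Each example adds its gradient contribution; the caller can divide by the
    // batch size (or fold it into the learning rate) before the weight update.
    for (int n = 0; n < trainingInputs.Count; n++)
    {
        CalculateGradientForSingleTrainingExample(network, errorFunction, ref intermediateResults, trainingInputs[n], trainingOutputs[n]);
    }
}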
private void CalculateOutputLayerGradient(Network network, IErrorFunction errorFunction, ref List<NeuronData> gradientData, ref List<float> delta_k_vector, List<float[]> activations, float[] trainingInput, List<float[]> zValues, float[] desiredOutput)
{
    // For a single-layer network the "previous activations" are the raw inputs.
    var prevActivations = activations.Count <= 1 ? trainingInput : activations[activations.Count - 2];
    int lastLayerWeightCount = network.layers.Last().GetWeightsPerNeuron();
    int lastLayerNeuronCount = network.layers.Last().GetNeuronCount();

    for (int i = 0; i < lastLayerNeuronCount; i++)
    {
        float outputValue = activations.Last()[i];

        // delta_k = dE/dz_k: the error derivative with respect to this
        // neuron's weighted input, via the error function and activation.
        float delta_k = errorFunction.CalculateDelta(zValues.Last()[i], outputValue, desiredOutput[i], network.activationFunction);

        var gradientDataItem = gradientData[i];
        //Assert(gradientData[i].weights.Length == prevActivations.Length);

        // Accumulate dE/dw_kj = delta_k * a_j and dE/db_k = delta_k.
        for (int j = 0; j < lastLayerWeightCount; j++)
        {
            gradientDataItem.weights[j] += delta_k * prevActivations[j];
        }
        gradientDataItem.bias += delta_k;

        // Save delta_k so the hidden-layer pass can propagate it backwards.
        delta_k_vector.Add(delta_k);
    }
}
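// CalculateHiddenLayerGradient is called above but not shown. A minimal sketch of
// what it might look like, under stated assumptions: that the activation function
// exposes a CalculateDerivative(z) method and that a layer's weights can be read
// via a hypothetical GetWeight(neuronIndex, weightIndex) accessor; neither is
// confirmed by the code above. It implements the standard backpropagation
// recurrence delta_j = sigma'(z_j) * sum_k(delta_k * w_kj), where the sum runs
// over the next layer's neurons, and replaces delta_k_vector (passed by ref)
// with this layer's deltas for the next, shallower step.
private void CalculateHiddenLayerGradient(Network network, int layerIndex, ref List<NeuronData> gradientData, ref List<float> delta_k_vector, float[] prevActivations, List<float[]> zValues)
{
    var layer = network.layers[layerIndex];
    var nextLayer = network.layers[layerIndex + 1];
    int neuronCount = layer.GetNeuronCount();
    int weightsPerNeuron = layer.GetWeightsPerNeuron();

    List<float> new_delta_k_vector = new List<float>();

    for (int j = 0; j < neuronCount; j++)
    {
        // Weight the next layer's deltas by the connections leaving neuron j.
        float weightedDeltaSum = 0.0f;
        for (int k = 0; k < nextLayer.GetNeuronCount(); k++)
        {
            weightedDeltaSum += delta_k_vector[k] * nextLayer.GetWeight(k, j); // hypothetical accessor
        }

        float delta_j = weightedDeltaSum * network.activationFunction.CalculateDerivative(zValues[layerIndex][j]); // assumed API

        // Accumulate dE/dw_jm = delta_j * a_m and dE/db_j = delta_j,
        // mirroring the output-layer code above.
        var gradientDataItem = gradientData[j];
        for (int m = 0; m < weightsPerNeuron; m++)
        {
            gradientDataItem.weights[m] += delta_j * prevActivations[m];
        }
        gradientDataItem.bias += delta_j;

        new_delta_k_vector.Add(delta_j);
    }

    // This layer's deltas replace the next layer's for the following iteration.
    delta_k_vector = new_delta_k_vector;
}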