/// <summary>
/// Builds one delta-weight matrix per weight layer and appends each to
/// <c>_deltaWeightMatrices</c>. Gradients were accumulated output-to-input, so
/// <c>_gradientMatrices</c> is reversed first to align gradient index i with
/// weight matrix i (left-to-right).
/// </summary>
/// <param name="feedForward">Per-layer outputs captured during the forward pass.</param>
private void ComputeDeltaWeights(FeedForwardData feedForward)
{
    // Gradients were pushed right-to-left during backprop; flip the list so
    // it lines up with the left-to-right weight matrices.
    _gradientMatrices.Reverse();

    for (int layer = 0; layer < _weights.Count; layer++)
    {
        int rows = _weights[layer].GetLength(0);
        int cols = _weights[layer].GetLength(1);

        var gradients = _gradientMatrices[layer];
        var feedingOutputs = feedForward.LayerOutputs[layer];
        var delta = new double[rows, cols];

        // Outer product: each cell pairs the gradient of the downstream
        // neuron (row) with the output of the feeding neuron (column).
        for (int col = 0; col < cols; col++)
        {
            for (int row = 0; row < rows; row++)
            {
                delta[row, col] = gradients[row] * feedingOutputs[col];
            }
        }

        _deltaWeightMatrices.Add(delta);
    }
}
/// <summary>
/// Runs one complete backpropagation pass for a single training example:
/// error at the output layer, gradients, per-weight/per-bias deltas, then the
/// weight and bias updates. Per-pass accumulators are cleared before returning.
/// </summary>
/// <param name="feedForwardData">Layer inputs/outputs recorded by the forward pass.</param>
/// <param name="targetOutputs">Expected output vector for this example.</param>
/// <param name="trainingParams">Learning-rate and related hyperparameters.</param>
/// <param name="timeStep">Iteration counter; only consumed by the (currently disabled) Adam update variants.</param>
/// <returns>The accumulated error for this example.</returns>
public double Backpropagate(FeedForwardData feedForwardData, double[] targetOutputs, TrainingParameters trainingParams, int timeStep)
{
    // 1. Error at the output layer.
    int outputLayerIndex = LayerStructure.HiddenLayerList.Count + 1;
    var predictedOutputs = feedForwardData.LayerOutputs[outputLayerIndex];
    GenerateErrorMatrix(predictedOutputs, targetOutputs, trainingParams);
    ComputeAccumulatedError(predictedOutputs, targetOutputs);

    // 2. Gradients, from the output layer back through the hidden layers.
    ComputeGradients(feedForwardData);

    // 3. Per-weight and per-bias deltas derived from those gradients.
    ComputeDeltaWeights(feedForwardData);
    ComputeDeltaBiases();

    // 4. Apply the deltas (plain gradient descent). To use Adam instead,
    //    swap in the commented variants below; timeStep feeds the Adam
    //    bias-correction terms.
    UpdateWeights(trainingParams);
    UpdateBiases(trainingParams);
    //UpdateWeightsAdam(trainingParams, timeStep);
    //UpdateBiasesAdam(trainingParams, timeStep);

    // Reset per-pass state so the next example starts clean.
    ClearLists();

    return AccumulatedError;
}
/// <summary>
/// Computes the backpropagation gradients, starting at the output layer and
/// walking backwards through every hidden layer. Each layer's gradient vector
/// is appended to <c>_gradientMatrices</c> (output layer first, i.e. in
/// right-to-left order — callers reverse the list before using it).
/// </summary>
/// <param name="feedForwardData">Per-layer inputs and outputs recorded during the forward pass.</param>
private void ComputeGradients(FeedForwardData feedForwardData)
{
    // --- Output-layer gradients: dE/dOut * activation' ---
    int numberOfOutputNeurons = LayerStructure.numberOfOutputNodes;
    _outputGradientMatrix = new double[numberOfOutputNeurons];
    int outputLayerIndex = feedForwardData.LayerInputs.Count - 1;
    // NOTE(review): the variable is named "...Inputs" but is read from
    // LayerOutputs. Whether the activation derivative should receive the
    // layer's pre-activation input or its output depends on the activation
    // convention (e.g. sigmoid derivative is often expressed in terms of the
    // output) — confirm against DerivedActivationFunctionOutput's contract.
    var outputLayerInputs = feedForwardData.LayerOutputs[outputLayerIndex];
    for (int i = 0; i < numberOfOutputNeurons; i++)
    {
        // Chain rule: error derivative times activation derivative.
        _outputGradientMatrix[i] = DerivedErrorMatrix[i] * _outputActivationFunction.DerivedActivationFunctionOutput(outputLayerInputs[i]);
    }
    _gradientMatrices.Add(_outputGradientMatrix);

    // --- Hidden-layer gradients, propagated backwards (layer index i counts down) ---
    int numberOfHiddenLayers = LayerStructure.HiddenLayerList.Count;
    var forwardLayerGradients = _outputGradientMatrix;
    for (int i = numberOfHiddenLayers; i > 0; i--)
    {
        // Transposed so that row j indexes the current layer's neuron and
        // column k the forward layer's neuron in the sum below.
        // (Original author's open question preserved: "this was transposed
        // in the original... why?!" — verify _weights orientation.)
        var currentWeightMatrix = Matrix.Transpose(_weights[i]);
        // NOTE(review): LayerInputs is used here (per the original comment,
        // "for regression"); the commented alternative uses LayerOutputs
        // ("for classification"). Only one convention can be active at a
        // time — confirm it matches the chosen activation functions.
        var currentHiddenLayerInputs = feedForwardData.LayerInputs[i]; //this is for regression
        //var currentHiddenLayerInputs = feedForwardData.LayerOutputs[i]; //this is for classification
        int numberOfNeuronsInCurrentLayer = feedForwardData.LayerOutputs[i].GetLength(0);
        int numberOfNeuronsInForwardLayer = feedForwardData.LayerOutputs[i + 1].GetLength(0);
        var hiddenGradientMatrix = new double[numberOfNeuronsInCurrentLayer];
        for (int j = 0; j < numberOfNeuronsInCurrentLayer; j++)
        {
            // Weighted sum of the forward layer's gradients feeding neuron j.
            double weightGradientSum = 0.0;
            for (int k = 0; k < numberOfNeuronsInForwardLayer; k++)
            {
                weightGradientSum += currentWeightMatrix[j, k] * forwardLayerGradients[k];
            }
            hiddenGradientMatrix[j] = weightGradientSum * _hiddenActivationFunction.DerivedActivationFunctionOutput(currentHiddenLayerInputs[j]);
        }
        _gradientMatrices.Add(hiddenGradientMatrix);
        // This layer's gradients become the "forward" gradients for the next
        // (more input-ward) iteration.
        forwardLayerGradients = hiddenGradientMatrix;
    }
}