/// <summary>
/// Stochastic descent step: refreshes every momentum term from the current gradients and
/// adds "gradient + momentum" into the matching Avg* accumulator.
/// </summary>
/// <remarks>
/// For every weight, and for every input/hidden bias, the update is
/// <c>momentum = (momentum * Momentum) - (LearningRate * gradient)</c> followed by
/// <c>avg += gradient + momentum</c>. Bias gradients are the layer's error signal multiplied by
/// <c>ActivationFunctions.TanhDerriv</c> of the neuron's Z value. For the output layer only the
/// weights are processed.
/// </remarks>
public void Descend()
{
    var pixelCount = Resolution * Resolution;

    // ----- Input layer -----
    for (int neuron = 0; neuron < InputCount; neuron++)
    {
        for (int pixel = 0; pixel < pixelCount; pixel++)
        {
            // Momentum update (labelled "Nesterov momentum" by the original author)
            double gradient = InputWeightGradient[neuron, pixel];
            double velocity = (InputWeightMomentum[neuron, pixel] * Momentum) - (LearningRate * gradient);
            InputWeightMomentum[neuron, pixel] = velocity;
            AvgInputWeightGradient[neuron, pixel] += gradient + velocity;
        }

        double biasGradient = InputErrorSignals[neuron] * ActivationFunctions.TanhDerriv(InputZVals[neuron]);
        double biasVelocity = (InputBiasMomentum[neuron] * Momentum) - (LearningRate * biasGradient);
        InputBiasMomentum[neuron] = biasVelocity;
        AvgInputBiasGradient[neuron] += biasGradient + biasVelocity;
    }

    // ----- Hidden layers -----
    for (int layer = 0; layer < HiddenDepth; layer++)
    {
        for (int neuron = 0; neuron < HiddenCount; neuron++)
        {
            if (layer != 0)
            {
                // Weights between two hidden layers live in the arrays indexed by (layer - 1)
                int slot = layer - 1;
                for (int source = 0; source < HiddenCount; source++)
                {
                    double gradient = HiddenWeightGradient[slot, neuron, source];
                    double velocity = (HiddenWeightMomentum[slot, neuron, source] * Momentum) - (LearningRate * gradient);
                    HiddenWeightMomentum[slot, neuron, source] = velocity;
                    AvgHiddenWeightGradient[slot, neuron, source] += gradient + velocity;
                }
            }
            else
            {
                // The first hidden layer is fed by the input layer and uses its own arrays
                for (int source = 0; source < InputCount; source++)
                {
                    double gradient = FirstHiddenWeightGradient[neuron, source];
                    double velocity = (FirstHiddenWeightMomentum[neuron, source] * Momentum) - (LearningRate * gradient);
                    FirstHiddenWeightMomentum[neuron, source] = velocity;
                    AvgFirstHiddenWeightGradient[neuron, source] += gradient + velocity;
                }
            }

            double biasGradient = HiddenErrorSignals[layer, neuron] * ActivationFunctions.TanhDerriv(HiddenZVals[layer, neuron]);
            double biasVelocity = (HiddenBiasMomentum[layer, neuron] * Momentum) - (LearningRate * biasGradient);
            HiddenBiasMomentum[layer, neuron] = biasVelocity;
            AvgHiddenBiasGradient[layer, neuron] += biasGradient + biasVelocity;
        }
    }

    // ----- Output layer (weights only) -----
    for (int neuron = 0; neuron < OutputCount; neuron++)
    {
        for (int source = 0; source < HiddenCount; source++)
        {
            double gradient = OutputWeightGradient[neuron, source];
            double velocity = (OutputWeightMomentum[neuron, source] * Momentum) - (LearningRate * gradient);
            OutputWeightMomentum[neuron, source] = velocity;
            AvgOutputWeightGradient[neuron, source] += gradient + velocity;
        }
    }
}
/// <summary>
/// Backpropagation of error (formulas): forward-propagates <paramref name="image"/> and then
/// recomputes the error signals and weight gradients of every layer for that single sample.
/// </summary>
/// <param name="image">
/// The matrix (image) to be forward propagated from. It is read as
/// <c>image[p / Resolution, p % Resolution]</c> for every <c>p</c> below <c>Resolution * Resolution</c>.
/// </param>
/// <param name="correct">
/// The number shown in the image. The output neuron with this index gets target 1 and every other
/// output neuron gets target 0; a value outside the output range therefore yields all-zero targets.
/// </param>
/// <remarks>
/// All error-signal and weight-gradient arrays are reallocated on every call, so results of a
/// previous call are discarded. Each <c>ActivationFunctions.TanhDerriv</c> value is evaluated once
/// per neuron and reused; this assumes the function is a pure function of its argument.
/// </remarks>
public void Backprop(double[,] image, int correct)
{
    // Forward propagation of data
    Calculate(image);

    var pixelCount = Resolution * Resolution;
    int lastHidden = HiddenDepth - 1;

    // Reset things about to be calculated
    InputErrorSignals = new double[InputCount];
    HiddenErrorSignals = new double[HiddenDepth, HiddenCount];
    OutputErrorSignals = new double[OutputCount];
    InputWeightGradient = new double[InputCount, pixelCount];
    FirstHiddenWeightGradient = new double[HiddenCount, InputCount];
    HiddenWeightGradient = new double[HiddenDepth - 1, HiddenCount, HiddenCount];
    OutputWeightGradient = new double[OutputCount, HiddenCount];

    // The Z values are not modified below, so the derivative at each hidden neuron is
    // evaluated once here instead of once per connection inside the inner loops.
    double[,] hiddenSlopes = new double[HiddenDepth, HiddenCount];
    for (int l = 0; l < HiddenDepth; l++)
    {
        for (int k = 0; k < HiddenCount; k++)
        {
            hiddenSlopes[l, k] = ActivationFunctions.TanhDerriv(HiddenZVals[l, k]);
        }
    }

    // ----- Output layer -----
    double[] outputSlopes = new double[OutputCount];
    for (int k = 0; k < OutputCount; k++)
    {
        outputSlopes[k] = ActivationFunctions.TanhDerriv(OutputZVals[k]);

        // Error signal: 2 * (target - output)
        OutputErrorSignals[k] = 2d * ((k == correct ? 1d : 0d) - OutputValues[k]);

        // Gradient w.r.t. each weight coming from the last hidden layer (index HiddenDepth - 1)
        for (int j = 0; j < HiddenCount; j++)
        {
            OutputWeightGradient[k, j] = HiddenValues[lastHidden, j] * outputSlopes[k] * OutputErrorSignals[k];
        }
    }

    // ----- Hidden layers: error signals, propagated from the output backwards -----
    if (lastHidden >= 0)
    {
        // The last hidden layer receives its error from the output layer
        for (int k = 0; k < HiddenCount; k++)
        {
            double upstream = 0;
            for (int j = 0; j < OutputCount; j++)
            {
                upstream += OutputWeights[j, k] * outputSlopes[j] * OutputErrorSignals[j];
            }
            HiddenErrorSignals[lastHidden, k] = upstream;
        }
    }

    for (int l = lastHidden - 1; l >= 0; l--)
    {
        for (int k = 0; k < HiddenCount; k++)
        {
            double upstream = 0;
            for (int j = 0; j < HiddenCount; j++)
            {
                // HiddenWeights uses l instead of l + 1 because FirstHiddenWeights is a different array
                upstream += HiddenWeights[l, j, k] * hiddenSlopes[l + 1, j] * HiddenErrorSignals[l + 1, j];
            }
            HiddenErrorSignals[l, k] = upstream;
        }
    }

    // ----- Hidden layers: weight gradients -----
    if (HiddenDepth > 0)
    {
        // First hidden layer is fed by the input layer
        for (int k = 0; k < HiddenCount; k++)
        {
            for (int j = 0; j < InputCount; j++)
            {
                FirstHiddenWeightGradient[k, j] = InputValues[j] * hiddenSlopes[0, k] * HiddenErrorSignals[0, k];
            }
        }
    }

    for (int l = 1; l < HiddenDepth; l++)
    {
        for (int k = 0; k < HiddenCount; k++)
        {
            for (int j = 0; j < HiddenCount; j++)
            {
                HiddenWeightGradient[l - 1, k, j] = HiddenValues[l - 1, j] * hiddenSlopes[l, k] * HiddenErrorSignals[l, k];
            }
        }
    }

    // ----- Input layer -----
    for (int k = 0; k < InputCount; k++)
    {
        // Error signal gathered from the first hidden layer
        double upstream = 0;
        for (int j = 0; j < HiddenCount; j++)
        {
            upstream += FirstHiddenWeights[j, k] * hiddenSlopes[0, j] * HiddenErrorSignals[0, j];
        }
        InputErrorSignals[k] = upstream;

        // Gradient w.r.t. each pixel weight; p is the flattened pixel index
        double inputSlope = ActivationFunctions.TanhDerriv(InputZVals[k]);
        for (int p = 0; p < pixelCount; p++)
        {
            InputWeightGradient[k, p] = image[p / Resolution, p % Resolution] * inputSlope * InputErrorSignals[k];
        }
    }

    // Normalize gradients (currently disabled).
    // NOTE(review): the dimensions passed for HiddenWeightGradient below do not match its allocation
    // above (HiddenDepth - 1, HiddenCount, HiddenCount), and FirstHiddenWeightGradient is not covered;
    // revisit before re-enabling.
    /*
     * InputWeightGradient = ActivationFunctions.Normalize(InputWeightGradient, InputCount, Resolution * Resolution);
     * HiddenWeightGradient = ActivationFunctions.Normalize(HiddenWeightGradient, HiddenDepth, HiddenCount, InputCount);
     * OutputWeightGradient = ActivationFunctions.Normalize(OutputWeightGradient, OutputCount, HiddenCount);
     * //Normalize error signals (biases)
     * HiddenErrorSignals = ActivationFunctions.Normalize(HiddenErrorSignals, HiddenDepth, HiddenCount);
     * InputErrorSignals = ActivationFunctions.Normalize(InputErrorSignals);
     */
}