/// <summary>
/// Forward propagation of values
/// </summary>
/// <param name="image">The matrix (image) to be forward propagated from</param>
public void Calculate(double[,] image)
{
    //Reset ZVals (raw values untouched by the activation function) and vals
    InputZVals = new double[InputCount];
    InputValues = new double[InputCount];
    HiddenZVals = new double[HiddenDepth, HiddenCount];
    HiddenValues = new double[HiddenDepth, HiddenCount];
    OutputZVals = new double[OutputCount];
    OutputValues = new double[OutputCount];

    //Random r = new Random(); //Random is used for dropout of neurons, but said feature is currently disabled for efficiency reasons

    //Input
    for (int k = 0; k < InputCount; k++)
    {
        for (int j = 0; j < (Resolution * Resolution); j++)
        {
            InputZVals[k] += (InputWeights[k, j] + InputWeightMomentum[k, j]) * image[j / Resolution, j % Resolution];
        }
        //Bias is added once per neuron, not once per incoming weight
        InputZVals[k] += InputBiases[k];
        InputValues[k] = ActivationFunctions.Tanh(InputZVals[k]);
    }

    //Hidden
    for (int l = 0; l < HiddenDepth; l++)
    {
        for (int k = 0; k < HiddenCount; k++)
        {
            if (l == 0)
            {
                for (int j = 0; j < InputCount; j++)
                {
                    //Former dropout code; if desired it must be added to input and output as well
                    //if (l == Depth - 2) { dropout = (r.NextDouble() <= DropoutRate ? 0 : 1); } else { dropout = 1; }
                    HiddenZVals[l, k] += (FirstHiddenWeights[k, j] + FirstHiddenWeightMomentum[k, j]) * InputValues[j];
                }
            }
            else
            {
                for (int j = 0; j < HiddenCount; j++)
                {
                    //HiddenWeights and its momentum use l - 1 because the first hidden layer is stored in FirstHiddenWeights and FirstHiddenWeightMomentum
                    HiddenZVals[l, k] += (HiddenWeights[l - 1, k, j] + HiddenWeightMomentum[l - 1, k, j]) * HiddenValues[l - 1, j];
                }
            }
            //Bias is added once per neuron, not once per incoming weight
            HiddenZVals[l, k] += HiddenBiases[l, k];
            HiddenValues[l, k] = ActivationFunctions.Tanh(HiddenZVals[l, k]);
        }
    }

    //Output
    for (int k = 0; k < OutputCount; k++)
    {
        for (int j = 0; j < HiddenCount; j++)
        {
            OutputZVals[k] += (OutputWeights[k, j] + OutputWeightMomentum[k, j]) * HiddenValues[HiddenDepth - 1, j];
        }
        //No activation function on outputs
        OutputValues[k] = OutputZVals[k];
        //OutputValues[k] = ActivationFunctions.Tanh(OutputZVals[k]);
    }
}
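//For reference, a minimal sketch of the ActivationFunctions helpers the forward pass relies on. Their
//real implementations are not shown in this file, so treat the bodies below as assumptions (the Normalize
//overloads used elsewhere are omitted). Tanh squashes a raw z-value into (-1, 1), and its derivative
//can be computed as 1 - tanh(x)^2.
public static class ActivationFunctions
{
    public static double Tanh(double x)
    {
        return Math.Tanh(x);
    }

    //Derivative of tanh: d/dx tanh(x) = 1 - tanh(x)^2
    public static double TanhDerriv(double x)
    {
        double t = Math.Tanh(x);
        return 1 - (t * t);
    }
}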
//Stochastic gradient descent (the code below follows the update formulas)
//Adds each network's momentum-adjusted gradients to the running averages
public void Descend()
{
    //Input
    for (int i = 0; i < InputCount; i++)
    {
        for (int ii = 0; ii < Resolution * Resolution; ii++)
        {
            //Nesterov momentum
            InputWeightMomentum[i, ii] = (InputWeightMomentum[i, ii] * Momentum) - (LearningRate * InputWeightGradient[i, ii]);
            AvgInputWeightGradient[i, ii] += InputWeightGradient[i, ii] + InputWeightMomentum[i, ii];
        }
        double tempbias = InputErrorSignals[i] * ActivationFunctions.TanhDerriv(InputZVals[i]);
        InputBiasMomentum[i] = (InputBiasMomentum[i] * Momentum) - (LearningRate * tempbias);
        AvgInputBiasGradient[i] += tempbias + InputBiasMomentum[i];
    }

    //Hidden
    for (int i = 0; i < HiddenDepth; i++)
    {
        for (int ii = 0; ii < HiddenCount; ii++)
        {
            if (i == 0)
            {
                for (int iii = 0; iii < InputCount; iii++)
                {
                    //Nesterov momentum
                    FirstHiddenWeightMomentum[ii, iii] = (FirstHiddenWeightMomentum[ii, iii] * Momentum) - (LearningRate * FirstHiddenWeightGradient[ii, iii]);
                    AvgFirstHiddenWeightGradient[ii, iii] += FirstHiddenWeightGradient[ii, iii] + FirstHiddenWeightMomentum[ii, iii];
                }
            }
            else
            {
                for (int iii = 0; iii < HiddenCount; iii++)
                {
                    //Nesterov momentum; i - 1 because the first hidden layer is stored separately
                    HiddenWeightMomentum[i - 1, ii, iii] = (HiddenWeightMomentum[i - 1, ii, iii] * Momentum) - (LearningRate * HiddenWeightGradient[i - 1, ii, iii]);
                    AvgHiddenWeightGradient[i - 1, ii, iii] += HiddenWeightGradient[i - 1, ii, iii] + HiddenWeightMomentum[i - 1, ii, iii];
                }
            }
            double tempbias = HiddenErrorSignals[i, ii] * ActivationFunctions.TanhDerriv(HiddenZVals[i, ii]);
            HiddenBiasMomentum[i, ii] = (HiddenBiasMomentum[i, ii] * Momentum) - (LearningRate * tempbias);
            AvgHiddenBiasGradient[i, ii] += tempbias + HiddenBiasMomentum[i, ii];
        }
    }

    //Output
    for (int i = 0; i < OutputCount; i++)
    {
        for (int ii = 0; ii < HiddenCount; ii++)
        {
            //Nesterov momentum
            OutputWeightMomentum[i, ii] = (OutputWeightMomentum[i, ii] * Momentum) - (LearningRate * OutputWeightGradient[i, ii]);
            AvgOutputWeightGradient[i, ii] += OutputWeightGradient[i, ii] + OutputWeightMomentum[i, ii];
        }
    }
}
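//The Avg* arrays accumulated above must eventually be applied to the weights; that step is not shown
//in this section, so the sketch below is an assumption about what it could look like. ApplyGradients
//and batchSize are hypothetical names, and only the input layer is shown; the same pattern would
//repeat for FirstHiddenWeights, HiddenWeights, HiddenBiases, and OutputWeights.
public void ApplyGradients(int batchSize)
{
    for (int i = 0; i < InputCount; i++)
    {
        for (int ii = 0; ii < Resolution * Resolution; ii++)
        {
            //Apply the averaged, momentum-adjusted gradient, then clear the accumulator for the next batch.
            //Whether this is += or -= depends on the sign convention chosen in Backprop's error signals.
            InputWeights[i, ii] += AvgInputWeightGradient[i, ii] / batchSize;
            AvgInputWeightGradient[i, ii] = 0;
        }
        InputBiases[i] += AvgInputBiasGradient[i] / batchSize;
        AvgInputBiasGradient[i] = 0;
    }
}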
//Read the next image matrix from the file, skipping its 16 bytes of header metadata
public static double[,] ReadNextImage()
{
    //Singleton guard: only one reader may access the file at a time
    if (ImageReaderRunning) { throw new Exception("Already accessing file"); }

    //Read image
    FileStream fs = File.OpenRead(ImagePath);

    //Reset offsets past the file headers upon a new epoch (decrementing NN hyperparameters per epoch is currently disabled)
    if (!(ImageOffset < fs.Length))
    {
        ImageOffset = 16;
        LabelOffset = 8;
    }
    fs.Position = ImageOffset;

    byte[] b = new byte[Resolution * Resolution];
    try
    {
        fs.Read(b, 0, Resolution * Resolution);
    }
    catch (Exception ex)
    {
        Console.WriteLine("Reader exception: " + ex.ToString());
        Console.ReadLine();
    }
    int[] array = Array.ConvertAll(b, Convert.ToInt32);
    ImageOffset += Resolution * Resolution;

    //Convert the flat array to a 2d array of doubles
    double[,] result = new double[Resolution, Resolution];
    for (int i = 0; i < Resolution; i++)
    {
        for (int ii = 0; ii < Resolution; ii++)
        {
            result[i, ii] = (double)array[(Resolution * i) + ii];
        }
    }

    //Normalize the result matrix
    ActivationFunctions.Normalize(result, true, Resolution, Resolution);
    fs.Close();
    return result;
}
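//ReadNextImage resets LabelOffset to 8, which matches the 8-byte header of an MNIST-style label file,
//so a matching label reader presumably exists elsewhere. A minimal sketch, assuming a LabelPath field
//alongside ImagePath (ReadNextLabel and LabelPath are assumed names, not taken from this file):
public static int ReadNextLabel()
{
    FileStream fs = File.OpenRead(LabelPath);
    fs.Position = LabelOffset;
    //Each label is a single byte holding the digit shown in the corresponding image
    int label = fs.ReadByte();
    LabelOffset += 1;
    fs.Close();
    return label;
}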
/// <summary>
/// Backpropagation of error (formulas)
/// </summary>
/// <param name="image">The matrix (image) to be forward propagated from</param>
/// <param name="correct">The number shown in the image</param>
public void Backprop(double[,] image, int correct)
{
    //Forward propagation of data
    Calculate(image);

    //Reset the values about to be calculated
    InputErrorSignals = new double[InputCount];
    HiddenErrorSignals = new double[HiddenDepth, HiddenCount];
    OutputErrorSignals = new double[OutputCount];
    InputWeightGradient = new double[InputCount, Resolution * Resolution];
    FirstHiddenWeightGradient = new double[HiddenCount, InputCount];
    HiddenWeightGradient = new double[HiddenDepth - 1, HiddenCount, HiddenCount];
    OutputWeightGradient = new double[OutputCount, HiddenCount];

    //Output
    //Foreach ending neuron
    for (int k = 0; k < OutputCount; k++)
    {
        double upperlayerderiv = 2d * ((k == correct ? 1d : 0d) - OutputValues[k]);
        OutputErrorSignals[k] = upperlayerderiv;
        //Calculate gradient with respect to the last hidden layer's activations
        //Outputs have no activation function (see Calculate), so the activation derivative is 1 and is omitted
        for (int j = 0; j < HiddenCount; j++)
        {
            OutputWeightGradient[k, j] = HiddenValues[HiddenDepth - 1, j] * OutputErrorSignals[k];
        }
    }

    //Hidden
    //Foreach layer of hidden 'neurons', back to front
    //Calculate error signals
    for (int l = HiddenDepth - 1; l >= 0; l--)
    {
        //Hidden upper layer derivative calculation
        //Foreach starting neuron
        if (l == HiddenDepth - 1)
        {
            for (int k = 0; k < HiddenCount; k++)
            {
                double upperlayerderiv = 0;
                //Foreach ending neuron
                for (int j = 0; j < OutputCount; j++)
                {
                    //Outputs have no activation function, so no activation derivative appears here
                    upperlayerderiv += OutputWeights[j, k] * OutputErrorSignals[j];
                }
                HiddenErrorSignals[l, k] = upperlayerderiv;
            }
        }
        else
        {
            for (int k = 0; k < HiddenCount; k++)
            {
                double upperlayerderiv = 0;
                //Foreach ending neuron
                for (int j = 0; j < HiddenCount; j++)
                {
                    //HiddenWeights uses l instead of the formula's l + 1 because the first hidden layer is stored in FirstHiddenWeights
                    upperlayerderiv += HiddenWeights[l, j, k] * ActivationFunctions.TanhDerriv(HiddenZVals[l + 1, j]) * HiddenErrorSignals[l + 1, j];
                }
                HiddenErrorSignals[l, k] = upperlayerderiv;
            }
        }
    }
    //Calculate weight gradients
    for (int l = 0; l < HiddenDepth; l++)
    {
        //Foreach starting neuron
        for (int k = 0; k < HiddenCount; k++)
        {
            if (l == 0)
            {
                //Foreach ending neuron
                for (int j = 0; j < InputCount; j++)
                {
                    FirstHiddenWeightGradient[k, j] = InputValues[j] * ActivationFunctions.TanhDerriv(HiddenZVals[l, k]) * HiddenErrorSignals[l, k];
                }
            }
            else
            {
                //Foreach ending neuron
                for (int j = 0; j < HiddenCount; j++)
                {
                    HiddenWeightGradient[l - 1, k, j] = HiddenValues[l - 1, j] * ActivationFunctions.TanhDerriv(HiddenZVals[l, k]) * HiddenErrorSignals[l, k];
                }
            }
        }
    }

    //Input
    //Foreach starting neuron
    for (int k = 0; k < InputCount; k++)
    {
        double upperlayerderiv = 0;
        //Calculate error signal
        //Foreach ending neuron
        for (int j = 0; j < HiddenCount; j++)
        {
            upperlayerderiv += FirstHiddenWeights[j, k] * ActivationFunctions.TanhDerriv(HiddenZVals[0, j]) * HiddenErrorSignals[0, j];
        }
        InputErrorSignals[k] = upperlayerderiv;
        //Calculate gradient
        for (int j = 0; j < Resolution * Resolution; j++)
        {
            InputWeightGradient[k, j] = image[j / Resolution, j % Resolution] * ActivationFunctions.TanhDerriv(InputZVals[k]) * InputErrorSignals[k];
        }
    }

    //Normalize gradients (currently disabled)
    /*
    InputWeightGradient = ActivationFunctions.Normalize(InputWeightGradient, InputCount, Resolution * Resolution);
    HiddenWeightGradient = ActivationFunctions.Normalize(HiddenWeightGradient, HiddenDepth, HiddenCount, InputCount);
    OutputWeightGradient = ActivationFunctions.Normalize(OutputWeightGradient, OutputCount, HiddenCount);
    //Normalize error signals (biases)
    HiddenErrorSignals = ActivationFunctions.Normalize(HiddenErrorSignals, HiddenDepth, HiddenCount);
    InputErrorSignals = ActivationFunctions.Normalize(InputErrorSignals);
    */
}
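//Tying the pieces together: a hedged sketch of one training batch built from the methods above.
//TrainBatch and batchSize are illustrative names, not taken from this file, and ReadNextLabel refers
//to the sketch shown earlier.
public void TrainBatch(int batchSize)
{
    for (int i = 0; i < batchSize; i++)
    {
        double[,] image = ReadNextImage(); //Next normalized image matrix
        int label = ReadNextLabel();       //Digit shown in that image
        Backprop(image, label);            //Forward pass + gradient calculation
        Descend();                         //Fold momentum-adjusted gradients into the averages
    }
    //After the batch, the averaged gradients would be applied to the weights (see the ApplyGradients sketch)
}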