/// <summary>
/// Applies one pre-computed gradient step to a layer in place: every weight and
/// bias is shifted by -learningFactor times the matching entry in <paramref name="step"/>.
/// </summary>
/// <param name="step">Weight and bias gradients computed by backpropogate.</param>
/// <param name="layer">The layer whose weights and biases are mutated.</param>
private void applyStep(GradStep step, DerivedLayer layer)
{
    int neuronCount = layer.weights.RowCount;          // one row per neuron in the layer
    int inputsPerNeuron = layer.weights.ColumnCount;   // one column per incoming connection

    for (int neuron = 0; neuron < neuronCount; neuron++)
    {
        // Descend along the gradient for each incoming weight of this neuron.
        for (int input = 0; input < inputsPerNeuron; input++)
        {
            layer.weights[neuron, input] -= learningFactor * step.weightStep[neuron, input];
        }
        // One bias per neuron, adjusted the same way.
        layer.biases[neuron] -= learningFactor * step.biasStep[neuron];
    }
}
/// <summary>
/// Runs one backward pass over the network and returns, for every hidden layer
/// plus the output layer, the gradient of the squared error with respect to each
/// weight and bias. The returned steps are gradients (not negated), so callers
/// such as applyStep subtract learningFactor * step to perform gradient descent.
/// </summary>
/// <param name="expected">Target activation for each output neuron; must have one entry per output neuron.</param>
/// <returns>One GradStep per layer: steps[0..n-1] for the hidden layers, steps[n] for the output layer.</returns>
public GradStep[] backpropogate(double[] expected)
{
    int layerCount = hiddenLayers.Count() + 1; // all hidden layers plus the output layer

    // dE/d(activation) per neuron per layer; jagged because layers differ in width.
    double[][] errorPerActivation = new double[layerCount][];
    GradStep[] steps = new GradStep[layerCount];

    // ---- Output layer ----
    int last = layerCount - 1;
    errorPerActivation[last] = new double[output.weights.RowCount];
    steps[last] = new GradStep(output.weights.RowCount, output.weights.ColumnCount, output.neurons.Count);

    for (int i = 0; i < output.weights.RowCount; i++)
    {
        // For E = (a - expected)^2, dE/da = 2 * (a - expected).
        // BUG FIX: the original computed 2 * (expected - a), i.e. the NEGATED gradient;
        // combined with applyStep's "-=" that performed gradient ASCENT.
        // NOTE(review): derReLU is applied to the post-activation value; for ReLU this
        // still works as the (a > 0) indicator — confirm if other compressions are used.
        errorPerActivation[last][i] =
            2 * (output.neurons[i] - expected[i]) * Compressions.derReLU(output.neurons[i]);

        // dE/dw = dE/da * da/dw, where da/dw is the previous (last hidden) layer's activation.
        for (int j = 0; j < output.weights.ColumnCount; j++)
        {
            steps[last].weightStep[i, j] =
                errorPerActivation[last][i] * hiddenLayers[hiddenLayers.Count() - 1].neurons[j];
        }

        // da/db = 1, so the bias gradient is just dE/da.
        steps[last].biasStep[i] = errorPerActivation[last][i];
    }

    // ---- Hidden layers, iterating backwards (each needs the next layer's errors) ----
    for (int i = hiddenLayers.Count() - 1; i >= 0; i--)
    {
        errorPerActivation[i] = new double[hiddenLayers[i].weights.RowCount];
        steps[i] = new GradStep(
            hiddenLayers[i].weights.RowCount,
            hiddenLayers[i].weights.ColumnCount,
            hiddenLayers[i].biases.Count);

        // The layer downstream of us: the next hidden layer, or the output for the last one.
        DerivedLayer nextLayer = (i < hiddenLayers.Count() - 1) ? hiddenLayers[i + 1] : output;

        // The layer feeding us: the previous hidden layer, or the input for the first one.
        Layer previousLayer;
        if (i > 0)
        {
            previousLayer = hiddenLayers[i - 1];
        }
        else
        {
            previousLayer = input;
        }

        for (int j = 0; j < hiddenLayers[i].weights.RowCount; j++)
        {
            // NOTE(review): derivative taken at the post-activation value, as in the output layer.
            double activationDerivative = Compressions.derReLU(hiddenLayers[i].neurons[j]);

            // This activation feeds every neuron in the next layer, so its error is the
            // weighted sum of all downstream errors.
            errorPerActivation[i][j] = 0;
            for (int k = 0; k < nextLayer.neurons.Count(); k++)
            {
                errorPerActivation[i][j] += nextLayer.weights[k, j] * errorPerActivation[i + 1][k];
            }
            errorPerActivation[i][j] *= activationDerivative; // chain rule: multiply by da/dz

            // BUG FIX: the original accumulated the NEXT layer's raw errors into biasStep
            // (ignoring connection weights and this layer's activation derivative).
            // As in the output layer, dE/db = dE/da because da/db = 1.
            steps[i].biasStep[j] = errorPerActivation[i][j];

            // dE/dw = dE/da * (previous layer's activation).
            for (int k = 0; k < hiddenLayers[i].weights.ColumnCount; k++)
            {
                steps[i].weightStep[j, k] = errorPerActivation[i][j] * previousLayer.neurons[k];
            }
        }
    }

    return steps;
}