/// <summary>
/// Constructor that generates the neural network.
/// </summary>
/// <param name="sizes">The list ``sizes`` contains the number of neurons in the respective
/// layers of the network. For example, if the list was [2, 3, 1]
/// then it would be a three-layer network, with the first layer
/// containing 2 neurons, the second layer 3 neurons, and the
/// third layer 1 neuron.</param>
public NeuralNetwork(int[] sizes)
{
    // Each weight is drawn from a Gaussian with mean 0 and standard deviation
    // 1/sqrt(fan-in) (number of connections feeding the neuron); each bias from
    // a Gaussian with mean 0 and standard deviation 1.
    // Layer 0 is the input layer and by convention gets no biases or weights,
    // since biases are only used when computing outputs of later layers.
    int layerCount = sizes.Length;
    m_biases = new MyVector[layerCount];
    m_weights = new My2DMatrix[layerCount];

    // Allocated only so NumOfNeuronsInLayer(0) can report the input size.
    m_biases[0] = new MyVector(sizes[0]);

    for (int layer = 1; layer < layerCount; layer++)
    {
        int neurons = sizes[layer];
        int fanIn = sizes[layer - 1];
        double weightStdDev = 1.0 / Math.Sqrt(fanIn);

        m_biases[layer] = new MyVector(neurons);
        m_weights[layer] = new My2DMatrix(neurons, fanIn);

        for (int neuron = 0; neuron < neurons; neuron++)
        {
            m_biases[layer][neuron] = NextGaussianDistribution();
            for (int source = 0; source < fanIn; source++)
            {
                m_weights[layer][neuron, source] = NextGaussianDistribution(0, weightStdDev);
            }
        }
    }
}
/// <summary>
/// Copies the values of a <see cref="My2DMatrix"/> into a jagged array
/// (one inner array per matrix row).
/// </summary>
/// <param name="matrix">Matrix to convert; not modified.</param>
/// <returns>A newly allocated jagged array with the same dimensions and values.</returns>
private static double[][] ToArrayOfArrays(My2DMatrix matrix)
{
    // Use the public dimension properties for consistency with the rest of
    // the matrix code (and to avoid repeated GetLength calls in the loops).
    int rows = matrix.RowsCount;
    int columns = matrix.ColumnsCount;

    double[][] result = new double[rows][];
    for (int i = 0; i < rows; i++)
    {
        result[i] = new double[columns];
        for (int j = 0; j < columns; j++)
        {
            result[i][j] = matrix.m_values[i, j];
        }
    }
    return(result);
}
/// <summary>
/// Returns a new matrix that is the transpose of this one
/// (rows become columns and vice versa); this matrix is not modified.
/// </summary>
/// <returns>A new ColumnsCount x RowsCount matrix.</returns>
public My2DMatrix Transpose()
{
    My2DMatrix transposed = new My2DMatrix(ColumnsCount, RowsCount);
    for (int row = 0; row < RowsCount; row++)
    {
        for (int col = 0; col < ColumnsCount; col++)
        {
            transposed[col, row] = this[row, col];
        }
    }
    return(transposed);
}
/// <summary>
/// Each value multiplied by the constant.
/// </summary>
/// <param name="constant">Scalar factor applied to every element.</param>
/// <param name="matrix">Matrix to scale; not modified.</param>
/// <returns>A new matrix of the same dimensions holding the scaled values.</returns>
public static My2DMatrix operator *(double constant, My2DMatrix matrix)
{
    int rows = matrix.RowsCount;
    int columns = matrix.ColumnsCount;

    My2DMatrix scaled = new My2DMatrix(rows, columns);
    for (int row = 0; row < rows; row++)
    {
        for (int col = 0; col < columns; col++)
        {
            scaled[row, col] = constant * matrix[row, col];
        }
    }
    return(scaled);
}
/// <summary>
/// Applies one gradient-descent step computed from a single mini-batch,
/// including L2 weight decay. May return early (leaving the network
/// partially updated) if <paramref name="cancelFnc"/> signals cancellation.
/// </summary>
/// <param name="miniBatch">Pairs of (input, desired output) training samples.</param>
/// <param name="eta">Learning rate.</param>
/// <param name="lambda">L2 regularization parameter.</param>
/// <param name="n">Total training-set size (used for the regularization term).</param>
/// <param name="cancelFnc">Optional callback; when it returns true the update is abandoned.</param>
private void UpdateMiniBatch(Tuple <MyVector, MyVector>[] miniBatch, double eta, double lambda, int n, Cancel cancelFnc)
{
    // Represents the sum of the partial derivatives of the cost function with respect to biases over the batch
    MyVector[] parcDerivBiases = new MyVector[NumOfLayers];
    for (int i = 1; i < parcDerivBiases.Length; i++)
    {
        if (cancelFnc != null && cancelFnc())
        {
            return;
        }
        parcDerivBiases[i] = new MyVector(NumOfNeuronsInLayer(i));
    }

    // Represents the sum of the partial derivatives of the cost function with respect to weights over the batch
    My2DMatrix[] parcDerivWeights = new My2DMatrix[NumOfLayers];
    // Fix: bound on parcDerivWeights.Length (was parcDerivBiases.Length — same
    // value, but the wrong array; a latent trap if the arrays ever diverge).
    for (int i = 1; i < parcDerivWeights.Length; i++)
    {
        if (cancelFnc != null && cancelFnc())
        {
            return;
        }
        parcDerivWeights[i] = new My2DMatrix(m_weights[i].RowsCount, m_weights[i].ColumnsCount);
    }

    for (int i = 0; i < miniBatch.Length; i++)
    {
        // calculate the partial derivatives
        var delta = Backpropagate(miniBatch[i].Item1, miniBatch[i].Item2, cancelFnc);
        // Backpropagate returns null when cancelled mid-run; without this check
        // a NullReferenceException follows if cancelFnc doesn't fire again below.
        if (delta == null)
        {
            return;
        }
        for (int j = 1; j < NumOfLayers; j++)
        {
            if (cancelFnc != null && cancelFnc())
            {
                return;
            }
            parcDerivBiases[j] = parcDerivBiases[j] + delta.Item1[j];   // Vector + Vector
            parcDerivWeights[j] = parcDerivWeights[j] + delta.Item2[j]; // Matrix + Matrix
        }
    }

    for (int i = 1; i < NumOfLayers; i++)
    {
        if (cancelFnc != null && cancelFnc())
        {
            return;
        }
        // Weight decay factor (1 - eta*lambda/n) applies L2 regularization,
        // then subtract the gradient averaged over the mini-batch.
        m_weights[i] = (1 - eta * (lambda / n)) * m_weights[i] - (eta / miniBatch.Length) * parcDerivWeights[i]; // (const * Matrix) - (const*Matrix)
        m_biases[i] = m_biases[i] - (eta / miniBatch.Length) * parcDerivBiases[i];                               // Vector - (const*Vector)
    }
}
/// <summary>
/// Difference of two matrices (element-wise).
/// </summary>
/// <param name="mat1">Left-hand matrix; not modified.</param>
/// <param name="mat2">Right-hand matrix; not modified.</param>
/// <returns>A new matrix where each element is mat1[i,j] - mat2[i,j].</returns>
/// <exception cref="RankException">Thrown when the dimensions of the operands differ.</exception>
public static My2DMatrix operator -(My2DMatrix mat1, My2DMatrix mat2)
{
    bool sameShape = mat1.RowsCount == mat2.RowsCount && mat1.ColumnsCount == mat2.ColumnsCount;
    if (!sameShape)
    {
        throw new RankException("Wrong dimensions for subtract operation.");
    }

    My2DMatrix difference = new My2DMatrix(mat1.RowsCount, mat1.ColumnsCount);
    for (int row = 0; row < mat1.RowsCount; row++)
    {
        for (int col = 0; col < mat1.ColumnsCount; col++)
        {
            difference[row, col] = mat1[row, col] - mat2[row, col];
        }
    }
    return(difference);
}
/// <summary>
/// Runs one forward pass and one backward pass for a single training sample and
/// returns the partial derivatives of the cost function with respect to every
/// bias and weight, indexed by layer (index 0 is unused, matching m_biases/m_weights).
/// </summary>
/// <param name="input">Activation vector of the input layer.</param>
/// <param name="diseredOutput">Desired (target) output vector for this sample.</param>
/// <param name="cancelFnc">Optional cancellation callback; when it returns true the
/// method aborts and returns null — callers must handle the null result.</param>
/// <returns>Tuple of (bias gradients per layer, weight gradients per layer), or null if cancelled.</returns>
private Tuple <MyVector[], My2DMatrix[]> Backpropagate(MyVector input, MyVector diseredOutput, Cancel cancelFnc)
{
    // Gradients per layer; slot 0 stays null because layer 0 is the input layer.
    MyVector[] parcDerivBiases = new MyVector[NumOfLayers];
    My2DMatrix[] parcDerivWeights = new My2DMatrix[NumOfLayers];

    MyVector activation = new MyVector(input);
    // activations[i] holds the output of layer i (activations[0] is the input itself).
    List <MyVector> activations = new List <MyVector>(NumOfLayers)
    {
        activation
    };

    // Feedforward: record every layer's weighted input z = W*a + b and
    // activation a = sigmoid(z); both are needed in the backward pass.
    List <MyVector> weightedInputs = new List <MyVector>(NumOfLayers);
    for (int i = 1; i < NumOfLayers; i++)
    {
        if (cancelFnc != null && cancelFnc())
        {
            return(null);
        }
        MyVector weightedInput = (m_weights[i] * activation) + m_biases[i];
        weightedInputs.Add(weightedInput);
        activation = weightedInput.ApplyFunction(Sigmoid);
        activations.Add(activation);
    }

    // Backward pass: output-layer error from the cross-entropy cost delta
    // (no sigmoid-prime factor at the output — presumably it cancels for
    // cross-entropy; confirm against CrossEntrophyDelta's implementation).
    MyVector delta = CrossEntrophyDelta(activations[activations.Count - 1], diseredOutput);
    parcDerivBiases[parcDerivBiases.Length - 1] = delta;
    // Weight gradient of the last layer: outer product of the error with the
    // previous layer's activation.
    parcDerivWeights[parcDerivWeights.Length - 1] = new My2DMatrix(delta, activations[activations.Count - 2]);

    // Walk backwards through the hidden layers; l counts from the back
    // (l = 2 is the second-to-last layer). Note weightedInputs/activations are
    // Lists filled from layer 1 onward, while m_weights is indexed by layer —
    // hence the differing "- l", "- l + 1" and "- l - 1" offsets.
    for (int l = 2; l < NumOfLayers; l++)
    {
        if (cancelFnc != null && cancelFnc())
        {
            return(null);
        }
        MyVector weightedInput = weightedInputs[weightedInputs.Count - l];
        var spv = weightedInput.ApplyFunction(SigmoidPrime);
        // Propagate the error one layer back: delta = (W^T * delta) * sigmoid'(z)
        // (assumes MyVector's Vector * Vector operator is element-wise — verify).
        delta = (m_weights[m_weights.Length - l + 1].Transpose() * delta) * spv;
        parcDerivBiases[parcDerivBiases.Length - l] = delta; // the error
        parcDerivWeights[parcDerivWeights.Length - l] = new My2DMatrix(delta, activations[activations.Count - l - 1]); // error times the activation from the previous layer (vector times vector^T creates matrix)
    }

    return(new Tuple <MyVector[], My2DMatrix[]>(parcDerivBiases, parcDerivWeights));
}