/// <summary>
/// Computes the linear part of a layer's forward pass: z = weights · previousLayersActivations + bias.
/// </summary>
/// <param name="previousLayersActivations">Vector of activations from the previous layer.</param>
/// <param name="weights">Weight matrix of the current layer.</param>
/// <param name="bias">Bias vector of the current layer.</param>
/// <returns>
/// A tuple holding the linear cache (weights, bias, previous layer's activations) and the computed Z vector.
/// </returns>
private Tuple<LinearCache, MatrixVectors> LinearForward(MatrixVectors previousLayersActivations, MatrixVectors weights, MatrixVectors bias)
{
    // Weighted input first, then add the bias element-wise.
    MatrixVectors weightedInput = weights.Dot(previousLayersActivations);
    MatrixVectors z = weightedInput.MatrixElementWise(bias, Operation.Add);

    // Cache everything backprop will need for this layer.
    LinearCache cache = new LinearCache(weights, bias, previousLayersActivations);

    return new Tuple<LinearCache, MatrixVectors>(cache, z);
}
/// <summary>
/// Propagates the input forward through every layer of the network.
/// The input layer serves as the first "previous activations"; each hidden
/// layer applies the linear function followed by ReLU, and the final layer
/// applies sigmoid to produce the prediction (AL).
/// Linear and Z caches are collected for every layer for use in backprop.
/// </summary>
/// <param name="xInput">The input layer of the network.</param>
/// <param name="theta">The weights and biases of the network, keyed "W1", "b1", "W2", ...</param>
/// <param name="dims">Number of neurons in each layer of the network (index 0 is the input layer).</param>
/// <returns>A tuple containing the linear caches, the z caches, and the prediction.</returns>
public Tuple<List<LinearCache>, List<MatrixVectors>, MatrixVectors> ForwardPropagation(MatrixVectors xInput, Dictionary<string, MatrixVectors> theta, int[] dims)
{
    List<LinearCache> linearCaches = new List<LinearCache>();
    List<MatrixVectors> zCache = new List<MatrixVectors>();

    MatrixVectors currentActivations = xInput;

    // Hidden layers: 1 .. dims.Length - 2, all using ReLU.
    for (int layer = 1; layer < dims.Length - 1; layer++)
    {
        Tuple<LinearCache, MatrixVectors, MatrixVectors> layerResult = ActivationsForward(currentActivations, theta["W" + layer], theta["b" + layer], Activation.ReLu);

        linearCaches.Add(layerResult.Item1);
        zCache.Add(layerResult.Item2);
        currentActivations = layerResult.Item3;
    }

    // Output layer (index dims.Length - 1) uses sigmoid; its activations are the prediction.
    int outputLayer = dims.Length - 1;
    Tuple<LinearCache, MatrixVectors, MatrixVectors> outputResult = ActivationsForward(currentActivations, theta["W" + outputLayer], theta["b" + outputLayer], Activation.Sigmoid);

    linearCaches.Add(outputResult.Item1);
    zCache.Add(outputResult.Item2);

    return new Tuple<List<LinearCache>, List<MatrixVectors>, MatrixVectors>(linearCaches, zCache, outputResult.Item3);
}
/// <summary>
/// Runs the linear function followed by the specified activation function
/// to calculate the Z and A of the current layer.
/// </summary>
/// <param name="previousLayersActivations">Vector of the previous layer's activations.</param>
/// <param name="weights">Matrix of the current layer's weights.</param>
/// <param name="bias">Vector of the current layer's biases.</param>
/// <param name="activation">The type of activation function to use.</param>
/// <returns>
/// A tuple with the linear cache as the first item, Z as the second item,
/// and the final activations as the third item.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="activation"/> is not a supported value.</exception>
private Tuple<LinearCache, MatrixVectors, MatrixVectors> ActivationsForward(MatrixVectors previousLayersActivations, MatrixVectors weights, MatrixVectors bias, Activation activation)
{
    Tuple<LinearCache, MatrixVectors> cache = LinearForward(previousLayersActivations, weights, bias);
    MatrixVectors z = cache.Item2;

    MatrixVectors activationsVector;
    switch (activation)
    {
        case Activation.Sigmoid:
            activationsVector = Sigmoid(z);
            break;
        case Activation.ReLu:
            activationsVector = Relu(z);
            break;
        default:
            // Include the parameter name and offending value so the failure is diagnosable at the call site.
            throw new ArgumentOutOfRangeException(nameof(activation), activation, "Unsupported activation function.");
    }

    LinearCache linearCache = cache.Item1;

    return new Tuple<LinearCache, MatrixVectors, MatrixVectors>(linearCache, z, activationsVector);
}
/// <summary>
/// Calculates dC with respect to Z using the specified activation's derivative,
/// then uses this dC/dZ to calculate the remaining derivatives via <see cref="LinearBackward"/>.
/// </summary>
/// <param name="dA">The derivative of the cost function with respect to the activations.</param>
/// <param name="Z">The linear function of the weights, biases and previous layer's activations.</param>
/// <param name="linearCache">A linear cache obtained from forward prop.</param>
/// <param name="activation">The type of activation to use. Corresponds with the activation that was used for this layer during forward prop.</param>
/// <param name="lambda">The L2 regularization hyper-parameter.</param>
/// <returns>The derivatives provided from the <see cref="LinearBackward"/> function.</returns>
/// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="activation"/> is not a supported value.</exception>
private Tuple<MatrixVectors, MatrixVectors, MatrixVectors> ActivationsBackward(MatrixVectors dA, MatrixVectors Z, LinearCache linearCache, Activation activation, float lambda)
{
    MatrixVectors dZ;
    switch (activation)
    {
        case Activation.Sigmoid:
            dZ = SigmoidPrime(dA, Z);
            break;
        case Activation.ReLu:
            dZ = ReLuPrime(dA, Z);
            break;
        default:
            // Include the parameter name and offending value so the failure is diagnosable at the call site.
            throw new ArgumentOutOfRangeException(nameof(activation), activation, "Unsupported activation function.");
    }

    return LinearBackward(dZ, linearCache, lambda);
}
/// <summary>
/// Calculates the derivatives of the parameters and the derivative of the
/// previous layer's activations, all with respect to the cross entropy cost function.
/// </summary>
/// <param name="dZ">The derivative of the cost function with respect to Z.</param>
/// <param name="linearCache">A linear cache obtained from forward prop.</param>
/// <param name="lambda">The L2 regularization hyper-parameter.</param>
/// <returns>
/// The derivatives for gradient descent: regularized dW, db, and dAPrev.
/// </returns>
private Tuple<MatrixVectors, MatrixVectors, MatrixVectors> LinearBackward(MatrixVectors dZ, LinearCache linearCache, float lambda)
{
    // dW = dZ · A_prev^T, with the L2 term lambda * W added on.
    // NOTE(review): no 1/m batch scaling is visible on either term — presumably
    // updates are per-example here; confirm against the training loop.
    MatrixVectors dW = dZ.Dot(linearCache.previousLayersActivations.Transpose());
    MatrixVectors l2Term = linearCache.weights.BroadcastScalar(lambda, Operation.Multiply);
    MatrixVectors dWRegularized = dW.MatrixElementWise(l2Term, Operation.Add);

    // db sums dZ along axis 1.
    MatrixVectors db = dZ.MatrixAxisSummation(1);

    // dA_prev = W^T · dZ.
    MatrixVectors dAPrev = linearCache.weights.Transpose().Dot(dZ);

    // Sanity-check gradient shapes against their parameters; warn (don't throw) on mismatch.
    if (!dW.CompareShape(linearCache.weights))
    {
        Console.WriteLine("Does not have the right shape for dW");
    }

    if (!db.CompareShape(linearCache.bias))
    {
        Console.WriteLine("Does not have the right shape for db");
    }

    if (!dAPrev.CompareShape(linearCache.previousLayersActivations))
    {
        Console.WriteLine("Does not have the right shape for dAPrev");
    }

    return new Tuple<MatrixVectors, MatrixVectors, MatrixVectors>(dWRegularized, db, dAPrev);
}