Example 1
        /// <summary>
        ///     This method runs the linear function z = MatrixMultiplication(w, A_prev) + b.
        /// </summary>
        /// <param name="previousLayersActivations">A vector containing the previous layers activations.</param>
        /// <param name="weights">A matrix containing the weights.</param>
        /// <param name="bias">A vector containing the bias'.</param>
        /// <returns>
        ///     A tuple containing the linear cache, which holds the weights, biases, and the previous layer's activations, along with the linear output Z.
        /// </returns>
        private Tuple <LinearCache, MatrixVectors> LinearForward(MatrixVectors previousLayersActivations, MatrixVectors weights, MatrixVectors bias)
        {
            MatrixVectors z           = weights.Dot(previousLayersActivations).MatrixElementWise(bias, Operation.Add);
            LinearCache   linearCache = new LinearCache(weights, bias, previousLayersActivations);

            return(new Tuple <LinearCache, MatrixVectors>(linearCache, z));
        }
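
For reference, the linear step this method implements is the standard single-layer pre-activation, in the same notation as the summary above:

    z = W a_prev + b

Assuming a single-example column vector a_prev of shape (n_prev, 1) and W of shape (n, n_prev), the Dot call produces an (n, 1) vector, and the element-wise add requires b to have that same (n, 1) shape.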
Example 2
        /// <summary>
        ///     This method's job is to calculate the activations of each layer.
        ///     It uses the input layer as the first layer's previous activations
        ///     and uses theta to calculate the linear function for the activations.
        ///
        ///     This method gathers the linear and z caches of every layer.
        ///     It will then generate a prediction (AL) as the final layer's activations.
        /// </summary>
        /// <param name="xInput">The input layer of the network.</param>
        /// <param name="theta">The weights and biases of the network.</param>
        /// <param name="dims">Number of neurons in each layer of the network.</param>
        /// <returns>A tuple containing the linear and z caches along with the prediction.</returns>
        public Tuple <List <LinearCache>, List <MatrixVectors>, MatrixVectors> ForwardPropagation(MatrixVectors xInput, Dictionary <string, MatrixVectors> theta, int[] dims)
        {
            List <LinearCache>   linearCaches = new List <LinearCache>();
            List <MatrixVectors> zCache       = new List <MatrixVectors>();

            MatrixVectors previousLayersActivations = xInput;

            for (int l = 1; l < dims.Length - 1; l++)
            {
                MatrixVectors weights = theta["W" + l];
                MatrixVectors bias    = theta["b" + l];
                Tuple <LinearCache, MatrixVectors, MatrixVectors> cacheAndActivation = ActivationsForward(previousLayersActivations, weights, bias, Activation.ReLu);

                LinearCache   linearCache = cacheAndActivation.Item1;
                MatrixVectors z           = cacheAndActivation.Item2;


                linearCaches.Add(linearCache);
                zCache.Add(z);

                previousLayersActivations = cacheAndActivation.Item3;
            }

            MatrixVectors finalWeights = theta["W" + (dims.Length - 1)];
            MatrixVectors finalBias    = theta["b" + (dims.Length - 1)];
            Tuple <LinearCache, MatrixVectors, MatrixVectors> finalLinearCacheAndActivation = ActivationsForward(previousLayersActivations, finalWeights, finalBias, Activation.Sigmoid);

            LinearCache   finalLinearCache = finalLinearCacheAndActivation.Item1;
            MatrixVectors finalZ           = finalLinearCacheAndActivation.Item2;

            MatrixVectors finalActivation = finalLinearCacheAndActivation.Item3;

            linearCaches.Add(finalLinearCache);
            zCache.Add(finalZ);

            Tuple <List <LinearCache>, List <MatrixVectors>, MatrixVectors> cachesAndActivation = new Tuple <List <LinearCache>, List <MatrixVectors>, MatrixVectors>(linearCaches, zCache, finalActivation);

            return(cachesAndActivation);
        }
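
Spelled out, the loop implements the usual layer-by-layer recursion, with ReLU on every hidden layer and a sigmoid on the output layer:

    a^[0] = x
    z^[l] = W^[l] a^[l-1] + b^[l]
    a^[l] = ReLU(z^[l])          for l = 1, ..., L-1
    AL = a^[L] = sigmoid(z^[L])

Because dims includes the input layer, the hidden layers are l = 1 through dims.Length - 2, and the output layer l = dims.Length - 1 is handled separately after the loop.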
Example 3
        /// <summary>
        ///     This method runs the linear function and the specified activation function
        ///     to calculate the Z and A of the current layer.
        /// </summary>
        /// <param name="previousLayersActivations">Vector of the previous layer's activations.</param>
        /// <param name="weights">Matrix of the current layers weights.</param>
        /// <param name="bias">Vector of the current layers bias'.</param>
        /// <param name="activation">The type of activation function to use.</param>
        /// <returns>
        ///     It returns a tuple with the cache as the first item and the final activations as
        ///     the second item.
        /// </returns>
        private Tuple <LinearCache, MatrixVectors, MatrixVectors> ActivationsForward(MatrixVectors previousLayersActivations, MatrixVectors weights, MatrixVectors bias, Activation activation)
        {
            Tuple <LinearCache, MatrixVectors> cache = LinearForward(previousLayersActivations, weights, bias);
            MatrixVectors z = cache.Item2;
            MatrixVectors activationsVector;

            switch (activation)
            {
            case Activation.Sigmoid:
                activationsVector = Sigmoid(z);
                break;

            case Activation.ReLu:
                activationsVector = Relu(z);
                break;

            default:
                throw new ArgumentOutOfRangeException();
            }
            LinearCache linearCache = cache.Item1;

            return(new Tuple <LinearCache, MatrixVectors, MatrixVectors>(linearCache, z, activationsVector));
        }
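
The two activations dispatched on here have the usual element-wise definitions, which the Sigmoid and Relu helpers (not shown in these examples) are presumably implementing entry by entry:

    sigmoid(z) = 1 / (1 + e^(-z))
    ReLU(z) = max(0, z)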
Example 4
        /// <summary>
        ///     This method will calculate dC with respect to Z using the derivative of the
        ///     specified activation, then use this dC/dZ to calculate the other derivatives.
        /// </summary>
        /// <param name="dA">The derivative of the cost function with respect to the activations.</param>
        /// <param name="Z">The linear function of the weights biases and previous layers activations.</param>
        /// <param name="linearCache">A linear cache obtained from forward prop.</param>
        /// <param name="activation">The type of activation to use. Corrosponds with the activation that was used for this layer during forward prop.</param>
        /// <param name="lambda">The L2 regularization hyper-parameter.</param>
        /// <returns>The derivatives provided from the <see cref="LinearBackward"/> function.</returns>
        private Tuple <MatrixVectors, MatrixVectors, MatrixVectors> ActivationsBackward(MatrixVectors dA, MatrixVectors Z, LinearCache linearCache, Activation activation, float lambda)
        {
            MatrixVectors dZ;

            switch (activation)
            {
            case Activation.Sigmoid:
                dZ = SigmoidPrime(dA, Z);
                break;

            case Activation.ReLu:
                dZ = ReLuPrime(dA, Z);
                break;

            default:
                throw new ArgumentOutOfRangeException();
            }

            return(LinearBackward(dZ, linearCache, lambda));
        }
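
Each branch applies the chain rule dC/dZ = dA * g'(Z) (element-wise) using the derivative of the activation that was used in forward prop:

    sigmoid'(z) = sigmoid(z) (1 - sigmoid(z))
    ReLU'(z) = 1 if z > 0, else 0

so SigmoidPrime and ReLuPrime presumably return dA multiplied element-wise by the corresponding derivative; their bodies are not shown in these examples.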
Example 5
        /// <summary>
        ///     This method calculates the derivatives of the parameters and the
        ///     derivative of the previous layer's activations, all with respect to the
        ///     cross-entropy cost function.
        /// </summary>
        /// <param name="dZ">The derivative of the cost function with respect to Z.</param>
        /// <param name="linearCache">A linear cache obtained from forward prop.</param>
        /// <param name="lambda">The L2 regularization hyper-parameter.</param>
        /// <returns>
        ///     The derivatives for gradient descent.
        /// </returns>
        private Tuple <MatrixVectors, MatrixVectors, MatrixVectors> LinearBackward(MatrixVectors dZ, LinearCache linearCache, float lambda)
        {
            MatrixVectors regularizedWeight = linearCache.weights.BroadcastScalar(lambda, Operation.Multiply);
            MatrixVectors dW            = dZ.Dot(linearCache.previousLayersActivations.Transpose());
            MatrixVectors dWRegularized = dW.MatrixElementWise(regularizedWeight, Operation.Add);
            MatrixVectors db            = dZ.MatrixAxisSummation(1);
            MatrixVectors dAPrev        = linearCache.weights.Transpose().Dot(dZ);

            if (!dW.CompareShape(linearCache.weights))
            {
                Console.WriteLine("Does not have the right shape for dW");
            }
            if (!db.CompareShape(linearCache.bias))
            {
                Console.WriteLine("Does not have the right shape for db");
            }
            if (!dAPrev.CompareShape(linearCache.previousLayersActivations))
            {
                Console.WriteLine("Does not have the right shape for dAPrev");
            }
            return(new Tuple <MatrixVectors, MatrixVectors, MatrixVectors>(dWRegularized, db, dAPrev));
        }
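
In equation form, the gradients returned here, with the L2 term already folded into dW, are:

    dW = dZ a_prev^T + lambda W
    db = sum of dZ along axis 1
    dA_prev = W^T dZ

There is no 1/m averaging factor, consistent with the single-example column vectors used in forward propagation; for a column-vector dZ, the axis-1 summation leaves db equal to dZ.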