Example #1
        /// <summary>
        /// Returns the numeric gradient, estimated by central finite differences
        /// </summary>
        /// <param name="test_layer_1"> layer whose bias, weight, and input gradients are estimated </param>
        /// <param name="test_layer_2"> second layer in the chain; set as Input_Layer to test layer_1 only </param>
        /// <param name="loss_layer"> loss layer applied to the network output </param>
        /// <param name="I"> input tensor </param>
        /// <param name="T"> target tensor </param>
        /// <param name="h"> finite-difference step size </param>
        /// <returns> Tuple of numeric gradients (dB, dW, dI) </returns>
        public static Tuple<Tensor, Tensor, Tensor> numeric_grad(Base_Layer test_layer_1, Base_Layer test_layer_2, Base_Layer loss_layer, Tensor I, Tensor T, double h = 0.00001)
        {
            Tensor I_copy;
            Tensor B          = test_layer_1.B;
            Tensor W          = test_layer_1.W;
            Tensor numeric_dB = new Tensor(B.dimensions, B.dim_1, B.dim_2, B.dim_3, B.dim_4);
            Tensor numeric_dW = new Tensor(W.dimensions, W.dim_1, W.dim_2, W.dim_3, W.dim_4);
            Tensor numeric_dI = new Tensor(I.dimensions, I.dim_1, I.dim_2, I.dim_3, I.dim_4);

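            // Bias gradient: nudge each bias element up and down by h, re-run the
            // forward pass on a fresh copy of I, and take the centered loss difference.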
            for (int i = 0; i < B.values.Length; i++)
            {
                I_copy = Utils.copy(I);
                test_layer_1.B.values[i] += h;
                Tensor L_up = loss_layer.loss(test_layer_2.forward(test_layer_1.forward(I_copy), true), T);

                I_copy = Utils.copy(I);
                test_layer_1.B.values[i] -= 2 * h;
                Tensor L_down = loss_layer.loss(test_layer_2.forward(test_layer_1.forward(I_copy), true), T);

                test_layer_1.B.values[i] += h;

                numeric_dB.values[i] = Utils.sum(Utils.subtract(L_up, L_down)) / (2 * h);
            }

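            // Weight gradient: same centered-difference scheme, one weight element at a time.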
            for (int i = 0; i < W.values.Length; i++)
            {
                I_copy = Utils.copy(I);
                test_layer_1.W.values[i] += h;
                Tensor L_up = loss_layer.loss(test_layer_2.forward(test_layer_1.forward(I_copy), true), T);

                I_copy = Utils.copy(I);
                test_layer_1.W.values[i] -= 2 * h;
                Tensor L_down = loss_layer.loss(test_layer_2.forward(test_layer_1.forward(I_copy), true), T);

                test_layer_1.W.values[i] += h;

                numeric_dW.values[i] = Utils.sum(Utils.subtract(L_up, L_down)) / (2 * h);
            }

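            // Input gradient: perturb copies of I directly, so the original input is never modified.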
            for (int i = 0; i < I.values.Length; i++)
            {
                Tensor I_up   = Utils.copy(I);
                Tensor I_down = Utils.copy(I);

                I_up.values[i]   += h;
                I_down.values[i] -= h;

                Tensor L_up   = loss_layer.loss(test_layer_2.forward(test_layer_1.forward(I_up), true), T);
                Tensor L_down = loss_layer.loss(test_layer_2.forward(test_layer_1.forward(I_down), true), T);

                numeric_dI.values[i] = Utils.sum(Utils.subtract(L_up, L_down)) / (2 * h);
            }
            return Tuple.Create(numeric_dB, numeric_dW, numeric_dI);
        }
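For reference, numeric_grad estimates each partial derivative with the central-difference quotient

    dL/d(theta_i) ~ (L(theta_i + h) - L(theta_i - h)) / (2h)

whose truncation error is O(h^2), versus O(h) for the one-sided quotient (L(theta_i + h) - L(theta_i)) / h. Each iteration perturbs a single scalar, re-runs the full forward pass on a fresh copy of I (the forward pass may consume or modify its input), and restores the parameter before the next index; Utils.sum collapses the loss tensor to a scalar before the difference is taken.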
Example #2
        /// <summary>
        /// Returns the analytic gradient computed by the layers' backward passes
        /// </summary>
        /// <param name="layer_1"> layer whose bias, weight, and input gradients are computed </param>
        /// <param name="layer_2"> second layer in the chain; set as Input_Layer to test layer_1 only </param>
        /// <param name="loss_layer"> loss layer applied to the network output </param>
        /// <param name="I"> input tensor </param>
        /// <param name="T"> target tensor </param>
        /// <returns> Tuple of analytic gradients (dB, dW, dI) </returns>
        public static Tuple<Tensor, Tensor, Tensor> analytic_grad(Base_Layer layer_1, Base_Layer layer_2, Base_Layer loss_layer, Tensor I, Tensor T)
        {
            Tensor I_copy = Utils.copy(I);
            Tensor analytic_dI, analytic_dB, analytic_dW;

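            // One forward pass to populate the layers' internal caches, then backpropagate
            // from the loss through layer_2 and layer_1 to obtain dI, dB, and dW.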
            loss_layer.loss(layer_2.forward(layer_1.forward(I_copy), true), T);
            analytic_dI = layer_1.backward(layer_2.backward(loss_layer.backward()));
            analytic_dB = layer_1.dB;
            analytic_dW = layer_1.dW;

            return Tuple.Create(analytic_dB, analytic_dW, analytic_dI);
        }
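Together the two helpers form a gradient check: the numeric estimate serves as ground truth and the analytic backward pass is validated against it. Below is a minimal sketch of that comparison, assumed to live in the same static class as the two methods above. Fully_Connected_Layer and MSE_Loss_Layer are hypothetical Base_Layer implementations named purely for illustration (Input_Layer is the pass-through mentioned in the docs), and the Tensor shape arguments simply mirror the constructor used in numeric_grad; substitute the project's real classes.

        public static void gradient_check_demo()
        {
            // Hypothetical concrete layers; replace with the project's real Base_Layer implementations.
            Base_Layer layer_1 = new Fully_Connected_Layer(4, 3);
            Base_Layer layer_2 = new Input_Layer();       // identity pass-through, so only layer_1 is tested
            Base_Layer loss    = new MSE_Loss_Layer();

            // Small random input and target; the shape arguments are illustrative only.
            Random rand = new Random(0);
            Tensor I = new Tensor(2, 1, 4, 1, 1);
            Tensor T = new Tensor(2, 1, 3, 1, 1);
            for (int i = 0; i < I.values.Length; i++) { I.values[i] = rand.NextDouble() - 0.5; }
            for (int i = 0; i < T.values.Length; i++) { T.values[i] = rand.NextDouble() - 0.5; }

            Tuple<Tensor, Tensor, Tensor> numeric  = numeric_grad(layer_1, layer_2, loss, I, T);
            Tuple<Tensor, Tensor, Tensor> analytic = analytic_grad(layer_1, layer_2, loss, I, T);

            // With h = 1e-5, relative errors around 1e-7 or smaller strongly suggest the
            // backward pass is correct; errors near 1e-2 usually indicate a bug.
            Console.WriteLine("dB relative error: " + rel_error(numeric.Item1, analytic.Item1));
            Console.WriteLine("dW relative error: " + rel_error(numeric.Item2, analytic.Item2));
            Console.WriteLine("dI relative error: " + rel_error(numeric.Item3, analytic.Item3));
        }

        // Maximum elementwise relative error between two tensors of identical shape.
        static double rel_error(Tensor a, Tensor b)
        {
            double worst = 0;
            for (int i = 0; i < a.values.Length; i++)
            {
                double denom = Math.Abs(a.values[i]) + Math.Abs(b.values[i]);
                if (denom > 0)
                {
                    worst = Math.Max(worst, Math.Abs(a.values[i] - b.values[i]) / denom);
                }
            }
            return worst;
        }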