Example #1
        public static Tensor SRN(
            this Session session,
            Tensor x,
            Tensor w,
            Tensor u,
            Tensor b,
            int numberOfNeurons,
            MatrixLayout matrixLayout)
        {
            const string ActionName = "srn";

            // y = W * x + b, computed for every time step at once
            Tensor y = session.FullyConnected(x, w, b, matrixLayout);

            int tt = y.Shape.GetAxis(Axis.B);               // number of vectors in time sequence

            float[] uw = u.Weights;
            float[] yw = y.Weights;

            // add the recurrent contribution to the output tensor:
            // y(t) += U * y(t-1) (product of the hidden weight matrix and the previous hidden vector)

            // the first time step has no previous state, so only the activation is applied
            Nonlinearity.ReLU(numberOfNeurons, yw, 0, yw, 0);

            for (int t = 1, yi = numberOfNeurons; t < tt; t++, yi += numberOfNeurons)
            {
                Matrix.MxV(matrixLayout, numberOfNeurons, numberOfNeurons, uw, 0, false, yw, yi - numberOfNeurons, yw, yi, false);

                // TODO: customize activation function
                Nonlinearity.ReLU(numberOfNeurons, yw, yi, yw, yi);
            }

            if (session.CalculateGradients)
            {
                session.Push(
                    ActionName,
                    () =>
                {
                    float[] duw = u.Gradient;
                    float[] dyw = y.Gradient;

                    for (int t = tt - 1, yi = t * numberOfNeurons; t > 0; t--, yi -= numberOfNeurons)
                    {
                        // back-propagate through the ReLU activation at step t
                        Nonlinearity.ReLUGradient(numberOfNeurons, dyw, yi, true, yw, yi, dyw, yi);

                        // dU += dy(t) * y(t-1)' (accumulate the hidden weight gradient)
                        lock (u)
                        {
                            Matrix.VxV(matrixLayout, numberOfNeurons, numberOfNeurons, dyw, yi, yw, yi - numberOfNeurons, duw, 0, false);
                        }

                        // dy(t-1) += U' * dy(t) (back-propagate through the recurrent connection)
                        Matrix.MxV(matrixLayout, numberOfNeurons, numberOfNeurons, uw, 0, true, dyw, yi, dyw, yi - numberOfNeurons, false);
                    }

                    // the first time step has no recurrent contribution; back-propagate through its activation only
                    Nonlinearity.ReLUGradient(numberOfNeurons, dyw, 0, true, yw, 0, dyw, 0);
                });
            }

            return y;
        }
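
For reference, the extension method above computes the recurrence y(t) = ReLU(W * x(t) + b + U * y(t-1)). The following plain-C# sketch implements the same recurrence without the library helpers; it assumes row-major H x N and H x H weight layouts, and the method name SrnForwardReference and the raw-array shapes are illustrative, not part of the library:

        // Reference for y(t) = ReLU(W * x(t) + b + U * y(t-1)), assuming
        // row-major W (H x N) and U (H x H). Requires System for Math.Max.
        public static float[] SrnForwardReference(float[] x, float[] w, float[] u, float[] b, int T, int N, int H)
        {
            float[] y = new float[T * H];

            for (int t = 0; t < T; t++)
            {
                for (int i = 0; i < H; i++)
                {
                    // input contribution: W * x(t) + b
                    float sum = b[i];
                    for (int j = 0; j < N; j++)
                    {
                        sum += w[(i * N) + j] * x[(t * N) + j];
                    }

                    // recurrent contribution: U * y(t-1), skipped for the first step
                    if (t > 0)
                    {
                        for (int j = 0; j < H; j++)
                        {
                            sum += u[(i * H) + j] * y[((t - 1) * H) + j];
                        }
                    }

                    y[(t * H) + i] = Math.Max(0.0f, sum);   // ReLU
                }
            }

            return y;
        }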
Example #2
        public void ForwardBackwardTest1()
        {
            const int T = 2;
            const int N = 3;

            Session session = new Session();

            SRNCell layer = new SRNCell(new Shape(new[] { -1, N }), RNNDirection.ForwardOnly, 2, MatrixLayout.RowMajor, null);

            layer.W.Randomize(this.random);
            layer.U.Randomize(this.random);
            layer.B.Randomize(this.random);
            ////layer.W.Set(new float[] { 0.1f, 0.2f, -0.3f, 0.4f, 0.5f, 0.6f });        // 3x2 matrix
            ////layer.U.Set(new float[] { 0.1f, 0.2f, 0.3f, 0.4f });                     // 2x2 matrix
            ////layer.B.Set(new float[] { 0.1f, 0.2f });                                 // 2x1 vector

            Tensor x = new Tensor(null, new[] { T, N });

            x.Randomize(this.random);
            ////x.Set(new float[] { 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f });
            IList<Tensor> xs = new[] { x };
            IList<Tensor> ys = layer.Forward(session, xs);

            float[] bw       = layer.B.Weights;
            Tensor  expected = new Tensor(null, new[] { 2, 2 });

            expected.Weights[0] = Matrix.DotProduct(3, layer.W.Weights, 0, x.Weights, 0) + bw[0];
            expected.Weights[1] = Matrix.DotProduct(3, layer.W.Weights, 3, x.Weights, 0) + bw[1];
            Nonlinearity.ReLU(2, expected.Weights, 0, expected.Weights, 0);
            expected.Weights[2] = Matrix.DotProduct(3, layer.W.Weights, 0, x.Weights, 3) + bw[0] + Matrix.DotProduct(2, layer.U.Weights, 0, expected.Weights, 0);
            expected.Weights[3] = Matrix.DotProduct(3, layer.W.Weights, 3, x.Weights, 3) + bw[1] + Matrix.DotProduct(2, layer.U.Weights, 2, expected.Weights, 0);
            Nonlinearity.ReLU(2, expected.Weights, 2, expected.Weights, 2);
            Helpers.AreTensorsEqual(expected, ys[0]);

            // unroll the graph
            ////session.GetGradient(ys[0]).Randomize(this.random);
            ys[0].SetGradient(new float[] { 0.1f, 0.2f, 0.3f, 0.4f });
            session.Unroll();

            ////float[] dy = session.GetGradient(ys[0]).Weights.ToArray();
            float[] dy         = new float[] { 0.1f, 0.2f, 0.3f, 0.4f };
            float[] expectedWG = new float[layer.W.Length];
            float[] expectedUG = new float[layer.U.Length];
            float[] expectedBG = new float[layer.B.Length];
            float[] expectedDx = new float[x.Length];

            for (int oi = 2, ii = 3; oi >= 0; oi -= 2, ii -= 3)
            {
                Nonlinearity.ReLUGradient(2, dy, oi, true, expected.Weights, oi, dy, oi);

                // dW: should be x(t)' * dy(t)
                Matrix.VxV(MatrixLayout.ColumnMajor, 3, 2, x.Weights, ii, dy, oi, expectedWG, 0, false);

                // dx(t): should be W' * dy(t)
                Matrix.MxV(MatrixLayout.ColumnMajor, 3, 2, layer.W.Weights, 0, false, dy, oi, expectedDx, ii, true);

                if (oi > 0)
                {
                    // dU: should be y(t-1)' * dy(t)
                    Matrix.VxV(MatrixLayout.ColumnMajor, 2, 2, expected.Weights, oi - 2, dy, oi, expectedUG, 0, false);

                    // dy(t-1): should be U' * dy(t)
                    Matrix.MxV(MatrixLayout.ColumnMajor, 2, 2, layer.U.Weights, 0, false, dy, oi, dy, oi - 2, false);
                    ////MKL.MxV(MatrixLayout.RowMajor, 2, 2, layer.U.Weights, 0, false, dy, oi, dy, oi - 2, false);
                }

                // dB: should be the sum of dy(t) over time
                Mathematics.Add(2, dy, oi, expectedBG, 0);
            }

            Helpers.AreArraysEqual(expectedWG, layer.W.Gradient);
            ////Helpers.AreArraysEqual(expectedUG, layer.U.Gradient);
            Helpers.AreArraysEqual(expectedBG, layer.B.Gradient);
            Helpers.AreArraysEqual(expectedDx, x.Gradient);
        }
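
The backward loop in the test mirrors the SRN gradient recurrence: after back-propagating through the ReLU, dW accumulates dy(t) * x(t)', dU accumulates dy(t) * y(t-1)', dB accumulates dy(t), dx(t) receives W' * dy(t), and dy(t-1) receives U' * dy(t). A plain-C# sketch of that recurrence under the same assumed row-major layout (SrnBackwardReference is illustrative, not a library method):

        // Reference gradients for the SRN, assuming row-major W (H x N) and
        // U (H x H). y holds the activated forward outputs; dy holds dL/dy
        // on entry and is overwritten with the propagated deltas.
        public static void SrnBackwardReference(
            float[] x, float[] w, float[] u, float[] y, float[] dy,
            float[] dw, float[] du, float[] db, float[] dx, int T, int N, int H)
        {
            for (int t = T - 1; t >= 0; t--)
            {
                // back-propagate through ReLU: zero the gradient where y(t) <= 0
                for (int i = 0; i < H; i++)
                {
                    if (y[(t * H) + i] <= 0.0f)
                    {
                        dy[(t * H) + i] = 0.0f;
                    }
                }

                for (int i = 0; i < H; i++)
                {
                    float d = dy[(t * H) + i];

                    // dB += dy(t)
                    db[i] += d;

                    for (int j = 0; j < N; j++)
                    {
                        dw[(i * N) + j] += d * x[(t * N) + j];    // dW += dy(t) * x(t)'
                        dx[(t * N) + j] += w[(i * N) + j] * d;    // dx(t) += W' * dy(t)
                    }

                    if (t > 0)
                    {
                        for (int j = 0; j < H; j++)
                        {
                            du[(i * H) + j] += d * y[((t - 1) * H) + j];    // dU += dy(t) * y(t-1)'
                            dy[((t - 1) * H) + j] += u[(i * H) + j] * d;    // dy(t-1) += U' * dy(t)
                        }
                    }
                }
            }
        }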