public static Tensor SRN(
    this Session session,
    Tensor x,
    Tensor w,
    Tensor u,
    Tensor b,
    int numberOfNeurons,
    MatrixLayout matrixLayout)
{
    const string ActionName = "srn";

    // calculate y = W * x + b for all steps of the sequence at once
    Tensor y = session.FullyConnected(x, w, b, matrixLayout);

    int tt = y.Shape.GetAxis(Axis.B);   // number of vectors in the time sequence

    float[] uw = u.Weights;
    float[] yw = y.Weights;

    // the first step has no previous state, so only the nonlinearity is applied
    Nonlinearity.ReLU(numberOfNeurons, yw, 0, yw, 0);

    // add the hidden state to the output tensor:
    // y(t) += U * y(t-1) (product of the hidden weight matrix and the previous hidden vector)
    for (int t = 1, yi = numberOfNeurons; t < tt; t++, yi += numberOfNeurons)
    {
        Matrix.MxV(matrixLayout, numberOfNeurons, numberOfNeurons, uw, 0, false, yw, yi - numberOfNeurons, yw, yi, false);

        // TODO: customize activation function
        Nonlinearity.ReLU(numberOfNeurons, yw, yi, yw, yi);
    }

    if (session.CalculateGradients)
    {
        session.Push(
            ActionName,
            () =>
            {
                float[] duw = u.Gradient;
                float[] dyw = y.Gradient;

                // walk the sequence backward (backpropagation through time)
                for (int t = tt - 1, yi = t * numberOfNeurons; t > 0; t--, yi -= numberOfNeurons)
                {
                    Nonlinearity.ReLUGradient(numberOfNeurons, dyw, yi, true, yw, yi, dyw, yi);

                    // dU += dy(t) * y(t-1)'
                    lock (u)
                    {
                        Matrix.VxV(matrixLayout, numberOfNeurons, numberOfNeurons, dyw, yi, yw, yi - numberOfNeurons, duw, 0, false);
                    }

                    // dy(t-1) += U' * dy(t)
                    Matrix.MxV(matrixLayout, numberOfNeurons, numberOfNeurons, uw, 0, true, dyw, yi, dyw, yi - numberOfNeurons, false);
                }

                Nonlinearity.ReLUGradient(numberOfNeurons, dyw, 0, true, yw, 0, dyw, 0);
            });
    }

    return y;
}
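// The recurrence the method above implements, spelled out:
//
//   y(0) = ReLU(W * x(0) + b)
//   y(t) = ReLU(W * x(t) + b + U * y(t-1)),   t = 1 .. T-1
//
// and the backward pass, for t = T-1 .. 1:
//
//   dy(t)    = ReLU'(y(t)) .* dy(t)
//   dU      += dy(t) * y(t-1)'
//   dy(t-1) += U' * dy(t)
//
// Below is a minimal standalone sketch of the forward recurrence on plain arrays,
// for reference only. The class name "SrnSketch" and the naive loops are
// hypothetical and assume a row-major W (n x m) and U (n x n); the library's
// Matrix/Nonlinearity kernels above are the real implementation.
internal static class SrnSketch
{
    // x: T input vectors of length m; returns T hidden vectors of length n.
    public static float[][] Forward(float[][] x, float[] w, float[] u, float[] b, int n, int m)
    {
        int steps = x.Length;
        float[][] y = new float[steps][];

        for (int t = 0; t < steps; t++)
        {
            y[t] = new float[n];
            for (int i = 0; i < n; i++)
            {
                float sum = b[i];

                // W * x(t)
                for (int j = 0; j < m; j++)
                {
                    sum += w[(i * m) + j] * x[t][j];
                }

                // U * y(t-1) for all steps after the first
                if (t > 0)
                {
                    for (int j = 0; j < n; j++)
                    {
                        sum += u[(i * n) + j] * y[t - 1][j];
                    }
                }

                // ReLU activation
                y[t][i] = System.Math.Max(0.0f, sum);
            }
        }

        return y;
    }
}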
public void ForwardBackwardTest1()
{
    const int T = 2;
    const int N = 3;

    Session session = new Session();

    SRNCell layer = new SRNCell(new Shape(new[] { -1, N }), RNNDirection.ForwardOnly, 2, MatrixLayout.RowMajor, null);
    layer.W.Randomize(this.random);
    layer.U.Randomize(this.random);
    layer.B.Randomize(this.random);
    ////layer.W.Set(new float[] { 0.1f, 0.2f, -0.3f, 0.4f, 0.5f, 0.6f });   // 3x2 matrix
    ////layer.U.Set(new float[] { 0.1f, 0.2f, 0.3f, 0.4f });                // 2x2 matrix
    ////layer.B.Set(new float[] { 0.1f, 0.2f });                            // 2x1 vector

    Tensor x = new Tensor(null, new[] { T, N });
    x.Randomize(this.random);
    ////x.Set(new float[] { 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f });

    IList<Tensor> xs = new[] { x };
    IList<Tensor> ys = layer.Forward(session, xs);

    float[] bw = layer.B.Weights;

    // y(0) = ReLU(W * x(0) + b)
    Tensor expected = new Tensor(null, new[] { 2, 2 });
    expected.Weights[0] = Matrix.DotProduct(3, layer.W.Weights, 0, x.Weights, 0) + bw[0];
    expected.Weights[1] = Matrix.DotProduct(3, layer.W.Weights, 3, x.Weights, 0) + bw[1];
    Nonlinearity.ReLU(2, expected.Weights, 0, expected.Weights, 0);

    // y(1) = ReLU(W * x(1) + b + U * y(0))
    expected.Weights[2] = Matrix.DotProduct(3, layer.W.Weights, 0, x.Weights, 3) + bw[0] +
        Matrix.DotProduct(2, layer.U.Weights, 0, expected.Weights, 0);
    expected.Weights[3] = Matrix.DotProduct(3, layer.W.Weights, 3, x.Weights, 3) + bw[1] +
        Matrix.DotProduct(2, layer.U.Weights, 2, expected.Weights, 0);
    Nonlinearity.ReLU(2, expected.Weights, 2, expected.Weights, 2);

    Helpers.AreTensorsEqual(expected, ys[0]);

    // unroll the graph
    ////session.GetGradient(ys[0]).Randomize(this.random);
    ys[0].SetGradient(new float[] { 0.1f, 0.2f, 0.3f, 0.4f });
    session.Unroll();

    ////float[] dy = session.GetGradient(ys[0]).Weights.ToArray();
    float[] dy = new float[] { 0.1f, 0.2f, 0.3f, 0.4f };

    // compute the expected gradients by walking the two time steps backward
    float[] expectedWG = new float[layer.W.Length];
    float[] expectedUG = new float[layer.U.Length];
    float[] expectedBG = new float[layer.B.Length];
    float[] expectedDx = new float[x.Length];

    for (int oi = 2, ii = 3; oi >= 0; oi -= 2, ii -= 3)
    {
        Nonlinearity.ReLUGradient(2, dy, oi, true, expected.Weights, oi, dy, oi);

        // should be x' * dy
        Matrix.VxV(MatrixLayout.ColumnMajor, 3, 2, x.Weights, ii, dy, oi, expectedWG, 0, false);

        // should be W' * dy
        Matrix.MxV(MatrixLayout.ColumnMajor, 3, 2, layer.W.Weights, 0, false, dy, oi, expectedDx, ii, true);

        if (oi > 0)
        {
            // should be y(t-1)' * dy
            Matrix.VxV(MatrixLayout.ColumnMajor, 2, 2, expected.Weights, oi - 2, dy, oi, expectedUG, 0, false);

            // should be U' * dy
            Matrix.MxV(MatrixLayout.ColumnMajor, 2, 2, layer.U.Weights, 0, false, dy, oi, dy, oi - 2, false);
            ////MKL.MxV(MatrixLayout.RowMajor, 2, 2, layer.U.Weights, 0, false, dy, oi, dy, oi - 2, false);
        }

        // should be dy
        Mathematics.Add(2, dy, oi, expectedBG, 0);
    }

    Helpers.AreArraysEqual(expectedWG, layer.W.Gradient);
    ////Helpers.AreArraysEqual(expectedUG, layer.U.Gradient);
    Helpers.AreArraysEqual(expectedBG, layer.B.Gradient);
    Helpers.AreArraysEqual(expectedDx, x.Gradient);
}
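// A complementary way to validate the analytic gradients asserted above is a
// numeric (central-difference) check. The sketch below is illustrative only;
// "GradientCheck" and "NumericGradient" are hypothetical helpers, not part of
// the test suite, and assume a scalar loss over a flat parameter array.
internal static class GradientCheck
{
    // Approximates dLoss/dTheta[i] as (L(theta_i + eps) - L(theta_i - eps)) / (2 * eps).
    public static float[] NumericGradient(System.Func<float[], float> loss, float[] theta, float eps = 1e-3f)
    {
        float[] grad = new float[theta.Length];

        for (int i = 0; i < theta.Length; i++)
        {
            float saved = theta[i];

            theta[i] = saved + eps;
            float lossPlus = loss(theta);

            theta[i] = saved - eps;
            float lossMinus = loss(theta);

            theta[i] = saved;   // restore the parameter before moving on
            grad[i] = (lossPlus - lossMinus) / (2.0f * eps);
        }

        return grad;
    }
}

// Each entry of the numeric gradient can then be compared against layer.W.Gradient,
// layer.U.Gradient, and layer.B.Gradient within a float tolerance; expect mismatches
// near ReLU kinks, where the activation is not differentiable at zero.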