/// <summary>
/// Builds two compiled functions around <c>Crf.Forward</c> (the default call and the
/// <c>viterbi: true</c> variant) and then samples randomly sized observation and
/// chain tensors for a fixed number of trials.
/// </summary>
/// <remarks>
/// NOTE(review): the compiled functions are never invoked and nothing is asserted,
/// so as written this only checks that graph construction and sampling do not throw.
/// </remarks>
public void TestForwardCrf()
{
    const int trialCount = 20;

    // Fixed seed so the sampled sizes and values are reproducible.
    var random = NN.Random.Seed(20130601);

    var observations = T.Matrix<float>("o");
    var transitions = T.Tensor3<float>("c");

    var forward = T.Function(
        input: (observations, transitions),
        output: Crf.Forward(observations, transitions));
    var forwardViterbi = T.Function(
        input: (observations, transitions),
        output: Crf.Forward(observations, transitions, viterbi: true));

    for (var trial = 0; trial < trialCount; ++trial)
    {
        // Sizes are drawn in this order (labels first, then timesteps) to keep
        // the random stream identical from run to run.
        var labelCount = random.Next(2, 10);
        var timestepCount = random.Next(2, 10);

        // Sampled but currently unused (see the review note above).
        var sampledObservations = NN.Random.Uniform(-1, 1, timestepCount, labelCount);
        var sampledChain = NN.Random.Uniform(-1, 1, labelCount, labelCount, labelCount);
    }
}
/// <summary>
/// Builds a small tanh recurrent network with <c>T.Scan</c>, differentiates
/// <c>0.5 * Norm2(output - expected)</c>, and checks the structure of the symbolic
/// gradient with respect to <c>Wstate</c>: the wrapping Reshaping/Sum/For nodes, the
/// backward loop's sequences, variables and per-step expressions.
/// </summary>
public void RnnXorHasCorrectGradient()
{
    NN.Random.Seed(12345);
    int nh = 10; // hidden layer

    var Wbit = T.Shared(0.2f * NN.Random.Uniform(-1.0f, 1.0f, nh, 1).As<float>(), "Wbit");
    var Wstate = T.Shared(NN.Eye<float>(nh), "Wstate");
    var Wout = T.Shared(0.2f * NN.Random.Uniform(-1.0f, 1.0f, 1, nh).As<float>(), "Wout");
    var b = T.Shared(0.2f * NN.Random.Uniform(-1.0f, 1.0f, nh, 1).As<float>(), "b");

    var state0 = T.Shared(NN.Zeros<float>(nh, 1), "state0");

    var bits = T.Tensor3<float>("bits");          // n x 1
    var expected = T.Matrix<float>("expected");   // 1 x 1

    // One recurrence step: new state from the current input and the previous state.
    Func<Tensor<float>, Tensor<float>, Tensor<float>> recurrence = (bit, oldState) =>
        T.Tanh(T.Dot(Wbit, bit) + T.Dot(Wstate, oldState) + b);

    var states = T.Scan(fn: recurrence, sequence: bits, outputsInfo: state0);

    // The output is computed from the last state only.
    var output = T.Tanh(T.Dot(Wout, states[(Slice)(-1)]));
    var error = 0.5f * T.Norm2(output - expected);

    // Compiled but not invoked in this test.
    var classify = T.Function(bits, output);
    var gradients = T.Grad(error);

    // The gradient w.r.t. Wstate is expected to be Reshaping(Sum(For(...))).
    var gradWstate = gradients[Wstate];
    Assert.IsNotNull(gradWstate);
    var gradWstateIsReshape = gradWstate as Reshaping<float>;
    Assert.IsNotNull(gradWstateIsReshape);
    var gradWstateIsSum = gradWstateIsReshape.x as Sum<float>;
    Assert.IsNotNull(gradWstateIsSum);
    var dfor = gradWstateIsSum.x as Tensor<float>.For;
    // Fail with an assertion instead of a NullReferenceException on dfor.Loop
    // when the summed expression is not a For node.
    Assert.IsNotNull(dfor);
    var backLoop = dfor.Loop;

    Assert.AreEqual(3, backLoop.Sequences.Count); // bit, states, delta
    // NOTE(review): the listing below puts dWstate at position 2, but the code reads
    // Fors[3] as dWstate and expects dfor.Index == 3 - confirm the actual ordering.
    Assert.AreEqual(6, backLoop.Fors.Count); // dbit, dstate, dWstate, db, dWbit, dstate_p1
    Assert.AreEqual(3, dfor.Index);

    // TODO: check why a recursive was expected
    //var dWstate_ = dfor.RecursiveVariable;
    //Assert.AreEqual("dWstate_", dWstate_.Name);

    // Loop variables are checked by position and by name.
    var variables = backLoop.Variables.Cast<Tensor<float>>().ToList();
    var bit_ = variables[0];
    Assert.AreEqual("bit_", bit_.Name);
    var oldState_ = variables[1];
    Assert.AreEqual("oldState_", oldState_.Name);
    var delta_oldState_ = variables[2];
    Assert.AreEqual("delta_oldState_", delta_oldState_.Name);
    var dbit_ = variables[3];
    Assert.AreEqual("dbit_", dbit_.Name);
    var doldState_ = variables[4];
    Assert.AreEqual("doldState_", doldState_.Name);
    var oldState_tp1_ = variables[5];
    Assert.AreEqual("oldState_tp1", oldState_tp1_.Name);

    // Expected shared sub-expression: incoming deltas times (1 - state^2).
    var d = T.Sum((delta_oldState_ + doldState_) * (1f - T.Square(oldState_tp1_)), axis: 1, keepDims: true);

    var doldState = (Tensor<float>)backLoop.Fors[1].Expression;
    (T.Dot(Wstate, d, transposeX: true)).AssertEqual(doldState);

    var dWstate = (Tensor<float>)backLoop.Fors[3].Expression;
    var dWstateExp = T.Dot(d, oldState_, transposeY: true);
    dWstateExp.AssertEqual(dWstate);

    var dbit = (Tensor<float>)backLoop.Fors[0].Expression;
    // NOTE(review): unlike the AssertEqual checks around it, the result of
    // StructuralEquality is discarded, so this line presumably asserts nothing -
    // confirm whether that is intentional.
    (T.Dot(Wbit, d, transposeX: true)).StructuralEquality(dbit);

    var oldState_tp1 = (Tensor<float>)backLoop.Fors[5].Expression;
    oldState_tp1.AssertEqual(oldState_);
}