Code Example #1
        public void SumProductWithSharedHasCorrectGrad()
        {
            // sequence of input
            var xs = T.Matrix<float>("xs");
            // accumulator
            var z = T.Vector<float>("z");
            var b = T.Vector<float>("b");

            // sum xs in the accumulator
            Func<Tensor<float>, Tensor<float>, IList<Tensor<float>>> rec =
                (x, a) => new List<Tensor<float>> { x + a, x * a + b };
            var loop = T.Scan(rec, xs, new[] { z, null });

            // get the last value
            var prod = loop[1][-1];
            var cost = T.Sum(prod);
            //var dz = T.Grad(cost, z);
            var db = T.Grad(cost, b);

            var reshape = db as Reshaping<float>;
            var sum     = reshape.x as Sum<float>;

            Assert.AreEqual(0, sum.Axis);

            var dfor     = sum.x as Tensor<float>.For;
            var backLoop = dfor.Loop;

            Assert.AreEqual(3, backLoop.Sequences.Count);
            Assert.AreEqual(4, backLoop.Fors.Count);
            Assert.AreEqual(2, dfor.Index);

            // TODO: check why a recursive was expected
            //var db_ = dfor.RecursiveVariable;
            //Assert.AreEqual("db_", db_.Name);

            var variables = backLoop.Variables.Cast<Tensor<float>>().ToList();
            var x_        = variables[0];

            Assert.AreEqual("x_", x_.Name);
            var a_ = variables[1];

            Assert.AreEqual("a_", a_.Name);
            var d_f1_ = variables[2];

            Assert.AreEqual("delta_f1_", d_f1_.Name);
            var da_ = variables[4];

            Assert.AreEqual("da_", da_.Name);
            var dx = (Tensor<float>)backLoop.Fors[0].Expression;
            var da = (Tensor<float>)backLoop.Fors[1].Expression;

            Assert.IsTrue((d_f1_ * a_ + da_).StructuralEquality(dx));
            Assert.IsTrue((d_f1_ * x_ + da_).StructuralEquality(da));
            Assert.IsTrue((d_f1_).StructuralEquality(dfor.Expression));
        }
Code Example #2
File: BroadCast.cs Project: stuarthillary/TheaNet
 public override void Backward(Tensor<Type> delta, Backpropagation bp)
 {
     if (delta.IsZero)
     {
         return;
     }
     foreach (var a in broadcast)
     {
         delta = Op.Sum(delta, axis: a, keepDims: true);
     }
     bp.PushGradientTo(x, delta);
 }
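The Backward override above sums the upstream delta over every broadcast axis before pushing it to x, because each element of the broadcast input contributed to several output elements. A minimal sketch of that reduction in plain C# (not the TheaNet API; the 2 x 3 delta and the 1 x 3 input are made-up illustration values):

using System;

class BroadcastGradDemo
{
    static void Main()
    {
        // Suppose a 1 x 3 row vector was broadcast over 2 rows in the forward pass.
        // Its gradient is the upstream 2 x 3 delta summed over the broadcast axis
        // (axis 0), mirroring the Op.Sum(delta, axis: a, keepDims: true) loop above.
        float[,] delta = { { 1f, 2f, 3f }, { 4f, 5f, 6f } }; // upstream gradient, 2 x 3
        var gradRow = new float[3];                          // gradient w.r.t. the 1 x 3 input
        for (int j = 0; j < 3; j++)
            for (int i = 0; i < 2; i++)
                gradRow[j] += delta[i, j];

        Console.WriteLine(string.Join(", ", gradRow));       // 5, 7, 9
    }
}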
Code Example #3
        public void SumProductHasCorrectGrad()
        {
            // sequence of input
            var xs = T.Matrix<float>("xs");
            // accumulator
            var z = T.Vector<float>("z");

            // sum xs in the accumulator
            Func<Tensor<float>, Tensor<float>, IList<Tensor<float>>> rec =
                (x, a) => new List<Tensor<float>> { x + a, x * a };
            var loop = T.Scan(rec, xs, new[] { z, null });

            // get the last value
            var prod = loop[1][-1];
            var cost = T.Sum(prod);
            var dz   = T.Grad(cost, z);

            var slicing = dz as Slicing<float>;

            Assert.AreEqual(1, slicing.Slices.Count);
            Assert.IsTrue(slicing.Slices[0].IsSingleton);
            Assert.AreEqual(-1, ((Scalar<int>.Const)slicing.Slices[0].Start).Value);

            var dfor     = slicing.x as Tensor<float>.For;
            var backLoop = dfor.Loop;

            Assert.AreEqual(3, backLoop.Sequences.Count);
            Assert.AreEqual(3, backLoop.Fors.Count);
            Assert.AreEqual(1, dfor.Index);

            var variables = backLoop.Variables.Cast<Tensor<float>>().ToList();
            var x_        = variables[0];

            Assert.AreEqual("x_", x_.Name);
            var a_ = variables[1];

            Assert.AreEqual("a_", a_.Name);
            var d_f1_ = variables[2];

            Assert.AreEqual("delta_f1_", d_f1_.Name);
            var da_ = variables[4];

            Assert.AreEqual("da_", da_.Name);
            var dx = (Tensor<float>)backLoop.Fors[0].Expression;
            var da = (Tensor<float>)backLoop.Fors[1].Expression;

            Assert.IsTrue((d_f1_ * a_ + da_).StructuralEquality(dx));
            Assert.IsTrue((d_f1_ * x_ + da_).StructuralEquality(da));
        }
Code Example #4
        public void SumHasCorrectGrad()
        {
            // sequence of input
            var xs = T.Matrix<float>("xs");
            // accumulator
            var z = T.Vector<float>("z");

            // sum xs in the accumulator
            var partialSums = T.Scan((x, a) => x + a, xs, z);
            // get the last value
            var sum  = partialSums[-1];
            var cost = T.Sum(sum * sum);
            var dz   = T.Grad(cost, z);

            var slicing = dz as Slicing<float>;

            Assert.AreEqual(1, slicing.Slices.Count);
            Assert.IsTrue(slicing.Slices[0].IsSingleton);
            Assert.AreEqual(-1, ((Scalar<int>.Const)slicing.Slices[0].Start).Value);

            var dfor     = slicing.x as Tensor<float>.For;
            var backLoop = dfor.Loop;

            Assert.AreEqual(3, backLoop.Sequences.Count);
            Assert.AreEqual(3, backLoop.Fors.Count);
            Assert.AreEqual(1, dfor.Index);

            var variables = backLoop.Variables.Cast<Tensor<float>>().ToList();
            var x_        = variables[0];

            Assert.AreEqual("x_", x_.Name);
            var a_ = variables[1];

            Assert.AreEqual("a_", a_.Name);
            var delta_a_ = variables[2];

            Assert.AreEqual("delta_a_", delta_a_.Name);
            var dx_ = variables[3];

            Assert.AreEqual("dx_", dx_.Name);
            var da_ = variables[4];

            Assert.AreEqual("da_", da_.Name);
            var dx = (Tensor<float>)backLoop.Fors[0].Expression;
            var da = (Tensor<float>)backLoop.Fors[1].Expression;

            Assert.IsTrue((delta_a_ + da_).StructuralEquality(dx));
            Assert.IsTrue((delta_a_ + da_).StructuralEquality(da));
        }
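The expressions asserted above follow from simple calculus: with a_t = a_{t-1} + x_t the final accumulator is s = z + sum_t(x_t), so cost = sum(s * s) gives d cost / d z = 2 * s, and each backward step of the addition merely forwards the incoming delta (the delta_a_ + da_ terms). A hand computation of that gradient in plain C# (illustration only, with arbitrary values for z and xs; not part of the test):

using System;

class SumGradByHand
{
    static void Main()
    {
        // Forward pass by hand: a_t = a_{t-1} + x_t, so the last accumulator is
        // s = z + sum_t(x_t) and cost = sum_j(s_j * s_j); hence d cost / d z_j = 2 * s_j.
        float[] z = { 0.5f, -1f };                               // arbitrary seed
        float[][] xs = { new[] { 1f, -1f }, new[] { 0f, -2f } }; // arbitrary sequence

        var s = (float[])z.Clone();
        foreach (var x in xs)
            for (int j = 0; j < s.Length; j++)
                s[j] += x[j];

        for (int j = 0; j < s.Length; j++)
            Console.WriteLine($"d cost / d z[{j}] = {2 * s[j]}");
    }
}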
Code Example #5
            public sealed override void Backward(Tensor<Type> delta, Backpropagation bp)
            {
                delta.AssertOfShape(Shape);

                for (int i = 0; i < Inputs.Length; ++i)
                {
                    var x      = Inputs[i];
                    var deltaX = delta * D(i);
                    foreach (int axis in broadcast[x])
                    {
                        deltaX = Op.Sum(deltaX, axis, keepDims: true);
                    }
                    bp.PushGradientTo(x, deltaX);
                }

                // TODO: fix. This may push gradient using variables of the lambda outside of the Apply.
                bp.PushGradientTo(Abstraction, Op.Sum(delta));
            }
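This Backward applies the chain rule per input of an element-wise op: the upstream delta is multiplied by the partial derivative D(i) and then reduced over any axes that were broadcast. A self-contained sketch of the same rule in plain C# (not the TheaNet API), taking element-wise multiplication as the op, where D(x) = y and D(y) = x:

using System;

class ElementwiseGradDemo
{
    static void Main()
    {
        // Chain rule for an element-wise op: deltaX = delta * D(i), where D(i) is
        // the partial derivative of the op w.r.t. input i. For f(x, y) = x * y,
        // D(x) = y and D(y) = x (no broadcast axes to reduce in this example).
        float[] x = { 1f, 2f, 3f };
        float[] y = { 4f, 5f, 6f };
        float[] delta = { 0.1f, 0.2f, 0.3f };   // upstream gradient of f(x, y)

        var dx = new float[3];
        var dy = new float[3];
        for (int i = 0; i < 3; i++)
        {
            dx[i] = delta[i] * y[i];            // delta * D(x)
            dy[i] = delta[i] * x[i];            // delta * D(y)
        }

        Console.WriteLine(string.Join(", ", dx)); // 0.4, 1, 1.8
        Console.WriteLine(string.Join(", ", dy)); // 0.1, 0.4, 0.9
    }
}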
Code Example #6
        public void SumProductWithSharedCanTrain()
        {
            var n = 2;
            // sequence of input
            var xs = T.Matrix<float>("xs");
            // accumulator
            var z = T.Vector<float>("z");
            var b = T.Shared(NN.Ones(n), "b");

            // sum xs in the accumulator
            Func<Tensor<float>, Tensor<float>, IList<Tensor<float>>> rec =
                (x, a) => new List<Tensor<float>> { x + a, x * a + b };
            var loop = T.Scan(rec, xs, new[] { z, null });

            // get the last value
            var prod = loop[1][-1];

            // compute the cost and the gradient for the shared b.
            var cost = T.Sum(prod);
            var db   = T.Grad(cost, b);

            var costFunction = T.Function(input: (xs, z), output: cost);
            var xs_          = NN.Array(new float[,] {
                { 1, -1 },
                { 0, -2 }
            });

            var z_ = NN.Zeros(n);

            var cost_xs_z = costFunction(xs_, z_);

            Assert.AreEqual(4, cost_xs_z);

            var updates = new OrderedDictionary {
                { b, b - 0.05f * db }
            };
            var train      = T.Function(input: (xs, z), output: cost, updates: updates);
            var cost_xs_z2 = train(xs_, z_);

            AssertArray.AreAlmostEqual(NN.Array(new[] { 0.95f, 0.95f }), b.Value);
        }
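The constants asserted in this test can be checked by hand: with xs = {{1, -1}, {0, -2}}, z = 0 and b = ones, the last product row is {1, 3}, so the cost is 4; since b enters that last row additively, d cost / d b = {1, 1} and the update b - 0.05 * db yields {0.95, 0.95}. A plain C# recomputation of those numbers (illustration only, not the TheaNet API):

using System;

class TrainStepByHand
{
    static void Main()
    {
        // Recompute the asserted values: cost == 4, then b == {0.95, 0.95}
        // after one update b <- b - 0.05 * db.
        float[,] xs = { { 1, -1 }, { 0, -2 } };
        float[] z = { 0, 0 };
        float[] b = { 1, 1 };                        // NN.Ones(2)

        var a = (float[])z.Clone();
        var prod = new float[2];
        for (int t = 0; t < 2; t++)
        {
            var next = new float[2];
            for (int j = 0; j < 2; j++)
            {
                next[j] = xs[t, j] + a[j];           // accumulator: x + a
                prod[j] = xs[t, j] * a[j] + b[j];    // product:     x * a + b
            }
            a = next;
        }

        float cost = prod[0] + prod[1];              // 1 + 3 = 4
        // Only the last product row reaches the cost, and b enters it additively,
        // so d cost / d b = {1, 1}.
        for (int j = 0; j < 2; j++)
            b[j] -= 0.05f * 1f;                      // b becomes {0.95, 0.95}

        Console.WriteLine($"cost = {cost}, b = [{b[0]}, {b[1]}]");
    }
}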
Code Example #7
        public void RnnXorHasCorrectGradient()
        {
            NN.Random.Seed(12345);
            int nh = 10; // hidden layer

            var Wbit   = T.Shared(0.2f * NN.Random.Uniform(-1.0f, 1.0f, nh, 1).As<float>(), "Wbit");
            var Wstate = T.Shared(NN.Eye<float>(nh), "Wstate");
            var Wout   = T.Shared(0.2f * NN.Random.Uniform(-1.0f, 1.0f, 1, nh).As<float>(), "Wout");
            var b      = T.Shared(0.2f * NN.Random.Uniform(-1.0f, 1.0f, nh, 1).As<float>(), "b");

            var state0 = T.Shared(NN.Zeros<float>(nh, 1), "state0");

            var bits     = T.Tensor3<float>("bits");          // n x 1
            var expected = T.Matrix<float>("expected");       // 1 x 1

            Func<Tensor<float>, Tensor<float>, Tensor<float>> recurrence = (bit, oldState) =>
                T.Tanh(T.Dot(Wbit, bit) + T.Dot(Wstate, oldState) + b);

            var states = T.Scan(fn: recurrence, sequence: bits, outputsInfo: state0);

            var output    = T.Tanh(T.Dot(Wout, states[(Slice)(-1)]));
            var error     = 0.5f * T.Norm2(output - expected);
            var classify  = T.Function(bits, output);
            var gradients = T.Grad(error);

            var gradWstate = gradients[Wstate];

            Assert.IsNotNull(gradWstate);
            var gradWstateIsReshape = gradWstate as Reshaping<float>;

            Assert.IsNotNull(gradWstateIsReshape);
            var gradWstateIsSum = gradWstateIsReshape.x as Sum<float>;

            Assert.IsNotNull(gradWstateIsSum);
            var dfor     = gradWstateIsSum.x as Tensor<float>.For;
            var backLoop = dfor.Loop;

            Assert.AreEqual(3, backLoop.Sequences.Count); // bit, states, delta
            Assert.AreEqual(6, backLoop.Fors.Count);      // dbit, dstate, dWstate, db, dWbit, dstate_p1
            Assert.AreEqual(3, dfor.Index);

            // TODO: check why a recursive was expected
            //var dWstate_ = dfor.RecursiveVariable;
            //Assert.AreEqual("dWstate_", dWstate_.Name);

            var variables = backLoop.Variables.Cast<Tensor<float>>().ToList();
            var bit_      = variables[0];

            Assert.AreEqual("bit_", bit_.Name);
            var oldState_ = variables[1];

            Assert.AreEqual("oldState_", oldState_.Name);
            var delta_oldState_ = variables[2];

            Assert.AreEqual("delta_oldState_", delta_oldState_.Name);
            var dbit_ = variables[3];

            Assert.AreEqual("dbit_", dbit_.Name);
            var doldState_ = variables[4];

            Assert.AreEqual("doldState_", doldState_.Name);
            var oldState_tp1_ = variables[5];

            Assert.AreEqual("oldState_tp1", oldState_tp1_.Name);

            var d = T.Sum((delta_oldState_ + doldState_) * (1f - T.Square(oldState_tp1_)), axis: 1, keepDims: true);

            var doldState = (Tensor<float>)backLoop.Fors[1].Expression;

            (T.Dot(Wstate, d, transposeX: true)).AssertEqual(doldState);

            var dWstate    = (Tensor<float>)backLoop.Fors[3].Expression;
            var dWstateExp = T.Dot(d, oldState_, transposeY: true);

            dWstateExp.AssertEqual(dWstate);

            var dbit = (Tensor<float>)backLoop.Fors[0].Expression;

            Assert.IsTrue(T.Dot(Wbit, d, transposeX: true).StructuralEquality(dbit));

            var oldState_tp1 = (Tensor<float>)backLoop.Fors[5].Expression;

            oldState_tp1.AssertEqual(oldState_);
        }
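The backward expressions checked here are standard backpropagation through time: the factor (1f - T.Square(oldState_tp1_)) is the tanh derivative evaluated at the activation stored during the forward pass, and the transposed Dot calls come from the identities d(W·v)/dv = Wᵀ·δ and d(W·v)/dW = δ·vᵀ. A small numeric check of the tanh identity in plain C# (illustration only, not part of the test):

using System;

class TanhGradCheck
{
    static void Main()
    {
        // d tanh(u) / d u = 1 - tanh(u)^2, so the backward loop can reuse the
        // stored forward activation (oldState_tp1) instead of recomputing u.
        double u = 0.37, eps = 1e-6;
        double analytic = 1 - Math.Pow(Math.Tanh(u), 2);
        double numeric  = (Math.Tanh(u + eps) - Math.Tanh(u - eps)) / (2 * eps);
        Console.WriteLine($"analytic = {analytic:F8}, numeric = {numeric:F8}");
    }
}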
Code Example #8
File: Tensor.For.cs Project: stuarthillary/TheaNet
            public override void Backward(Tensor<Type> deltas, Backpropagation bp)
            {
                deltas.AssertOfShape(Shape);

                var deltaFromRecursive = OutputInfo != null;

                // var in the forward -> for in the backward
                var forsDic = new Dictionary<ISymbol, IFor>();   // ITensorSymbol

                var backLoop = new Loop("d" + Loop.Name);

                backLoop.Length = Loop.Length;
                var substitution = new Patch(preserveShape: true);

                // add the sequences used by the forward
                int fwdSeqCount = Loop.Sequences.Count;

                for (int i = 0; i < fwdSeqCount; i++)
                {
                    var seq      = Loop.Sequences[i];
                    var variable = Loop.Variable(seq);
                    var alias    = Loop.Sequences[i].Match(
                        (Tensor<float> s) =>
                        backLoop.AddSeq(s[Step_m1], variable.Name + "_", Loop.SequenceAxes[i]),
                        (Tensor<int> s) =>
                        backLoop.AddSeq(s[Step_m1], variable.Name + "_", Loop.SequenceAxes[i]),
                        (Func<ITensor>)null
                        );
                    substitution.Add_(variable, alias);
                }

                // add the sequences computed by the forward
                foreach (var @for in Loop.Fors)
                {
                    if (@for.IsRecursive)
                    {
                        var variable = @for.RecursiveVariable;
                        var alias    = @for.Match(
                            (Tensor<float>.For f) =>
                            backLoop.AddSeq(new Insert<float>(f, 0, f.OutputInfo, 0)[From_m2_Step_m1], variable.Name + "_", axis: 0),
                            (Tensor<int>.For f) =>
                            backLoop.AddSeq(new Insert<int>(f, 0, f.OutputInfo, 0)[From_m2_Step_m1], variable.Name + "_", axis: 0),
                            (Func<ITensor>)null
                            );
                        substitution.Add_(variable, alias);
                    }
                    else
                    {
                        var alias = @for.Match(
                            (Tensor<float>.For f) =>
                            backLoop.AddSeq(f[Step_m1], @for.Name + "_"),
                            (Tensor<int>.For f) =>
                            backLoop.AddSeq(f[Step_m1], @for.Name + "_"),
                            (Func<ITensor>)null
                            );
                        substitution.Add_(@for.Expression, alias);
                    }
                }

                // add the retropropagated delta
                var deltaOut = backLoop.AddSeq(deltas[Step_m1], $"delta_{RecursiveVariable?.ToString() ?? "f" + Index}_", axis: 0);

                // d_ avoid duplicated variables with the same name.
                var d_ = new Dictionary<IVar, IVar>();

                // add deltas of sequences (inputs of and computed by the forward), initialized to zero
                var recVariables = Loop.RecursiveFors.Select(f => Loop.Variable(f));

                foreach (var varFwd in Loop.Variables)
                {
                    var zeros = varFwd.Match(
                        (Tensor<float>.Var x) => Op.ZerosLike(x),
                        (Tensor<int>.Var x) => Op.ZerosLike(x),
                        (Func<ITensor>)null
                        );
                    var @for = backLoop.AddRecursive_(zeros, zeros, $"d{varFwd.Name}_");
                    @for.Comment = $"dL/d{varFwd}";

                    d_[varFwd]      = @for.RecursiveVariable;
                    forsDic[varFwd] = @for;
                }

                // `others` collect gradients pushed to expressions of the loop that aren't sequences or variables.
                var others = new Dictionary<IExpr, IFor>();

                AddDeltaFromBackpropagate(backLoop, others, forsDic, Backpropagation.Backward(Expression, deltaFromRecursive ? deltaOut + (Var)d_[RecursiveVariable] : deltaOut));

                foreach (var @for in Loop.RecursiveFors)
                {
                    var variable = @for.RecursiveVariable;

                    if (!deltaFromRecursive || @for != this)
                    {
                        var gradExpr = @for.Match(
                            (Tensor<float>.For f) => Backpropagation.Backward(f.Expression, (Tensor<float>)d_[f.RecursiveVariable]),
                            (Tensor<int>.For f) => Backpropagation.Backward(f.Expression, (Tensor<int>)d_[f.RecursiveVariable]),
                            null
                            );

                        AddDeltaFromBackpropagate(backLoop, others, forsDic, gradExpr);
                    }
                    // else: we already added the delta prior to the loop

                    // reuse results computed during the forward inside the backward
                    var alias_tp1 = backLoop.AddRecursive_(variable, @for[-1], variable.Name + "_tp1").RecursiveVariable;
                    substitution.Add_(@for.Expression, alias_tp1);
                }

                // Substitute variable in fors
                foreach (var @for in backLoop.Fors)
                {
                    var comment = @for.Expression.Comment;
                    @for.Expression         = (ITensor)@for.Expression.Patch(substitution);
                    @for.Expression.Comment = comment;
                }

                // deltas of sequences
                for (int i = 0; i < Loop.Sequences.Count; ++i)
                {
                    if (Loop.Sequences[i] is Tensor<float>)
                    {
                        bp.PushGradientTo((Tensor<float>)Loop.Sequences[i], ((Tensor<float>)backLoop.Fors[i])[Step_m1]);
                    }
                    else
                    {
                        throw new NotImplementedException();
                    }
                }

                // deltas of seed
                foreach (var @for in Loop.RecursiveFors)
                {
                    if (@for is Tensor<float>)
                    {
                        bp.PushGradientTo((Tensor<float>)@for.OutputInfo, ((Tensor<float>)forsDic[@for.RecursiveVariable])[-1]);
                    }
                    else
                    {
                        throw new NotImplementedException();
                    }
                }

                // other deltas
                foreach (var W_dW in others)
                {
                    var W = W_dW.Key; var dW = W_dW.Value;
                    if (W is Tensor<float>)
                    {
                        bp.PushGradientTo((Tensor<float>)W, Op.Sum((Tensor<float>)dW, axis: 0));
                    }
                    else
                    {
                        throw new NotImplementedException();
                    }
                }
            }