public void SumProductWithSharedHasCorrectGrad()
{
    // sequence of inputs
    var xs = T.Matrix<float>("xs");
    // accumulator
    var z = T.Vector<float>("z");
    var b = T.Vector<float>("b");

    // sum xs in the accumulator
    Func<Tensor<float>, Tensor<float>, IList<Tensor<float>>> rec =
        (x, a) => new List<Tensor<float>>() { x + a, x * a + b };
    var loop = T.Scan(rec, xs, new[] { z, null });

    // get the last value
    var prod = loop[1][-1];
    var cost = T.Sum(prod);
    //var dz = T.Grad(cost, z);
    var db = T.Grad(cost, b);

    var reshape = db as Reshaping<float>;
    var sum = reshape.x as Sum<float>;
    Assert.AreEqual(0, sum.Axis);
    var dfor = sum.x as Tensor<float>.For;
    var backLoop = dfor.Loop;
    Assert.AreEqual(3, backLoop.Sequences.Count);
    Assert.AreEqual(4, backLoop.Fors.Count);
    Assert.AreEqual(2, dfor.Index);
    // TODO: check why a recursive was expected
    //var db_ = dfor.RecursiveVariable;
    //Assert.AreEqual("db_", db_.Name);

    var variables = backLoop.Variables.Cast<Tensor<float>>().ToList();
    var x_ = variables[0];
    Assert.AreEqual("x_", x_.Name);
    var a_ = variables[1];
    Assert.AreEqual("a_", a_.Name);
    var d_f1_ = variables[2];
    Assert.AreEqual("delta_f1_", d_f1_.Name);
    var da_ = variables[4];
    Assert.AreEqual("da_", da_.Name);

    var dx = (Tensor<float>)backLoop.Fors[0].Expression;
    var da = (Tensor<float>)backLoop.Fors[1].Expression;
    Assert.IsTrue((d_f1_ * a_ + da_).StructuralEquality(dx));
    Assert.IsTrue((d_f1_ * x_ + da_).StructuralEquality(da));
    Assert.IsTrue(d_f1_.StructuralEquality(dfor.Expression));
}
public override void Backward(Tensor<Type> delta, Backpropagation bp)
{
    if (delta.IsZero) { return; }

    // the gradient of broadcasting is summation over the broadcast axes:
    // each broadcast element contributed to several outputs, so their deltas accumulate
    foreach (var a in broadcast)
    {
        delta = Op.Sum(delta, axis: a, keepDims: true);
    }
    bp.PushGradientTo(x, delta);
}
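// A minimal numeric check of the rule above (a hedged sketch): when a vector b is
// broadcast against a matrix, each b[j] contributes to every row, so dL/db is the
// incoming delta summed over the broadcast axis. The method name and shapes are
// illustrative; the T/NN/AssertArray calls follow the tests in this file.
public void BroadcastGradSumsOverBroadcastAxis()
{
    var m = T.Matrix<float>("m"); // shape (2, 3)
    var b = T.Vector<float>("b"); // shape (3), broadcast over axis 0
    var cost = T.Sum(m + b);
    var db = T.Grad(cost, b);
    var f = T.Function(input: (m, b), output: db);
    // two rows contribute, so each component of dL/db is 2
    var db_ = f(NN.Zeros<float>(2, 3), NN.Zeros<float>(3));
    AssertArray.AreAlmostEqual(NN.Array(new[] { 2f, 2f, 2f }), db_);
}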
public void SumProductHasCorrectGrad()
{
    // sequence of inputs
    var xs = T.Matrix<float>("xs");
    // accumulator
    var z = T.Vector<float>("z");

    // sum xs in the accumulator
    Func<Tensor<float>, Tensor<float>, IList<Tensor<float>>> rec =
        (x, a) => new List<Tensor<float>>() { x + a, x * a };
    var loop = T.Scan(rec, xs, new[] { z, null });

    // get the last value
    var prod = loop[1][-1];
    var cost = T.Sum(prod);
    var dz = T.Grad(cost, z);

    var slicing = dz as Slicing<float>;
    Assert.AreEqual(1, slicing.Slices.Count);
    Assert.IsTrue(slicing.Slices[0].IsSingleton);
    Assert.AreEqual(-1, ((Scalar<int>.Const)slicing.Slices[0].Start).Value);

    var dfor = slicing.x as Tensor<float>.For;
    var backLoop = dfor.Loop;
    Assert.AreEqual(3, backLoop.Sequences.Count);
    Assert.AreEqual(3, backLoop.Fors.Count);
    Assert.AreEqual(1, dfor.Index);

    var variables = backLoop.Variables.Cast<Tensor<float>>().ToList();
    var x_ = variables[0];
    Assert.AreEqual("x_", x_.Name);
    var a_ = variables[1];
    Assert.AreEqual("a_", a_.Name);
    var d_f1_ = variables[2];
    Assert.AreEqual("delta_f1_", d_f1_.Name);
    var da_ = variables[4];
    Assert.AreEqual("da_", da_.Name);

    var dx = (Tensor<float>)backLoop.Fors[0].Expression;
    var da = (Tensor<float>)backLoop.Fors[1].Expression;
    Assert.IsTrue((d_f1_ * a_ + da_).StructuralEquality(dx));
    Assert.IsTrue((d_f1_ * x_ + da_).StructuralEquality(da));
}
public void SumHasCorrectGrad()
{
    // sequence of inputs
    var xs = T.Matrix<float>("xs");
    // accumulator
    var z = T.Vector<float>("z");

    // sum xs in the accumulator
    var partialSums = T.Scan((x, a) => x + a, xs, z);

    // get the last value
    var sum = partialSums[-1];
    var cost = T.Sum(sum * sum);
    var dz = T.Grad(cost, z);

    var slicing = dz as Slicing<float>;
    Assert.AreEqual(1, slicing.Slices.Count);
    Assert.IsTrue(slicing.Slices[0].IsSingleton);
    Assert.AreEqual(-1, ((Scalar<int>.Const)slicing.Slices[0].Start).Value);

    var dfor = slicing.x as Tensor<float>.For;
    var backLoop = dfor.Loop;
    Assert.AreEqual(3, backLoop.Sequences.Count);
    Assert.AreEqual(3, backLoop.Fors.Count);
    Assert.AreEqual(1, dfor.Index);

    var variables = backLoop.Variables.Cast<Tensor<float>>().ToList();
    var x_ = variables[0];
    Assert.AreEqual("x_", x_.Name);
    var a_ = variables[1];
    Assert.AreEqual("a_", a_.Name);
    var delta_a_ = variables[2];
    Assert.AreEqual("delta_a_", delta_a_.Name);
    var dx_ = variables[3];
    Assert.AreEqual("dx_", dx_.Name);
    var da_ = variables[4];
    Assert.AreEqual("da_", da_.Name);

    var dx = (Tensor<float>)backLoop.Fors[0].Expression;
    var da = (Tensor<float>)backLoop.Fors[1].Expression;
    Assert.IsTrue((delta_a_ + da_).StructuralEquality(dx));
    Assert.IsTrue((delta_a_ + da_).StructuralEquality(da));
}
public override sealed void Backward(Tensor<Type> delta, Backpropagation bp)
{
    delta.AssertOfShape(Shape);
    for (int i = 0; i < Inputs.Length; ++i)
    {
        var x = Inputs[i];
        // chain rule for an elementwise op: dL/dx_i = delta * df/dx_i
        var deltaX = delta * D(i);
        // sum out the axes that were broadcast for this input to restore its shape
        foreach (int axis in broadcast[x])
        {
            deltaX = Op.Sum(deltaX, axis, keepDims: true);
        }
        bp.PushGradientTo(x, deltaX);
    }
    // TODO: fix. This may push gradients using variables of the lambda outside of the Apply.
    bp.PushGradientTo(Abstraction, Op.Sum(delta));
}
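// A hedged sketch of the chain rule this Backward applies: for an elementwise
// y = f(x), dL/dx = delta * f'(x). With f = tanh, f'(x) = 1 - tanh(x)^2, the same
// factor the RNN test below checks via (1f - T.Square(oldState_tp1_)). The method
// name is illustrative; the T/NN calls follow the tests in this file.
public void ElementwiseGradFollowsChainRule()
{
    var x = T.Vector<float>("x");
    var cost = T.Sum(T.Tanh(x));
    var dx = T.Grad(cost, x); // expected: 1 - tanh(x)^2
    var f = T.Function(input: x, output: dx);
    // tanh'(0) = 1
    AssertArray.AreAlmostEqual(NN.Array(new[] { 1f, 1f }), f(NN.Zeros<float>(2)));
}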
public void SumProductWithSharedCanTrain()
{
    var n = 2;
    // sequence of inputs
    var xs = T.Matrix<float>("xs");
    // accumulator
    var z = T.Vector<float>("z");
    var b = T.Shared(NN.Ones(n), "b");

    // sum xs in the accumulator
    Func<Tensor<float>, Tensor<float>, IList<Tensor<float>>> rec =
        (x, a) => new List<Tensor<float>>() { x + a, x * a + b };
    var loop = T.Scan(rec, xs, new[] { z, null });

    // get the last value
    var prod = loop[1][-1];

    // compute the cost and the gradient for the shared b.
    var cost = T.Sum(prod);
    var db = T.Grad(cost, b);

    var costFunction = T.Function(input: (xs, z), output: cost);
    var xs_ = NN.Array(new float[,] { { 1, -1 }, { 0, -2 } });
    var z_ = NN.Zeros(n);
    var cost_xs_z = costFunction(xs_, z_);
    Assert.AreEqual(4, cost_xs_z);

    var updates = new OrderedDictionary { { b, b - 0.05f * db } };
    var train = T.Function(input: (xs, z), output: cost, updates: updates);
    var cost_xs_z2 = train(xs_, z_);
    AssertArray.AreAlmostEqual(NN.Array(new[] { 0.95f, 0.95f }), b.Value);
}
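// Usage note: because `updates` rewrites b on every call, invoking train(xs_, z_)
// repeatedly performs plain gradient descent on b (b <- b - 0.05 * dL/db), with the
// returned cost reflecting the value of b before the update (assuming the usual
// Theano-like update semantics).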
public void RnnXorHasCorrectGradient()
{
    NN.Random.Seed(12345);
    int nh = 10; // hidden layer size

    var Wbit = T.Shared(0.2f * NN.Random.Uniform(-1.0f, 1.0f, nh, 1).As<float>(), "Wbit");
    var Wstate = T.Shared(NN.Eye<float>(nh), "Wstate");
    var Wout = T.Shared(0.2f * NN.Random.Uniform(-1.0f, 1.0f, 1, nh).As<float>(), "Wout");
    var b = T.Shared(0.2f * NN.Random.Uniform(-1.0f, 1.0f, nh, 1).As<float>(), "b");
    var state0 = T.Shared(NN.Zeros<float>(nh, 1), "state0");

    var bits = T.Tensor3<float>("bits");        // n x 1
    var expected = T.Matrix<float>("expected"); // 1 x 1

    Func<Tensor<float>, Tensor<float>, Tensor<float>> recurrence =
        (bit, oldState) => T.Tanh(T.Dot(Wbit, bit) + T.Dot(Wstate, oldState) + b);

    var states = T.Scan(fn: recurrence, sequence: bits, outputsInfo: state0);
    var output = T.Tanh(T.Dot(Wout, states[(Slice)(-1)]));
    var error = 0.5f * T.Norm2(output - expected);
    var classify = T.Function(bits, output);

    var gradients = T.Grad(error);
    var gradWstate = gradients[Wstate];
    Assert.IsNotNull(gradWstate);
    var gradWstateIsReshape = gradWstate as Reshaping<float>;
    Assert.IsNotNull(gradWstateIsReshape);
    var gradWstateIsSum = gradWstateIsReshape.x as Sum<float>;
    Assert.IsNotNull(gradWstateIsSum);

    var dfor = gradWstateIsSum.x as Tensor<float>.For;
    var backLoop = dfor.Loop;
    Assert.AreEqual(3, backLoop.Sequences.Count); // bit, states, delta
    Assert.AreEqual(6, backLoop.Fors.Count);      // dbit, dstate, dWstate, db, dWbit, dstate_p1
    Assert.AreEqual(3, dfor.Index);
    // TODO: check why a recursive was expected
    //var dWstate_ = dfor.RecursiveVariable;
    //Assert.AreEqual("dWstate_", dWstate_.Name);

    var variables = backLoop.Variables.Cast<Tensor<float>>().ToList();
    var bit_ = variables[0];
    Assert.AreEqual("bit_", bit_.Name);
    var oldState_ = variables[1];
    Assert.AreEqual("oldState_", oldState_.Name);
    var delta_oldState_ = variables[2];
    Assert.AreEqual("delta_oldState_", delta_oldState_.Name);
    var dbit_ = variables[3];
    Assert.AreEqual("dbit_", dbit_.Name);
    var doldState_ = variables[4];
    Assert.AreEqual("doldState_", doldState_.Name);
    var oldState_tp1_ = variables[5];
    Assert.AreEqual("oldState_tp1", oldState_tp1_.Name);

    var d = T.Sum((delta_oldState_ + doldState_) * (1f - T.Square(oldState_tp1_)), axis: 1, keepDims: true);

    var doldState = (Tensor<float>)backLoop.Fors[1].Expression;
    T.Dot(Wstate, d, transposeX: true).AssertEqual(doldState);

    var dWstate = (Tensor<float>)backLoop.Fors[3].Expression;
    var dWstateExp = T.Dot(d, oldState_, transposeY: true);
    dWstateExp.AssertEqual(dWstate);

    var dbit = (Tensor<float>)backLoop.Fors[0].Expression;
    Assert.IsTrue(T.Dot(Wbit, d, transposeX: true).StructuralEquality(dbit));

    var oldState_tp1 = (Tensor<float>)backLoop.Fors[5].Expression;
    oldState_tp1.AssertEqual(oldState_);
}
public override void Backward(Tensor<Type> deltas, Backpropagation bp)
{
    deltas.AssertOfShape(Shape);
    var deltaFromRecursive = OutputInfo != null;

    // var in the forward -> for in the backward
    var forsDic = new Dictionary<ISymbol, IFor>(); // ITensorSymbol

    var backLoop = new Loop("d" + Loop.Name);
    backLoop.Length = Loop.Length;
    var substitution = new Patch(preserveShape: true);

    // add the sequences used by the forward, reversed
    int fwdSeqCount = Loop.Sequences.Count;
    for (int i = 0; i < fwdSeqCount; i++)
    {
        var seq = Loop.Sequences[i];
        var variable = Loop.Variable(seq);
        var alias = Loop.Sequences[i].Match(
            (Tensor<float> s) => backLoop.AddSeq(s[Step_m1], variable.Name + "_", Loop.SequenceAxes[i]),
            (Tensor<int> s) => backLoop.AddSeq(s[Step_m1], variable.Name + "_", Loop.SequenceAxes[i]),
            (Func<ITensor>)null
        );
        substitution.Add_(variable, alias);
    }

    // add the sequences computed by the forward
    foreach (var @for in Loop.Fors)
    {
        if (@for.IsRecursive)
        {
            var variable = @for.RecursiveVariable;
            var alias = @for.Match(
                (Tensor<float>.For f) => backLoop.AddSeq(new Insert<float>(f, 0, f.OutputInfo, 0)[From_m2_Step_m1], variable.Name + "_", axis: 0),
                (Tensor<int>.For f) => backLoop.AddSeq(new Insert<int>(f, 0, f.OutputInfo, 0)[From_m2_Step_m1], variable.Name + "_", axis: 0),
                (Func<ITensor>)null
            );
            substitution.Add_(variable, alias);
        }
        else
        {
            var alias = @for.Match(
                (Tensor<float>.For f) => backLoop.AddSeq(f[Step_m1], @for.Name + "_"),
                (Tensor<int>.For f) => backLoop.AddSeq(f[Step_m1], @for.Name + "_"),
                (Func<ITensor>)null
            );
            substitution.Add_(@for.Expression, alias);
        }
    }

    // add the backpropagated delta
    var deltaOut = backLoop.AddSeq(deltas[Step_m1], $"delta_{RecursiveVariable?.ToString() ?? "f" + Index}_", axis: 0);

    // d_ avoids duplicated variables with the same name.
    var d_ = new Dictionary<IVar, IVar>();

    // add deltas for the loop variables (inputs of, and computed by, the forward), initialized to zero
    var recVariables = Loop.RecursiveFors.Select(f => Loop.Variable(f));
    foreach (var varFwd in Loop.Variables)
    {
        var zeros = varFwd.Match(
            (Tensor<float>.Var x) => Op.ZerosLike(x),
            (Tensor<int>.Var x) => Op.ZerosLike(x),
            (Func<ITensor>)null
        );
        var @for = backLoop.AddRecursive_(zeros, zeros, $"d{varFwd.Name}_");
        @for.Comment = $"dL/d{varFwd}";
        d_[varFwd] = @for.RecursiveVariable;
        forsDic[varFwd] = @for;
    }

    // `others` collects gradients pushed to expressions of the loop that aren't sequences or variables.
    var others = new Dictionary<IExpr, IFor>();
    AddDeltaFromBackpropagate(backLoop, others, forsDic,
        Backpropagation.Backward(Expression, deltaFromRecursive ? deltaOut + (Var)d_[RecursiveVariable] : deltaOut));

    foreach (var @for in Loop.RecursiveFors)
    {
        var variable = @for.RecursiveVariable;
        if (!deltaFromRecursive || @for != this)
        {
            var gradExpr = @for.Match(
                (Tensor<float>.For f) => Backpropagation.Backward(f.Expression, (Tensor<float>)d_[f.RecursiveVariable]),
                (Tensor<int>.For f) => Backpropagation.Backward(f.Expression, (Tensor<int>)d_[f.RecursiveVariable]),
                null
            );
            AddDeltaFromBackpropagate(backLoop, others, forsDic, gradExpr);
        }
        // else: we already added the delta prior to the loop

        // reuse results computed during the forward inside the backward
        var alias_tp1 = backLoop.AddRecursive_(variable, @for[-1], variable.Name + "_tp1").RecursiveVariable;
        substitution.Add_(@for.Expression, alias_tp1);
    }

    // substitute variables in the fors
    foreach (var @for in backLoop.Fors)
    {
        var comment = @for.Expression.Comment;
        @for.Expression = (ITensor)@for.Expression.Patch(substitution);
        @for.Expression.Comment = comment;
    }

    // deltas of sequences
    for (int i = 0; i < Loop.Sequences.Count; ++i)
    {
        if (Loop.Sequences[i] is Tensor<float>)
        {
            bp.PushGradientTo((Tensor<float>)Loop.Sequences[i], ((Tensor<float>)backLoop.Fors[i])[Step_m1]);
        }
        else { throw new NotImplementedException(); }
    }

    // deltas of seeds
    foreach (var @for in Loop.RecursiveFors)
    {
        if (@for is Tensor<float>)
        {
            bp.PushGradientTo((Tensor<float>)@for.OutputInfo, ((Tensor<float>)forsDic[@for.RecursiveVariable])[-1]);
        }
        else { throw new NotImplementedException(); }
    }

    // other deltas
    foreach (var W_dW in others)
    {
        var W = W_dW.Key;
        var dW = W_dW.Value;
        if (W is Tensor<float>)
        {
            bp.PushGradientTo((Tensor<float>)W, Op.Sum((Tensor<float>)dW, axis: 0));
        }
        else { throw new NotImplementedException(); }
    }
}
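// End-to-end numeric check of the backward-loop construction above (a hedged
// sketch; the method name is illustrative, the T/NN calls follow the tests in this
// file). For partialSums[t] = z + xs[0] + ... + xs[t] and cost = Sum(last), the
// seed z receives a gradient of ones.
public void ScanBackwardNumericCheck()
{
    var xs = T.Matrix<float>("xs");
    var z = T.Vector<float>("z");
    var partialSums = T.Scan((x, a) => x + a, xs, z);
    var cost = T.Sum(partialSums[-1]);
    var dz = T.Grad(cost, z);
    var f = T.Function(input: (xs, z), output: dz);
    var dz_ = f(NN.Array(new float[,] { { 1, 2 }, { 3, 4 } }), NN.Zeros<float>(2));
    AssertArray.AreAlmostEqual(NN.Array(new[] { 1f, 1f }), dz_);
}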