/// <summary>
/// Scans tanh(v.dot(W) + b_sym) over X, compiles it, and compares index 0 of the
/// compiled result with index 0 of the same expression evaluated directly on arrays.
/// </summary>
/// <remarks>Adapted from http://deeplearning.net/software/theano/tutorial/loop.html</remarks>
public void TestLoop1()
{
    // Symbolic inputs.
    var X = T.Matrix<float>("x");
    var W = T.Matrix<float>("W");
    var b_sym = T.Vector<float>("b_sym");

    // Symbolic loop and its compiled form.
    var scanned = T.Scan(v => T.Tanh(T.Dot(v, W) + b_sym), sequence: X);
    var computeElementwise = T.Function(inputs: new[] { X, W, b_sym }, output: scanned);

    // Concrete values: identity input, all-ones weights, bias (1, 2).
    var xValue = NN.Eye<float>(2);
    var wValue = NN.Ones<float>(2, 2);
    var bValue = NN.Ones<float>(2);
    bValue.Item[1] = 2;

    var actual = computeElementwise(new[] { xValue, wValue, bValue });
    var reference = NN.Tanh(xValue.Dot(wValue) + bValue);

    AssertArray.AreAlmostEqual(reference[0], actual[0]);
}
/// <summary>
/// Builds the symbolic graph of an Elman-style recurrent network and compiles its
/// <c>classify</c> and <c>train</c> functions.
/// </summary>
/// <param name="nh">dimension of the hidden layer</param>
/// <param name="nc">number of classes</param>
/// <param name="de">dimension of the word embeddings</param>
/// <param name="cs">word window context size</param>
public Elman3(int nh, int nc, int de, int cs)
{
    // Model parameters. Wh starts as the identity, biases and h0 as zeros; the
    // remaining weights are NN.Random.Uniform(-1, 1, ...) draws scaled by 0.2.
    const float scale = 0.2f;
    this.Wx = T.Shared(scale * NN.Random.Uniform(-1.0f, 1.0f, de * cs, nh), "Wx");
    this.Wh = T.Shared(NN.Eye<float>(nh), "Wh");
    this.W = T.Shared(scale * NN.Random.Uniform(-1.0f, 1.0f, nh, nc), "W");
    this.bh = T.Shared(NN.Zeros<float>(nh), "bh");
    this.b = T.Shared(NN.Zeros<float>(nc), "b");
    this.h0 = T.Shared(NN.Zeros<float>(nh), "h0");

    // Everything that is trained.
    this.@params = new[] { this.Wx, this.Wh, this.W, this.bh, this.b, this.h0 };

    var x = T.Matrix<float>("x");   // [sentence, de * cs]
    var y = T.Scalar<int>("y");     // label

    // One step of the recurrence: new hidden state and class distribution.
    Func<Tensor<float>, Tensor<float>, Tensor<float>[]> recurrence = (x_t, h_tm1) =>
    {
        var hiddenInput = T.Dot(x_t, this.Wx) + T.Dot(h_tm1, this.Wh) + this.bh;
        var hidden = T.Sigmoid(hiddenInput);
        var classScores = T.Dot(hidden, this.W) + this.b;
        var classProbabilities = T.Softmax(classScores);
        return new[] { hidden, classProbabilities };
    };

    // Only the first output (the hidden state) is fed back; it starts from h0.
    var scanOutputs = T.Scan(
        fn: recurrence,
        sequences: x,
        outputsInfo: new[] { this.h0, null });
    var hiddenStates = scanOutputs[0];  // not used below
    var distributions = scanOutputs[1];

    var lastWordDistribution = distributions[-1, XSlicer._];
    var sentenceDistributions = distributions[XSlicer._, XSlicer._];
    var prediction = T.Argmax(sentenceDistributions, axis: 1);

    // Cost (negative log-likelihood of label y at the last position), gradients, learning rate.
    var lr = T.Scalar<float>("lr");
    nll = -T.Mean(T.Log(lastWordDistribution)[y]);
    var gradients = T.Grad(nll);

    // Plain gradient-descent update for every parameter.
    var updates = new OrderedDictionary();
    foreach (var parameter in @params)
    {
        var gradient = gradients[parameter];
        updates[parameter] = parameter - lr * gradient;
    }

    // Compiled functions.
    this.classify = T.Function(input: x, output: prediction);
    this.train = T.Function(input: (x, y, lr), output: nll, updates: updates);
}
/// <summary>
/// Scanning <c>v => 2f * v</c> over a matrix must give twice the input.
/// </summary>
public void TestScan2()
{
    var x = Op.Matrix<float>("x");
    var doubled = Op.Scan(v => 2f * v, sequence: x);
    var f = Op.Function(input: x, output: doubled);

    var identity = NN.Eye<float>(2);
    var actual = f(identity);

    AssertArray.AreEqual(2 * identity, actual);
}
/// <summary>
/// Scanning the identity function over a matrix must give the input back unchanged.
/// </summary>
public void TestScan()
{
    var x = Matrix<float>("x");
    var passthrough = Scan(v => v, sequence: x);
    var f = Function(input: x, output: passthrough);

    var identity = NN.Eye<float>(2);
    var actual = f(identity);

    AssertArray.AreEqual(identity, actual);
}
/// <summary>
/// Scanning <c>(v1, v2) => v1 + v2</c> over two sequences must equal the sum of the two inputs.
/// </summary>
public void TestScan3()
{
    var x = Op.Matrix<float>("x");
    var y = Op.Matrix<float>("y");
    var summed = Op.Scan((v1, v2) => v1 + v2, sequences: new[] { x, y });
    var f = Op.Function(input: (x, y), output: summed);

    var left = NN.Eye<float>(2);
    var right = 2 * NN.Eye<float>(2);
    var actual = f(left, right);

    AssertArray.AreEqual(left + right, actual);
}
/// <summary>
/// Scans <c>acc + x</c> over the 5x5 identity starting from a zero accumulator and
/// checks that the last scan output is a vector of five ones.
/// </summary>
public void TestScan4()
{
    var X = Op.Matrix<float>("X");
    var acc0 = Op.Shared(NN.Zeros<float>(5), "acc0");

    var loop = Op.Scan(fn: (x, acc) => acc + x, sequence: X, outputsInfo: acc0);
    var lastOutput = loop[-1];
    var f = Op.Function(input: X, output: lastOutput);

    var identity = NN.Eye<float>(5);
    var actual = f(identity);

    var allOnes = new float[] { 1, 1, 1, 1, 1 };
    AssertArray.AreEqual(allOnes, actual);
}
/// <summary>
/// Checks that <c>a.DotWithBias(b)</c> equals <c>a.Dot(c)</c>, where <c>a</c> is a 4x5
/// all-ones array with the 4x4 identity written through <c>a[_, Upto(-1)]</c>, and
/// <c>c</c> is a 5-vector of ones with <c>b</c> written through <c>c[Upto(4)]</c>.
/// </summary>
public void TestDotWithIdentity()
{
    // Start from ones, then overwrite the sliced part with the identity.
    var a = NN.Ones<float>(4, 5);
    a[_, Upto(-1)] = NN.Eye<float>(4);

    var b = NN.Random.Uniform(-1, 1, 4).As<float>();

    // c holds b, with the remaining entry left at 1.
    var c = NN.Ones<float>(5);
    c[Upto(4)] = b;

    var plainDot = a.Dot(c);
    var biasedDot = a.DotWithBias(b);

    AssertArray.AreEqual(plainDot, biasedDot);
}
/// <summary>
/// Approximates the Moore-Penrose pseudoinverse of <paramref name="a"/>.
/// </summary>
/// <remarks>
/// See https://en.wikipedia.org/wiki/Moore%E2%80%93Penrose_pseudoinverse.
/// Starts from the regularised estimate A(0) = (A^T.A + d.I)^-1.A^T and refines it with
/// the iteration A(i+1) = 2.A(i) - A(i).A.A(i).
/// </remarks>
/// <param name="a">matrix to pseudo-invert</param>
/// <param name="iterations">number of refinement steps applied to the initial estimate (default 2)</param>
/// <param name="regularization">value d added on the diagonal of A^T.A before inversion (default 1e-6)</param>
/// <returns>the approximated pseudoinverse</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="a"/> is null</exception>
/// <exception cref="System.ArgumentOutOfRangeException"><paramref name="iterations"/> is negative</exception>
public static Array<float> PowerMethod(Array<float> a, int iterations = 2, float regularization = 1e-6f)
{
    if (object.ReferenceEquals(a, null))
    {
        throw new System.ArgumentNullException(nameof(a));
    }
    if (iterations < 0)
    {
        throw new System.ArgumentOutOfRangeException(nameof(iterations), "The number of iterations cannot be negative.");
    }

    // init: A(0) = (A^T.A + d.I)^-1 . A^T
    var result = a.T.Dot(a) + regularization * NN.Eye(a.Shape[1]);
    Lapack.Inverse(result.Values, result.Shape[0]);   // inverts the buffer in place
    result = result.Dot(a.T);

    // iterate: A(i+1) = 2.A(i) - A(i).A.A(i)
    for (int i = 0; i < iterations; i++)
    {
        result = 2 * result - result.Dot(a).Dot(result);
    }

    return result;
}
/// <summary>
/// Writes the 3x3 identity into a 3x4 zero array through <c>a[_, Upto(-1)]</c> and checks
/// that the result is the identity followed by a column of zeros.
/// </summary>
public void TestDotWithBias()
{
    var a = NN.Zeros<float>(3, 4);
    var identity = NN.Eye<float>(3);
    a[_, Upto(-1)] = identity;

    var expected = NN.Array<float>(new float[,]
    {
        { 1, 0, 0, 0 },
        { 0, 1, 0, 0 },
        { 0, 0, 1, 0 }
    });

    AssertArray.AreAlmostEqual(expected, a);
}
/// <summary>
/// Loads word embeddings from disk, computes their pseudoinverse with
/// <c>PowerMethod</c> and checks pseudoinverse identities on the result.
/// </summary>
public void TestPseudoInverse()
{
    // NOTE(review): absolute, machine-specific path; the test needs this file to exist.
    var path = @"C:\Users\joc\AppData\Local\ProtoStudio\Banque\embeddings.bin";
    const float tolerance = 1e-6f;

    var words = Word2Vec.LoadBinary(path, normalize: true).Vectors;
    var pseudoInv = PowerMethod(words);

    // pinv.A == I holds when the embedding dimensions are linearly independent.
    var identity = NN.Eye(words.Shape[1]);
    AssertArray.AreAlmostEqual(identity, pseudoInv.Dot(words), tolerance, tolerance);

    // A.pinv.A == A; skipped for large vocabularies because it takes too long.
    bool smallEnough = words.Shape[0] <= 1000;
    if (smallEnough)
    {
        var roundTrip = words.Dot(pseudoInv).Dot(words);
        AssertArray.AreAlmostEqual(words, roundTrip, tolerance, tolerance);
    }

    // pinv.A.pinv == pinv
    var inverseRoundTrip = pseudoInv.Dot(words).Dot(pseudoInv);
    AssertArray.AreAlmostEqual(pseudoInv, inverseRoundTrip, tolerance, tolerance);
}
/// <summary>
/// Builds the symbolic t-SNE objective for <paramref name="X_"/> and compiles the
/// <c>dY</c>, <c>Loss</c> and <c>Train</c> functions.
/// </summary>
/// <param name="X_">input data; asserted to be 2-dimensional, its first dimension is the number of points</param>
/// <param name="dims">number of columns of the embedding Y</param>
/// <param name="perplexity">perplexity forwarded to <c>x2p</c></param>
public Tsne(Array<float> X_, int dims, float perplexity)
{
    X_.AssertOfDim(2);
    int n = X_.Shape[0];

    X = T.Shared(X_, "X");
    Y = T.Shared(NN.Random.Uniform(-1f, 1f, n, dims), "Y");
    YMomentum = T.Shared(NN.Zeros(n, dims), "YMomentum");
    dYLast = T.Shared(NN.Zeros(n, dims), "dYLast");

    // ones everywhere, zero on the diag
    mask = T.Shared(NN.Ones(n, n) - NN.Eye(n), "mask");

    // Pairwise affinities with the Student-t kernel: num[i, j] = 1 / (1 + |y_i - y_j|^2),
    // where |y_i - y_j|^2 = |y_i|^2 + |y_j|^2 - 2 y_i.y_j.
    // The squared norms must enter with a positive sign on both sides: the previous
    // expression transposed (2 Y.Y^T + sum_Y), which subtracted one of the two norms.
    var sum_Y = T.Sum(Y * Y, 1, keepDims: true);
    var num = 1 / (1 - T.DimShuffle(2 * T.Dot(Y, Y, transposeY: true) - sum_Y, 1, 0) + sum_Y);

    // set the diag to zero
    num *= mask;
    var Q = num / T.Sum(num);

    // High-dimensional affinities, with early exaggeration (x4) and a 1e-12 floor
    // that keeps P strictly positive inside the logarithm below.
    var P_ = x2p(X_, 1e-5f, perplexity);
    P_ = P_ * 4f;
    P_ = NN.Apply(P_, x => Math.Max(x, 1e-12f));
    P = T.Shared(P_, "P");

    KL_Loss = T.Sum(P * T.Log(P / Q));

    dY = T.Function(output: T.Grad(KL_Loss, Y));
    Loss = T.Function(output: KL_Loss);

    var updates = MomentumUpdate(Y, YMomentum, dYLast, T.Grad(KL_Loss, Y), 500);
    Train = T.Function(updates);
}
/// <summary>
/// Console demo: scans tanh(v.dot(W) + b_sym) over X, then prints the compiled result
/// followed by the same expression evaluated directly on arrays.
/// </summary>
public static void TestLook1()
{
    // defining the tensor variables
    var X = T.Matrix<float>("x");
    var W = T.Matrix<float>("W");
    // b_sym is bound to the 1-D array NN.Ones<float>(2) below, so it is declared as a vector.
    var b_sym = T.Vector<float>("b_sym");

    var results = T.Scan(v => T.Tanh(T.Dot(v, W) + b_sym), sequence: X);
    var compute_elementwise = T.Function(inputs: new[] { X, W, b_sym }, output: results);

    // test values
    var x = NN.Eye<float>(2);
    var w = NN.Ones<float>(2, 2);
    var b = NN.Ones<float>(2);
    b.Item[1] = 2;

    Console.WriteLine(compute_elementwise(new[] { x, w, b }).Item[0]);

    // comparison with tensors
    Console.WriteLine(NN.Tanh(x.Dot(w) + b));
}
/// <summary>
/// Builds a GRU classifier: a gated recurrent layer scanned over the input sequence,
/// followed by a softmax prediction layer on the last hidden state, trained with Adagrad.
/// Compiles the <c>classify</c> and <c>train</c> functions.
/// </summary>
/// <param name="inputDim">dimension of the input vectors</param>
/// <param name="hiddenDim">dimension of the hidden layer</param>
/// <param name="nClasses">dimension of the output vector</param>
/// <param name="scale">scaling factor to initialize weights</param>
public GRU2(int inputDim, int hiddenDim, int nClasses, float scale = 0.2f)
{
    // /!\ softmax requires Dot(v, M) products

    // initial hidden state
    h0 = T.Shared(NN.Zeros<float>(hiddenDim), "h0");

    // reset gate layers
    Wr = T.Shared(NN.Random.Uniform(-scale, scale, inputDim, hiddenDim), "Wr");
    Ur = T.Shared(NN.Eye<float>(hiddenDim), "Ur");
    br = T.Shared(NN.Zeros<float>(hiddenDim), "br");

    // update gate layers
    Wz = T.Shared(NN.Random.Uniform(-scale, scale, inputDim, hiddenDim), "Wz");
    Uz = T.Shared(NN.Eye<float>(hiddenDim), "Uz");
    bz = T.Shared(NN.Zeros<float>(hiddenDim), "bz");

    // layers
    W = T.Shared(NN.Random.Uniform(-scale, scale, inputDim, hiddenDim), "W");
    U = T.Shared(NN.Eye<float>(hiddenDim), "U");
    b = T.Shared(NN.Zeros<float>(hiddenDim), "b");

    // prediction layer
    S = T.Shared(NN.Random.Uniform(-scale, scale, hiddenDim, nClasses), "S");
    Sb = T.Shared(NN.Zeros<float>(nClasses), "Sb");

    // bundle
    this.@params = new[] { h0, Wr, Ur, br, Wz, Uz, bz, W, U, b, S, Sb };

    // Adagrad shared variables: one accumulator of squared gradients per parameter
    var hists = new Dictionary<string, Tensor<float>.Shared>();
    foreach (var param in @params)
    {
        var name = param.Name + "Hist";
        hists[name] = T.Shared(NN.Zeros<float>(param.Value.Shape), name);
    }

    // Adadelta shared variables (created, but not wired into the updates below)
    var hists2 = new Dictionary<string, Tensor<float>.Shared>();
    foreach (var param in @params)
    {
        var name = param.Name + "Hist2";
        hists2[name] = T.Shared(NN.Zeros<float>(param.Value.Shape), name);
    }

    var x = T.Matrix<float>("x");  // [sentence, inputDim]
    var y = T.Scalar<int>("y");

    Func<Tensor<float>, Tensor<float>, Tensor<float>[]> recurrence = (x_t, h_tm1) =>
    {
        // reset gate
        var r_t = T.Sigmoid(T.Dot(x_t, Wr) + T.Dot(h_tm1, Ur) + br);
        // update gate
        var z_t = T.Sigmoid(T.Dot(x_t, Wz) + T.Dot(h_tm1, Uz) + bz);
        // proposed hidden state
        var _h_t = T.Tanh(T.Dot(x_t, W) + T.Dot(r_t * h_tm1, U) + b);
        // actual hidden state
        var h_t = z_t * h_tm1 + (1 - z_t) * _h_t;
        // return all the intermediate variables because they may be reused by T.Grad to optimize gradient computation
        return new[] { h_t, r_t, z_t, _h_t };
    };

    // last hidden state of the scanned sequence
    var h = T.Scan(recurrence, x, new[] { h0, null, null, null })[0][-1];
    var pred = T.Softmax(T.Dot(h, S) + Sb);
    var y_pred = T.Argmax(pred, axis: 0);

    // cost and gradients and learning rate
    var lr = T.Scalar<float>("lr");
    var nll = -T.Mean(T.Log(pred)[y]);
    var gradients = T.Grad(nll);

    // Adagrad: accumulate the squared gradient, then scale the step by the UPDATED
    // accumulator. Dividing by the previous accumulator (zero on the first step) would
    // make the first step lr * grad / sqrt(eps).
    // NOTE(review): assumes update expressions are all evaluated against the pre-update
    // shared values (Theano semantics) - confirm against T.Function.
    const float eps = 1e-5f;
    var updates = new OrderedDictionary();
    foreach (var param in @params)
    {
        var grad = gradients[param];
        var hist = hists[param.Name + "Hist"];
        var newHist = hist + grad * grad;
        updates[hist] = newHist;
        updates[param] = param - lr * grad / T.Sqrt(newHist + eps);
    }

    // theano functions
    this.classify = T.Function(input: x, output: y_pred);
    this.train = T.Function(input: (x, y, lr), output: nll, updates: updates);
}
/// <summary>
/// Builds a small tanh RNN, differentiates its squared error and inspects the structure of
/// the gradient with respect to <c>Wstate</c>: it must be a Reshaping of a Sum of a For
/// whose backward loop has the expected sequences, variables and expressions.
/// </summary>
public void RnnXorHasCorrectGradient()
{
    NN.Random.Seed(12345);
    int nh = 10; // hidden layer

    var Wbit = T.Shared(0.2f * NN.Random.Uniform(-1.0f, 1.0f, nh, 1).As<float>(), "Wbit");
    var Wstate = T.Shared(NN.Eye<float>(nh), "Wstate");
    var Wout = T.Shared(0.2f * NN.Random.Uniform(-1.0f, 1.0f, 1, nh).As<float>(), "Wout");
    var b = T.Shared(0.2f * NN.Random.Uniform(-1.0f, 1.0f, nh, 1).As<float>(), "b");

    var state0 = T.Shared(NN.Zeros<float>(nh, 1), "state0");

    var bits = T.Tensor3<float>("bits");          // n x 1
    var expected = T.Matrix<float>("expected");   // 1 x 1

    // The lambda parameter names matter: the loop variable names asserted below
    // ("bit_", "oldState_") are derived from them.
    Func<Tensor<float>, Tensor<float>, Tensor<float>> recurrence = (bit, oldState) =>
    {
        return T.Tanh(T.Dot(Wbit, bit) + T.Dot(Wstate, oldState) + b);
    };

    var states = T.Scan(fn: recurrence, sequence: bits, outputsInfo: state0);

    var output = T.Tanh(T.Dot(Wout, states[(Slice)(-1)]));
    var error = 0.5f * T.Norm2(output - expected);
    var classify = T.Function(bits, output);
    var gradients = T.Grad(error);

    // Peel the gradient expression layer by layer: Reshaping -> Sum -> For.
    var gradWstate = gradients[Wstate];
    Assert.IsNotNull(gradWstate);
    var gradWstateIsReshape = gradWstate as Reshaping<float>;
    Assert.IsNotNull(gradWstateIsReshape);
    var gradWstateIsSum = gradWstateIsReshape.x as Sum<float>;
    Assert.IsNotNull(gradWstateIsSum);
    var dfor = gradWstateIsSum.x as Tensor<float>.For;
    // Fail with an assertion rather than a NullReferenceException when the cast fails.
    Assert.IsNotNull(dfor);
    var backLoop = dfor.Loop;

    Assert.AreEqual(3, backLoop.Sequences.Count); // bit, states, delta
    Assert.AreEqual(6, backLoop.Fors.Count);      // six outputs; the indices used below are 0: dbit, 1: doldState, 3: dWstate, 5: oldState_tp1
    Assert.AreEqual(3, dfor.Index);

    // TODO: check why a recursive was expected
    //var dWstate_ = dfor.RecursiveVariable;
    //Assert.AreEqual("dWstate_", dWstate_.Name);

    var variables = backLoop.Variables.Cast<Tensor<float>>().ToList();
    var bit_ = variables[0];
    Assert.AreEqual("bit_", bit_.Name);
    var oldState_ = variables[1];
    Assert.AreEqual("oldState_", oldState_.Name);
    var delta_oldState_ = variables[2];
    Assert.AreEqual("delta_oldState_", delta_oldState_.Name);
    var dbit_ = variables[3];
    Assert.AreEqual("dbit_", dbit_.Name);
    var doldState_ = variables[4];
    Assert.AreEqual("doldState_", doldState_.Name);
    var oldState_tp1_ = variables[5];
    Assert.AreEqual("oldState_tp1", oldState_tp1_.Name);

    // Expected backward step through tanh: (incoming deltas) * (1 - state^2).
    var d = T.Sum((delta_oldState_ + doldState_) * (1f - T.Square(oldState_tp1_)), axis: 1, keepDims: true);

    var doldState = (Tensor<float>)backLoop.Fors[1].Expression;
    (T.Dot(Wstate, d, transposeX: true)).AssertEqual(doldState);

    var dWstate = (Tensor<float>)backLoop.Fors[3].Expression;
    var dWstateExp = T.Dot(d, oldState_, transposeY: true);
    dWstateExp.AssertEqual(dWstate);

    var dbit = (Tensor<float>)backLoop.Fors[0].Expression;
    // NOTE(review): unlike the AssertEqual calls around it, the result of
    // StructuralEquality is discarded here, so this line may not assert anything - confirm.
    (T.Dot(Wbit, d, transposeX: true)).StructuralEquality(dbit);

    var oldState_tp1 = (Tensor<float>)backLoop.Fors[5].Expression;
    oldState_tp1.AssertEqual(oldState_);
}
/// <summary>
/// Builds a GRU regressor: a gated recurrent layer scanned over the input sequence,
/// followed by a linear output layer on the last hidden state.
/// Compiles three functions: <c>Classify</c> (forward pass), <c>Train</c> (returns the
/// error and accumulates the gradients) and <c>Update</c> (applies an Adagrad step on the
/// accumulated gradients averaged over the batch, then resets the accumulators).
/// </summary>
/// <param name="inputDim">dimension of the input vectors</param>
/// <param name="hiddenDim">dimension of the hidden layer</param>
/// <param name="outputDim">dimension of the output vector</param>
/// <param name="scale">scaling factor to initialize weights</param>
public GRU(int inputDim, int hiddenDim, int outputDim, float scale = 0.2f)
{
    // initial hidden state
    h0 = T.Shared(NN.Zeros<float>(hiddenDim), "h0");

    // reset gate layers
    Wr = T.Shared(NN.Random.Uniform(-scale, scale, inputDim, hiddenDim), "Wr");
    Ur = T.Shared(NN.Eye<float>(hiddenDim), "Ur");
    br = T.Shared(NN.Zeros<float>(hiddenDim), "br");

    // update gate layers
    Wz = T.Shared(NN.Random.Uniform(-scale, scale, inputDim, hiddenDim), "Wz");
    Uz = T.Shared(NN.Eye<float>(hiddenDim), "Uz");
    bz = T.Shared(NN.Zeros<float>(hiddenDim), "bz");

    // layers
    W = T.Shared(NN.Random.Uniform(-scale, scale, inputDim, hiddenDim), "W");
    U = T.Shared(NN.Eye<float>(hiddenDim), "U");
    b = T.Shared(NN.Zeros<float>(hiddenDim), "b");

    // prediction layer
    S = T.Shared(NN.Random.Uniform(-scale, scale, hiddenDim, outputDim), "S");
    Sb = T.Shared(NN.Zeros<float>(outputDim), "Sb");

    // bundle
    this.@params = new[] { h0, Wr, Ur, br, Wz, Uz, bz, W, U, b, S, Sb };

    // Gradient accumulators, summed over the examples of a batch by Train
    this.grads = new Dictionary<string, Tensor<float>.Shared>();
    foreach (var param in @params)
    {
        var name = param.Name + "Grad";
        this.grads[name] = T.Shared(NN.Zeros<float>(param.Value.Shape), name);
    }

    // Adagrad accumulators of squared gradients
    this.hists = new Dictionary<string, Tensor<float>.Shared>();
    foreach (var param in @params)
    {
        var name = param.Name + "Hist";
        this.hists[name] = T.Shared(NN.Zeros<float>(param.Value.Shape), name);
    }

    // Adadelta shared variables (created, but not wired into the updates below)
    var hists2 = new Dictionary<string, Tensor<float>.Shared>();
    foreach (var param in @params)
    {
        var name = param.Name + "Hist2";
        hists2[name] = T.Shared(NN.Zeros<float>(param.Value.Shape), name);
    }

    var x = T.Matrix<float>("x");  // [sentence, inputDim]
    var expected = T.Vector<float>("expected");

    Func<Tensor<float>, Tensor<float>, Tensor<float>[]> recurrence = (x_t, h_tm1) =>
    {
        // reset gate
        var r_t = T.Sigmoid(T.Dot(x_t, Wr) + T.Dot(h_tm1, Ur) + br);
        // update gate
        var z_t = T.Sigmoid(T.Dot(x_t, Wz) + T.Dot(h_tm1, Uz) + bz);
        // proposed hidden state
        var _h_t = T.Tanh(T.Dot(x_t, W) + T.Dot(r_t * h_tm1, U) + b);
        // actual hidden state
        var h_t = z_t * h_tm1 + (1 - z_t) * _h_t;
        // return all the intermediate variables because they may be reused by T.Grad to optimize gradient computation
        return new[] { h_t, r_t, z_t, _h_t };
    };

    // last hidden state of the scanned sequence
    var h = T.Scan(recurrence, x, new[] { h0, null, null, null })[0][-1];

    // cost and gradients
    var output = T.Dot(h, S) + Sb;
    var error = 0.5f * T.Norm2(output - expected);
    var gradients = T.Grad(error);

    // Train only accumulates the gradient of each parameter; the parameters
    // themselves are modified by Update.
    var updatesTrain = new OrderedDictionary();
    foreach (var param in @params)
    {
        var grad = gradients[param];
        var g = grads[param.Name + "Grad"];
        updatesTrain[g] = g + grad;
    }

    var batchSize = T.Scalar<float>("batchSize");
    var lr = T.Scalar<float>("lr");
    const float eps = 1e-5f;

    // Adagrad on the batch-averaged gradient: accumulate its square, then scale the step
    // by the UPDATED accumulator. Dividing by the previous accumulator (zero on the first
    // update) would make the first step lr * meanGrad / sqrt(eps).
    // NOTE(review): assumes update expressions are all evaluated against the pre-update
    // shared values (Theano semantics) - confirm against T.Function.
    var updates = new OrderedDictionary();
    foreach (var param in this.@params)
    {
        var grad = this.grads[param.Name + "Grad"];
        var meanGrad = grad / batchSize;

        var hist = this.hists[param.Name + "Hist"];
        var newHist = hist + meanGrad * meanGrad;
        updates[hist] = newHist;
        updates[param] = param - lr * meanGrad / T.Sqrt(newHist + eps);
        // reset the accumulator for the next batch
        updates[grad] = T.ZerosLike(grad);
    }

    // theano functions
    this.Classify = T.Function(input: x, output: output);
    this.Train = T.Function(input: (x, expected), output: error, updates: updatesTrain);
    this.Update = T.Function(input: (lr, batchSize), updates: updates);
}
/// <summary>
/// Compares the regular softmax + categorical cross-entropy with a numerically stable
/// log-softmax formulation: the forward values and the costs must agree, while the
/// gradient of the regular version is asserted to be NaN and the stable one not.
/// </summary>
/// <remarks>
/// Background: https://github.com/Theano/Theano/issues/3162. With unbounded activations
/// (e.g. Relu) the softmax can saturate, which can lead to NaN gradients when paired with
/// a categorical cross-entropy cost; using a stable log-softmax directly in the cost
/// avoids the blow-up. According to that issue the cross-entropy value itself stays
/// stable in the regular case, only its gradient does not.
/// </remarks>
public void TestMethod1()
{
    Binding.Compiler.Debug = true;

    var x = T.Matrix<real>("x");
    var y = T.Matrix<real>("y");

    // Formulation 1: regular softmax followed by cross-entropy.
    var sm = T.Softmax(x);
    var cm1 = CategoricalCrossentropy(sm, y);
    var g1 = T.Grad(T.Mean(cm1), x);

    // Formulation 2: log-softmax computed on max-shifted inputs, fed directly to the cost.
    var xdev = x - T.Max(x, axis: 1, keepDims: true);
    var logNormalizer = T.Log(T.Sum(T.Exp(xdev), axis: 1, keepDims: true));
    var lsm = xdev - logNormalizer;
    var sm2 = T.Exp(lsm);   // only used to show equivalence with sm
    var cm2 = -T.Sum(y * lsm, axis: 1);
    var g2 = T.Grad(T.Mean(cm2), x);

    // Softmax inputs: exp of uniform draws in [0, large].
    var large = 1f; // 10f
    var a = NN.Exp(NN.Random.Uniform<float>(0, large, 5, 10));

    // One-hot labels: identity in the top-left 5x5 corner.
    var b = NN.Zeros<float>(5, 10);
    b[Range(0, 5), Range(0, 5)] = NN.Eye<float>(5);

    // 1. The softmax and the exponentiated stable log-softmax agree.
    var softmaxes = T.Function(input: x, output: (sm, sm2));
    var softmaxValues = softmaxes(a);
    AssertArray.AreAlmostEqual(softmaxValues.Item1, softmaxValues.Item2);

    // 2. Both formulations yield the same cross-entropy cost, i.e. the forward
    //    pass does provide some numerical stability.
    var costs = T.Function(input: (x, y), output: (cm1, cm2));
    var costValues = costs(a, b);
    AssertArray.AreAlmostEqual(costValues.Item1, costValues.Item2);

    // 3. The gradients differ: NaN for the standard softmax, finite sum for log-softmax.
    var grads = T.Function(input: (x, y), output: (g1, g2));
    var gradValues = grads(a, b);
    var regularGrad = gradValues.Item1;
    var stableGrad = gradValues.Item2;
    Assert.IsTrue(float.IsNaN(regularGrad.Sum()));
    Assert.IsFalse(float.IsNaN(stableGrad.Sum()));
}