public IWeightMatrix RepeatRows(IWeightMatrix w, int n)
{
    var src = w as WeightTensor;

    if (src.Rows != 1)
    {
        // General case: concatenate n references to the same matrix row-wise.
        List<IWeightMatrix> copies = new List<IWeightMatrix>();
        for (int k = 0; k < n; k++)
        {
            copies.Add(w);
        }

        return (ConcatRows(copies));
    }

    // Single-row fast path: Expand produces broadcast views over the same
    // storage, so no data is copied for either weights or gradients.
    var res = weightTensorFactory.CreateWeightTensor(src.Rows * n, src.Columns, src.TWeight.Expand(n, src.Columns), src.TGradient.Expand(n, src.Columns));

    if (this.needs_backprop)
    {
        Action backward = () =>
        {
            res.Dispose();
        };
        this.backprop.Add(backward);
    }

    return (res);
}
public IWeightMatrix AddTanh(IWeightMatrix w1, IWeightMatrix w2)
{
    var left = w1 as WeightTensor;
    var right = w2 as WeightTensor;

    // Fused forward op: res = tanh(w1 + w2).
    var res = weightTensorFactory.CreateWeightTensor(left.Rows, left.Columns, deviceId);
    Ops.AddTanh(res.TWeight, left.TWeight, right.TWeight);

    if (this.needs_backprop)
    {
        Action backward = () =>
        {
            // Both inputs receive the same tanh-derivative chain-rule
            // contribution, accumulated by AddTanhGradient.
            left.AddTanhGradient(res);
            right.AddTanhGradient(res);
            res.Dispose();
        };
        this.backprop.Add(backward);
    }

    return (res);
}
/// <summary>
/// Element-wise sigmoid. When <paramref name="updateWeightsInPlace"/> is true,
/// the result tensor aliases the input's weight storage (via CopyRef), so the
/// sigmoid overwrites the input weights in place.
/// </summary>
public IWeightMatrix Sigmoid(IWeightMatrix w, bool updateWeightsInPlace = false)
{
    var m = w as WeightTensor;
    var res = weightTensorFactory.CreateWeightTensor(m.Rows, m.Columns, deviceId);

    if (updateWeightsInPlace)
    {
        // NOTE(review): this replaces res.TWeight with a reference to the
        // input's weights; if the factory eagerly allocated a weight tensor
        // above, that tensor may be leaked here — TODO confirm factory semantics.
        res.TWeight = m.TWeight.CopyRef();
    }

    Ops.Sigmoid(res.TWeight, m.TWeight);

    if (this.needs_backprop)
    {
        Action backward = () =>
        {
            // Sigmoid chain rule (y * (1 - y) * dy) is handled inside
            // AddSigmoidGradient, which accumulates into m's gradient.
            m.AddSigmoidGradient(res);
            res.Dispose();
        };
        this.backprop.Add(backward);
    }

    return (res);
}
public IWeightMatrix PeekRow(IWeightMatrix w, int ix, int num = 1)
{
    WeightTensor src = w as WeightTensor;

    // Narrow creates views over rows [ix, ix + num); no data is copied,
    // so gradient writes to the view land directly in the source tensor.
    var tw = src.TWeight.Narrow(0, ix, num);
    var tg = src.TGradient != null ? src.TGradient.Narrow(0, ix, num) : null;
    var res = weightTensorFactory.CreateWeightTensor(num, src.Columns, tw, tg);

    // Count how often each row was touched (presumably consumed later by a
    // sparse optimizer update). Guarded by the shared lock.
    lock (locker)
    {
        for (int offset = 0; offset < num; offset++)
        {
            int row = ix + offset;
            if (src.RowToBeUpdated.ContainsKey(row) == false)
            {
                src.RowToBeUpdated.Add(row, 1);
            }
            else
            {
                src.RowToBeUpdated[row]++;
            }
        }
    }

    if (this.needs_backprop)
    {
        Action backward = () =>
        {
            res.Dispose();
        };
        this.backprop.Add(backward);
    }

    return (res);
}
/// <summary>
/// Fused element-wise op: res = w1 * w2 + w3 * w4 (element-wise products;
/// structure implied by the product-rule gradients registered below).
/// All four inputs are assumed to share the same shape — TODO confirm.
/// </summary>
public IWeightMatrix EltMulMulAdd(IWeightMatrix w1, IWeightMatrix w2, IWeightMatrix w3, IWeightMatrix w4)
{
    var m1 = w1 as WeightTensor;
    var m2 = w2 as WeightTensor;
    var m3 = w3 as WeightTensor;
    var m4 = w4 as WeightTensor;

    var res = weightTensorFactory.CreateWeightTensor(m1.Rows, m1.Columns, deviceId);
    Ops.MulMulAdd(res.TWeight, m1.TWeight, m2.TWeight, m3.TWeight, m4.TWeight);

    if (this.needs_backprop)
    {
        Action backward = () =>
        {
            // Product rule: each factor accumulates its partner times d(res).
            Ops.AddMul(m1.TGradient, m1.TGradient, m2.TWeight, res.TGradient);
            Ops.AddMul(m2.TGradient, m2.TGradient, m1.TWeight, res.TGradient);
            Ops.AddMul(m3.TGradient, m3.TGradient, m4.TWeight, res.TGradient);
            Ops.AddMul(m4.TGradient, m4.TGradient, m3.TWeight, res.TGradient);
            res.Dispose();
        };
        this.backprop.Add(backward);
    }

    return (res);
}
/// <summary>
/// Reorders rows between the two interleavings of (sequence, batch): views the
/// (Rows x Columns) matrix as (sizeEveryBatch, batchSize, Columns), swaps the
/// first two axes, and flattens back to 2D. Assumes m.Rows is divisible by
/// batchSize — TODO confirm at call sites.
/// </summary>
public IWeightMatrix PermuteBatch(IWeightMatrix m, int batchSize)
{
    WeightTensor t = m as WeightTensor;
    var res = weightTensorFactory.CreateWeightTensor(m.Rows, m.Columns, deviceId);
    int sizeEveryBatch = m.Rows / batchSize;

    // AsContiguous materializes the permuted layout so the final View is valid.
    // NOTE(review): this assignment replaces the TWeight created by the factory
    // above; if the factory eagerly allocated storage it may be leaked — TODO confirm.
    res.TWeight = Ops.AsContiguous(t.TWeight.View(sizeEveryBatch, batchSize, m.Columns).Permute(1, 0, 2)).View(m.Rows, m.Columns);

    if (this.needs_backprop)
    {
        Action backward = () =>
        {
            // Apply the inverse permutation to the output gradient and
            // accumulate it into the input gradient.
            var g = t.TGradient.View(sizeEveryBatch, batchSize, m.Columns);
            var t2 = res.TGradient.View(batchSize, sizeEveryBatch, m.Columns).Permute(1, 0, 2);
            Ops.Add(g, g, t2);

            g.Dispose();
            t2.Dispose();
            res.Dispose();
        };
        this.backprop.Add(backward);
    }

    return (res);
}
public IWeightMatrix Perform(IWeightMatrix state, AttentionPreProcessResult attenPreProcessResult, IComputeGraph g)
{
    // Project the decoder state: wc = state * Wa + bWa (bias broadcast over rows).
    var biasRows = g.RepeatRows(bWa, state.Rows);
    var projectedState = g.MulAdd(state, Wa, biasRows);

    // Broadcast the projection over every source position and score it:
    // scores = tanh(uhs + wc) * V.
    var projectedRepeat = g.RepeatRows(projectedState, attenPreProcessResult.inputsUnfolder[0].Rows);
    var tanhSum = g.AddTanh(attenPreProcessResult.uhs, projectedRepeat);
    var scores = g.Mul(tanhSum, V);

    // Split the scores per batch item and transpose each slice so that a
    // row-wise softmax normalizes over source positions.
    List<IWeightMatrix> scoresPerBatch = g.UnFolderRow(scores, m_batchSize);
    List<IWeightMatrix> transposed = new List<IWeightMatrix>();
    for (int batch = 0; batch < m_batchSize; batch++)
    {
        transposed.Add(g.Transpose2(scoresPerBatch[batch]));
    }

    var stacked = g.ConcatRows(transposed);
    var attentionWeights = g.SoftmaxM(stacked);

    // Context per batch item = attention-weighted sum of its encoder inputs.
    List<IWeightMatrix> contexts = new List<IWeightMatrix>();
    for (int batch = 0; batch < m_batchSize; batch++)
    {
        IWeightMatrix context = g.Mul(g.PeekRow(attentionWeights, batch), attenPreProcessResult.inputsUnfolder[batch]);
        contexts.Add(context);
    }

    return (g.ConcatRows(contexts));
}
/// <summary>
/// One LSTM step with layer normalization applied to the gate pre-activations
/// and to the new cell state. Updates the recurrent fields ht and ct as a
/// side effect and returns the new hidden state.
/// </summary>
public IWeightMatrix Step(IWeightMatrix input, IComputeGraph innerGraph)
{
    var hidden_prev = ht;
    var cell_prev = ct;

    // Single fused affine transform for all gates: [input, h_prev] * Wxh + b.
    var inputs = innerGraph.ConcatColumns(input, hidden_prev);
    var bs = innerGraph.RepeatRows(b, input.Rows);
    var hhSum = innerGraph.MulAdd(inputs, Wxh, bs);
    var hhSum2 = layerNorm1.Process(hhSum, innerGraph);

    // First 3*hdim columns hold the three gates; the last hdim columns hold
    // the candidate cell value.
    (var gates_raw, var cell_write_raw) = innerGraph.SplitColumns(hhSum2, hdim * 3, hdim);
    var gates = innerGraph.Sigmoid(gates_raw);
    var cell_write = innerGraph.Tanh(cell_write_raw);

    (var input_gate, var forget_gate, var output_gate) = innerGraph.SplitColumns(gates, hdim, hdim, hdim);

    // compute new cell activation: ct = forget_gate * cell_prev + input_gate * cell_write
    ct = innerGraph.EltMulMulAdd(forget_gate, cell_prev, input_gate, cell_write);
    var ct2 = layerNorm2.Process(ct, innerGraph);

    // compute hidden state as gated, saturated cell activations
    ht = innerGraph.EltMul(output_gate, innerGraph.Tanh(ct2));

    return (ht);
}
/// <summary>
/// Applies dropout by multiplying the input with a random mask tensor.
/// </summary>
/// <param name="V">Input weights.</param>
/// <param name="drop_prob">Probability of dropping each element.</param>
/// <returns>The masked output weights.</returns>
public IWeightMatrix Dropout(IWeightMatrix V, float drop_prob)
{
    // Keep probability. BuildRandomTensor presumably produces the Bernoulli
    // mask (possibly pre-scaled by 1/p for inverted dropout) — TODO confirm.
    float p = 1.0f - drop_prob;
    var w = V as WeightTensor;
    var res = weightTensorFactory.CreateWeightTensor(V.Rows, V.Columns, deviceId);

    Tensor noise = BuildRandomTensor(V.Rows, V.Columns, p);
    Ops.Mul(res.TWeight, w.TWeight, noise);

    if (this.needs_backprop)
    {
        Action backward = () =>
        {
            res.ReleaseWeight();

            // d(input) += mask * d(output): dropped units get no gradient.
            w.AddMulGradient(noise, res.TGradient);

            noise.Dispose();
            res.Dispose();
        };
        this.backprop.Add(backward);
    }
    else
    {
        // Bug fix: the mask was only disposed inside the backward closure, so
        // in inference mode (needs_backprop == false) it leaked every call.
        noise.Dispose();
    }

    return (res);
}
public IWeightMatrix PeekRow(IWeightMatrix w, int ix, int num = 1)
{
    WeightTensor src = w as WeightTensor;

    // Views over rows [ix, ix + num); no data is copied, so writes to the
    // view's gradient land directly in the source tensor.
    var tw = src.TWeight.Narrow(0, ix, num);
    var tg = src.TGradient != null ? src.TGradient.Narrow(0, ix, num) : null;
    var res = weightTensorFactory.CreateWeightTensor(num, src.Columns, tw, tg);

    // Count how often each row was touched (presumably consumed later by a
    // sparse optimizer update). Guarded by the shared lock.
    lock (locker)
    {
        for (int offset = 0; offset < num; offset++)
        {
            int row = ix + offset;
            if (src.RowToBeUpdated.ContainsKey(row) == false)
            {
                src.RowToBeUpdated.Add(row, 1);
            }
            else
            {
                src.RowToBeUpdated[row]++;
            }
        }
    }

    return (res);
}
/// <summary>
/// Update LSTM-Attention cells according to given weights: one decoder LSTM
/// step whose input is the concatenation of the token input, the previous
/// hidden state, and the attention context. Updates the recurrent fields
/// ht and ct as a side effect.
/// </summary>
/// <param name="context">The context weights for attention</param>
/// <param name="input">The input weights</param>
/// <param name="computeGraph">The compute graph to build workflow</param>
/// <returns>Updated hidden weights (ht)</returns>
public IWeightMatrix Step(IWeightMatrix context, IWeightMatrix input, IComputeGraph computeGraph)
{
    var cell_prev = ct;
    var hidden_prev = ht;

    // Single fused affine transform for all gates: [x, h_prev, context] * Wxhc + b.
    var hxhc = computeGraph.ConcatColumns(input, hidden_prev, context);
    var bs = computeGraph.RepeatRows(b, input.Rows);
    var hhSum = computeGraph.MulAdd(hxhc, Wxhc, bs);

    // First 3*hdim columns are the gates; the last hdim is the candidate cell.
    (var gates_raw, var cell_write_raw) = computeGraph.SplitColumns(hhSum, hdim * 3, hdim);
    var gates = computeGraph.Sigmoid(gates_raw);
    var cell_write = computeGraph.Tanh(cell_write_raw);

    (var input_gate, var forget_gate, var output_gate) = computeGraph.SplitColumns(gates, hdim, hdim, hdim);

    // compute new cell activation (fused): ct = forget_gate * cell_prev + input_gate * cell_write
    ct = computeGraph.EltMulMulAdd(forget_gate, cell_prev, input_gate, cell_write);

    // ht = output_gate * tanh(ct)
    ht = computeGraph.EltMul(output_gate, computeGraph.Tanh(ct));

    return (ht);
}
public List<IWeightMatrix> SplitColumns2(IWeightMatrix w, params int[] sizes)
{
    var src = w as WeightTensor;
    List<IWeightMatrix> resList = new List<IWeightMatrix>();

    // Each piece is a Narrow view over the source; weight and gradient
    // storage stay shared, so no copy-back is needed on backprop.
    int offset = 0;
    foreach (int width in sizes)
    {
        WeightTensor piece = weightTensorFactory.CreateWeightTensor(src.Rows, width, src.TWeight.Narrow(1, offset, width), src.TGradient.Narrow(1, offset, width));
        resList.Add(piece);
        offset += width;
    }

    if (this.needs_backprop)
    {
        Action backward = () =>
        {
            foreach (var piece in resList)
            {
                piece.Dispose();
            }
        };
        this.backprop.Add(backward);
    }

    return (resList);
}
public virtual IWeightMatrix Transpose2(IWeightMatrix w)
{
    var src = w as WeightMatrix;

    // Result has swapped dimensions: (Columns x Rows).
    var res = weightMatrixFactory.CreateWeightMatrix(src.Columns, src.Rows);

    for (var row = 0; row < src.Rows; row++)
    {
        for (var col = 0; col < src.Columns; col++)
        {
            // res[col, row] = src[row, col]; note res.Columns == src.Rows.
            res.Weight[col * res.Columns + row] = src.Weight[row * src.Columns + col];
        }
    }

    if (this.needs_backprop)
    {
        Action backward = () =>
        {
            // Transpose the output gradient back and accumulate into the input.
            for (var row = 0; row < src.Rows; row++)
            {
                for (var col = 0; col < src.Columns; col++)
                {
                    src.Gradient[row * src.Columns + col] += res.Gradient[col * res.Columns + row];
                }
            }
        };
        this.backprop.Add(backward);
    }

    return (res);
}
public IWeightMatrix Process(IWeightMatrix input, IComputeGraph innerGraph)
{
    // Layer normalization with learned scale (alpha) and shift (beta),
    // each broadcast to every row of the input.
    var scales = innerGraph.RepeatRows(alpha, input.Rows);
    var shifts = innerGraph.RepeatRows(beta, input.Rows);

    return (innerGraph.LayerNorm(input, scales, shifts));
}
public IWeightMatrix ConcatColumns(IWeightMatrix w1, IWeightMatrix w2)
{
    var left = w1 as WeightTensor;
    var right = w2 as WeightTensor;

    // Result is [left | right]: same row count, column counts added.
    var res = weightTensorFactory.CreateWeightTensor(left.Rows, left.Columns + right.Columns, deviceId);
    Ops.Concat(res.TWeight, 1, left.TWeight, right.TWeight);

    if (this.needs_backprop)
    {
        Action backward = () =>
        {
            // Route each column slice of the output gradient back to the
            // matching input.
            Tensor leftSlice = res.TGradient.Narrow(1, 0, left.Columns);
            Ops.Add(left.TGradient, left.TGradient, leftSlice);

            Tensor rightSlice = res.TGradient.Narrow(1, left.Columns, right.Columns);
            Ops.Add(right.TGradient, right.TGradient, rightSlice);

            leftSlice.Dispose();
            rightSlice.Dispose();
        };
        this.backprop.Add(backward);
    }

    return (res);
}
/// <summary>
/// Element-wise matrix addition on the CPU backend, using the vectorized
/// vsAdd primitive (assumed semantics: out = a + b over n floats, MKL VML
/// style — TODO confirm against the extern declaration).
/// </summary>
public override IWeightMatrix Add(IWeightMatrix w1, IWeightMatrix w2)
{
    var m1 = w1 as WeightMatrix;
    var m2 = w2 as WeightMatrix;
    var res = weightMatrixFactory.CreateWeightMatrix(m1.Rows, m1.Columns);

    unsafe
    {
        fixed (float* m1W = m1.Weight, m2W = m2.Weight, resW = res.Weight)
        {
            // res = m1 + m2
            vsAdd(res.Weight.Length, m1W, m2W, resW);
        }
    }

    if (this.needs_backprop)
    {
        Action backward = () =>
        {
            unsafe
            {
                fixed (float* resG = res.Gradient, m1G = m1.Gradient, m2G = m2.Gradient)
                {
                    // Accumulate the output gradient into both inputs,
                    // written as in-place adds: m1G += resG; m2G += resG.
                    vsAdd(res.Gradient.Length, resG, m1G, m1G);
                    vsAdd(res.Gradient.Length, resG, m2G, m2G);
                }
            }
        };
        this.backprop.Add(backward);
    }

    return (res);
}
/// <summary>
/// Accumulates the gradient of <paramref name="src"/> into this tensor's
/// gradient and merges its touched-row bookkeeping. Thread-safe via the
/// shared lock.
/// </summary>
public void AddGradient(IWeightMatrix src)
{
    WeightTensor m = src as WeightTensor;

    lock (locker)
    {
        // Copy into a tensor created on THIS gradient's allocator first —
        // presumably so gradients living on a different device/allocator can
        // be added safely; TODO confirm.
        Tensor t = new Tensor(TGradient.Allocator, DType.Float32, Rows, Columns);
        Ops.Copy(t, m.TGradient);
        Ops.Add(TGradient, TGradient, t);

        // Merge the source's per-row update counters into ours.
        foreach (var kv in m.RowToBeUpdated)
        {
            if (RowToBeUpdated.ContainsKey(kv.Key) == false)
            {
                RowToBeUpdated.Add(kv.Key, kv.Value);
            }
            else
            {
                RowToBeUpdated[kv.Key] += kv.Value;
            }
        }

        t.Dispose();
    }
}
public IWeightMatrix Step(IWeightMatrix input, IComputeGraph innerGraph)
{
    var prevHidden = ht;
    var prevCell = ct;

    // One fused affine transform covering all four LSTM blocks:
    // [input, h_prev] * Wxh + b.
    var concatenated = innerGraph.ConcatColumns(input, prevHidden);
    var bias = innerGraph.RepeatRows(b, input.Rows);
    var preActivations = innerGraph.MulAdd(concatenated, Wxh, bias);

    // First 3*hdim columns are the gates; the last hdim is the candidate cell.
    (var gates_raw, var cell_write_raw) = innerGraph.SplitColumns(preActivations, hdim * 3, hdim);
    var gates = innerGraph.Sigmoid(gates_raw);
    var cell_write = innerGraph.Tanh(cell_write_raw);

    (var input_gate, var forget_gate, var output_gate) = innerGraph.SplitColumns(gates, hdim, hdim, hdim);

    // New cell state: ct = forget_gate * c_prev + input_gate * cell_write.
    var keptCell = innerGraph.EltMul(forget_gate, prevCell);
    var writtenCell = innerGraph.EltMul(input_gate, cell_write);
    ct = innerGraph.Add(keptCell, writtenCell);

    // New hidden state: ht = output_gate * tanh(ct).
    ht = innerGraph.EltMul(output_gate, innerGraph.Tanh(ct));

    return (ht);
}
/// <summary>
/// Numerically stable softmax: subtracts the global maximum before
/// exponentiation, then normalizes by the global sum. Note this normalizes
/// over ALL elements of the tensor (MaxAll/SumAll), not per row.
/// </summary>
public IWeightMatrix Softmax(IWeightMatrix w)
{
    WeightTensor m = w as WeightTensor;
    var res = weightTensorFactory.CreateWeightTensor(m.Rows, m.Columns, deviceId);

    // exp(x - max(x)) avoids overflow for large activations.
    var maxval = Ops.MaxAll(m.TWeight);
    Ops.ExpSub(res.TWeight, m.TWeight, maxval);

    float s = Ops.SumAll(res.TWeight);
    Ops.Mul(res.TWeight, res.TWeight, 1.0f / s);

    if (this.needs_backprop)
    {
        Action backward = () =>
        {
            // Softmax backward: dx += y * dy - y * sum(y * dy).
            Tensor tTmp = Ops.Mul(null, res.TGradient, res.TWeight);
            Ops.Add(m.TGradient, m.TGradient, tTmp);

            float ss = Ops.SumAll(tTmp);
            Ops.AddMulV(m.TGradient, m.TGradient, res.TWeight, -ss);

            tTmp.Dispose();
        };
        this.backprop.Add(backward);
    }

    return (res);
}
/// <summary>
/// Matrix multiply with additive term: res = m1 * m2 + m3, via Addmm.
/// </summary>
public IWeightMatrix MulAdd2(IWeightMatrix m1, IWeightMatrix m2, IWeightMatrix m3)
{
    WeightTensor t1 = m1 as WeightTensor;
    WeightTensor t2 = m2 as WeightTensor;
    WeightTensor t3 = m3 as WeightTensor;

    var n = t1.Rows;
    var d = t2.Columns;
    WeightTensor res = weightTensorFactory.CreateWeightTensor(n, d, deviceId);

    // Addmm: res = 1.0 * t3 + 1.0 * (t1 x t2).
    Ops.Addmm(res.TWeight, 1.0f, t3.TWeight, 1.0f, t1.TWeight, t2.TWeight);

    if (this.needs_backprop)
    {
        Action backward = () =>
        {
            // The additive term passes the gradient straight through: d(t3) += d(res).
            Ops.Add(t3.TGradient, t3.TGradient, res.TGradient);

            // d(t1) += d(res) x t2^T
            var tW2 = t2.TWeight.Transpose();
            Ops.Addmm(t1.TGradient, 1.0f, t1.TGradient, 1.0f, res.TGradient, tW2);

            // d(t2) += t1^T x d(res)
            var tW1 = t1.TWeight.Transpose();
            Ops.Addmm(t2.TGradient, 1.0f, t2.TGradient, 1.0f, tW1, res.TGradient);

            tW1.Dispose();
            tW2.Dispose();
        };
        this.backprop.Add(backward);
    }

    return (res);
}
/// <summary>
/// Splits a matrix into n sub-matrices along its rows using Unfold/Select
/// views (no data copy). When <paramref name="gradient"/> is true the
/// gradient storage is split the same way and the pieces are disposed during
/// backprop. Assumes m.Rows is divisible by n — TODO confirm at call sites.
/// </summary>
public List<IWeightMatrix> UnFolderRow(IWeightMatrix m, int n, bool gradient = true)
{
    List<IWeightMatrix> resList = new List<IWeightMatrix>();
    WeightTensor t = m as WeightTensor;

    if (gradient)
    {
        // Unfold groups rows into n windows; Select(2, i) picks window i as a view.
        Tensor tW = t.TWeight.Unfold(0, n, n);
        Tensor tG = t.TGradient.Unfold(0, n, n);

        for (int i = 0; i < n; i++)
        {
            WeightTensor res = weightTensorFactory.CreateWeightTensor(m.Rows / n, m.Columns, tW.Select(2, i), tG.Select(2, i));

            // Sanity check: the selected views must match the expected row count.
            if (res.Rows != res.TWeight.Sizes[0] || res.Rows != res.TGradient.Sizes[0])
            {
                throw new InvalidOperationException("Invalide unfolder");
            }

            resList.Add(res);
        }

        tW.Dispose();
        tG.Dispose();
    }
    else
    {
        // Weight-only variant: no gradient views are created.
        Tensor tw = t.TWeight.Unfold(0, n, n);

        for (int i = 0; i < n; i++)
        {
            WeightTensor res = weightTensorFactory.CreateWeightTensor(m.Rows / n, m.Columns, tw.Select(2, i), null);

            if (res.Rows != res.TWeight.Sizes[0])
            {
                throw new InvalidOperationException("Invalide unfolder");
            }

            resList.Add(res);
        }

        tw.Dispose();
    }

    if (this.needs_backprop && gradient)
    {
        Action backward = () =>
        {
            foreach (var item in resList)
            {
                item.Dispose();
            }
        };
        this.backprop.Add(backward);
    }

    return (resList);
}
public void SetGradientByWeight(IWeightMatrix src)
{
    WeightMatrix m = src as WeightMatrix;

    // Copy the values rather than aliasing the array — presumably so later
    // in-place changes to src's weights cannot affect this gradient buffer.
    m.Weight.CopyTo(Gradient, 0);
}
/// <summary>
/// Batched matrix multiply. The CPU backend has no batched GEMM, so only
/// batchSize == 1 is supported; it delegates to the plain Mul.
/// </summary>
/// <exception cref="ArgumentException">Thrown when batchSize is not 1.</exception>
public IWeightMatrix MulBatch(IWeightMatrix m1, IWeightMatrix m2, int batchSize)
{
    if (batchSize != 1)
    {
        // Fix: the original message used string interpolation with no
        // placeholders; report the offending value and parameter name.
        throw new ArgumentException($"For CPU operations, the batch size must be 1, but got {batchSize}.", nameof(batchSize));
    }

    return (Mul(m1, m2));
}
/// <summary>
/// Row-matrix softmax: only a single-row matrix is supported here, delegating
/// to the vector Softmax. The 'bp' flag is accepted for interface
/// compatibility but is not used by this implementation.
/// </summary>
/// <exception cref="InvalidOperationException">Thrown when w has more than one row.</exception>
public IWeightMatrix SoftmaxM(IWeightMatrix w, bool bp = true)
{
    if (w.Rows != 1)
    {
        // Fix: the original message used string interpolation with no
        // placeholders; report the actual row count for easier debugging.
        throw new InvalidOperationException($"The row size of given matrix must be 1, but got {w.Rows}.");
    }

    return (Softmax(w));
}
/// <summary>
/// Scalar multiply: res = w * v, using a SIMD main loop with a scalar tail.
/// Backward accumulates d(input) += v * d(output).
/// </summary>
public IWeightMatrix Mul(IWeightMatrix w, float v)
{
    var m = w as WeightMatrix;
    var res = weightMatrixFactory.CreateWeightMatrix(m.Rows, m.Columns);

    var n = m.Weight.Length;
    var moreItems = (n % Vector<float>.Count); // element count for the scalar tail

    var i = 0;
    while (i < n - moreItems)
    {
        // Fix: the original loaded res.Weight into a vector and immediately
        // overwrote it; compute the product directly instead.
        var vecResW = new Vector<float>(m.Weight, i) * v;
        vecResW.CopyTo(res.Weight, i);
        i += Vector<float>.Count;
    }

    while (i < n)
    {
        res.Weight[i] = m.Weight[i] * v;
        i++;
    }

    if (this.needs_backprop)
    {
        Action backward = () =>
        {
            // d(input) += v * d(output), mirroring the SIMD + tail split.
            // (Fix: removed an unused load of m.Weight in this loop.)
            i = 0;
            while (i < n - moreItems)
            {
                var vecResGrad = new Vector<float>(res.Gradient, i);
                var vecM1Grad = new Vector<float>(m.Gradient, i);
                vecM1Grad += v * vecResGrad;
                vecM1Grad.CopyTo(m.Gradient, i);
                i += Vector<float>.Count;
            }

            while (i < n)
            {
                m.Gradient[i] += v * res.Gradient[i];
                i++;
            }
        };
        this.backprop.Add(backward);
    }

    return (res);
}
public IWeightMatrix Encode(IWeightMatrix V, IComputeGraph g)
{
    // Chain the encoder stack: each layer's output feeds the next.
    foreach (var encoder in encoders)
    {
        V = encoder.Step(V, g);
    }

    return (V);
}
public virtual IWeightMatrix RepeatRows(IWeightMatrix w, int n)
{
    // Default implementation: concatenate n references to the same matrix
    // row-wise. The list is presized to avoid growth reallocations.
    List<IWeightMatrix> copies = new List<IWeightMatrix>(n);
    for (int k = 0; k < n; k++)
    {
        copies.Add(w);
    }

    return (ConcatRows(copies));
}
/// <summary>
/// Replaces this tensor's gradient with the source's WEIGHT tensor,
/// transferring ownership: the old gradient is disposed, and src.TWeight is
/// nulled out (presumably so src's Dispose cannot free the stolen tensor —
/// TODO confirm the disposal contract).
/// </summary>
public void SetGradientByWeight(IWeightMatrix src)
{
    WeightTensor m = src as WeightTensor;

    // Ownership transfer instead of an element-wise copy.
    TGradient.Dispose();
    TGradient = m.TWeight;
    m.TWeight = null;
}
/// <summary>
/// Element-wise tanh with a SIMD main loop and a scalar tail.
/// Backward applies d(x) += (1 - y^2) * d(y), where y is the stored output.
/// </summary>
public virtual IWeightMatrix Tanh(IWeightMatrix w)
{
    var m = w as WeightMatrix; // tanh nonlinearity
    var res = weightMatrixFactory.CreateWeightMatrix(m.Rows, m.Columns);

    var n = m.Weight.Length;
    var moreItems = (n % Vector<float>.Count); // element count for the scalar tail

    // Forward: vectorized FastTanh, then scalar remainder.
    var i = 0;
    while (i < n - moreItems)
    {
        var vecMW = new Vector<float>(m.Weight, i);
        var vecSig = FastTanh(vecMW);
        vecSig.CopyTo(res.Weight, i);
        i += Vector<float>.Count;
    }

    while (i < n)
    {
        res.Weight[i] = FastTanh(m.Weight[i]);
        i++;
    }

    if (this.needs_backprop)
    {
        Action backward = () =>
        {
            // Reuses the captured i; loop bounds mirror the forward pass.
            i = 0;
            while (i < n - moreItems)
            {
                var vecResW = new Vector<float>(res.Weight, i);
                var vecResGrad = new Vector<float>(res.Gradient, i);
                var vecMGrad = new Vector<float>(m.Gradient, i);
                vecMGrad = (vecMGrad + (Vector<float>.One - vecResW * vecResW) * vecResGrad);
                vecMGrad.CopyTo(m.Gradient, i);
                i += Vector<float>.Count;
            }

            while (i < n)
            {
                // NOTE(review): 'mwi' actually holds the OUTPUT res.Weight[i];
                // the 1.0 literal promotes this tail to double precision,
                // unlike the float-only vector path above.
                var mwi = res.Weight[i];
                m.Gradient[i] = (float)(m.Gradient[i] + (1.0 - mwi * mwi) * res.Gradient[i]);
                i++;
            }
        };
        this.backprop.Add(backward);
    }

    return (res);
}
/// <summary>
/// Transpose via tensor views. Transpose() presumably returns a strided view
/// sharing storage with the source — consistent with no backward hook being
/// registered here: gradient writes to the result would flow back to the
/// source automatically. TODO confirm the tensor library's view semantics.
/// </summary>
public IWeightMatrix Transpose2(IWeightMatrix w)
{
    WeightTensor m = w as WeightTensor;

    var wT = m.TWeight.Transpose();
    var gT = m.TGradient.Transpose();

    // Dimensions swap: result is (Columns x Rows).
    var res = weightTensorFactory.CreateWeightTensor(m.Columns, m.Rows, wT, gT);

    return (res);
}