/// <summary>
/// Calls <c>Ops.Addmm</c> on <paramref name="m1"/>, <paramref name="m2"/> and
/// <paramref name="m3"/> with both scale factors set to 1 (presumably
/// <c>m3 + m1 x m2</c>) and, when back-propagation is enabled, registers the
/// action that accumulates the result's gradient into all three inputs.
/// </summary>
/// <param name="m1">Left operand; its row count is used for the result.</param>
/// <param name="m2">Right operand; its column count is used for the result.</param>
/// <param name="m3">Tensor passed to <c>Ops.Addmm</c> as the source term.</param>
/// <returns>The newly created result tensor.</returns>
public IWeightMatrix MulAdd2(IWeightMatrix m1, IWeightMatrix m2, IWeightMatrix m3)
{
    WeightTensor t1 = m1 as WeightTensor;
    WeightTensor t2 = m2 as WeightTensor;
    WeightTensor t3 = m3 as WeightTensor;

    var n = t1.Rows;
    var d = t2.Columns;

    WeightTensor res = weightTensorFactory.CreateWeightTensor(n, d, deviceId);
    Ops.Addmm(res.TWeight, 1.0f, t3.TWeight, 1.0f, t1.TWeight, t2.TWeight);

    if (this.needs_backprop)
    {
        Action backward = () =>
        {
            // The source term receives the result gradient unchanged.
            Ops.Add(t3.TGradient, t3.TGradient, res.TGradient);

            // Transposed views are scoped with using so they are released
            // even if the accumulation throws.
            using (var tW2 = t2.TWeight.Transpose())
            {
                Ops.Addmm(t1.TGradient, 1.0f, t1.TGradient, 1.0f, res.TGradient, tW2);
            }

            using (var tW1 = t1.TWeight.Transpose())
            {
                Ops.Addmm(t2.TGradient, 1.0f, t2.TGradient, 1.0f, tW1, res.TGradient);
            }
        };
        this.backprop.Add(backward);
    }

    return res;
}
/// <summary>
/// Creates a result tensor sized (rows of <paramref name="m1"/>, columns of
/// <paramref name="m2"/>) and fills it through <c>Ops.Addmm</c> with the source
/// term scaled by 0 and the product term scaled by 1. When back-propagation is
/// enabled, a backward action is queued that accumulates gradients into both
/// operands and then disposes the result.
/// </summary>
/// <param name="m1">Left operand.</param>
/// <param name="m2">Right operand.</param>
/// <returns>The result tensor created by the factory.</returns>
public IWeightTensor Mul(IWeightTensor m1, IWeightTensor m2)
{
    WeightTensor lhs = m1 as WeightTensor;
    WeightTensor rhs = m2 as WeightTensor;

    var rowCount = lhs.Rows;
    var columnCount = rhs.Columns;

    WeightTensor product = m_weightTensorFactory.CreateWeightTensor(rowCount, columnCount, m_deviceId, name: $"{GetHashString(m1.Name, m2.Name)}.Mul");
    VisualizeNodes(new IWeightTensor[] { m1, m2 }, product);

    Ops.Addmm(product.TWeight, 0.0f, product.TWeight, 1.0f, lhs.TWeight, rhs.TWeight);

    if (!m_needsBackprop)
    {
        return product;
    }

    Action backward = () =>
    {
        // The forward values of the result are released before gradients are propagated.
        product.ReleaseWeight();

        using (var rhsTransposed = rhs.TWeight.Transpose())
        {
            Ops.Addmm(lhs.TGradient, 1.0f, lhs.TGradient, 1.0f, product.TGradient, rhsTransposed);
        }

        using (var lhsTransposed = lhs.TWeight.Transpose())
        {
            Ops.Addmm(rhs.TGradient, 1.0f, rhs.TGradient, 1.0f, lhsTransposed, product.TGradient);
        }

        product.Dispose();
    };
    this.m_backprop.Add(backward);

    return product;
}
/// <summary>
/// Adds the gradient of <paramref name="src"/> to this tensor's gradient and merges
/// its <c>RowToBeUpdated</c> counters into this tensor's, all under <c>locker</c>.
/// </summary>
/// <param name="src">Tensor whose gradient and row-update counters are merged in.</param>
public void AddGradient(IWeightMatrix src)
{
    WeightTensor m = src as WeightTensor;

    lock (locker)
    {
        // The source gradient is first copied into a temporary allocated from this
        // gradient's allocator, then added. The using scope releases the temporary
        // even if the copy or the add throws.
        using (Tensor t = new Tensor(TGradient.Allocator, DType.Float32, Rows, Columns))
        {
            Ops.Copy(t, m.TGradient);
            Ops.Add(TGradient, TGradient, t);
        }

        foreach (var kv in m.RowToBeUpdated)
        {
            if (RowToBeUpdated.ContainsKey(kv.Key))
            {
                RowToBeUpdated[kv.Key] += kv.Value;
            }
            else
            {
                RowToBeUpdated.Add(kv.Key, kv.Value);
            }
        }
    }
}
/// <summary>
/// Calls <c>Ops.Addmm</c> with <paramref name="mbias"/> expanded to the result
/// shape as the source term and <paramref name="m1"/>, <paramref name="m2"/> as the
/// product operands (both scale factors 1). When back-propagation is enabled, a
/// backward action is queued that accumulates gradients into all three inputs and
/// then disposes the result.
/// </summary>
/// <param name="m1">Left operand; its row count is used for the result.</param>
/// <param name="m2">Right operand; its column count is used for the result.</param>
/// <param name="mbias">Bias tensor, expanded to (rows, columns) of the result.</param>
/// <returns>The result tensor created by the factory.</returns>
/// <exception cref="ArgumentNullException">Any of the three arguments is null.</exception>
public IWeightTensor Affine(IWeightTensor m1, IWeightTensor m2, IWeightTensor mbias)
{
    // ArgumentNullException(string) interprets its single argument as the parameter
    // name, so the parameter name and the message are passed separately.
    if (m1 == null)
    {
        throw new ArgumentNullException(nameof(m1), "m1 tensor is null");
    }

    if (m2 == null)
    {
        throw new ArgumentNullException(nameof(m2), "m2 tensor is null");
    }

    if (mbias == null)
    {
        throw new ArgumentNullException(nameof(mbias), "mbias tensor is null");
    }

    WeightTensor t1 = m1 as WeightTensor;
    WeightTensor t2 = m2 as WeightTensor;
    WeightTensor t3 = mbias as WeightTensor;

    var n = t1.Rows;
    var d = t2.Columns;

    WeightTensor res = m_weightTensorFactory.CreateWeightTensor(n, d, m_deviceId, name: $"{GetHashString(m1.Name, m2.Name, mbias.Name)}.Affine");
    VisualizeNodes(new IWeightTensor[] { m1, m2, mbias }, res);

    using (var t3WExp = t3.TWeight.Expand(n, d))
    {
        Ops.Addmm(res.TWeight, 1.0f, t3WExp, 1.0f, t1.TWeight, t2.TWeight);
    }

    if (m_needsBackprop)
    {
        Action backward = () =>
        {
            res.ReleaseWeight();

            // NOTE(review): the bias gradient is accumulated through an expanded view
            // of t3.TGradient; confirm Ops.Add supports an expanded destination.
            using (var t3G = t3.TGradient.Expand(n, d))
            {
                Ops.Add(t3G, t3G, res.TGradient);
            }

            using (var tW2 = t2.TWeight.Transpose())
            {
                Ops.Addmm(t1.TGradient, 1.0f, t1.TGradient, 1.0f, res.TGradient, tW2);
            }

            using (var tW1 = t1.TWeight.Transpose())
            {
                Ops.Addmm(t2.TGradient, 1.0f, t2.TGradient, 1.0f, tW1, res.TGradient);
            }

            res.Dispose();
        };
        this.m_backprop.Add(backward);
    }

    return res;
}
/// <summary>
/// Creates a tensor whose weights are a contiguous copy of <paramref name="w"/>'s
/// weights permuted by <paramref name="dims"/>. When back-propagation is enabled,
/// the queued backward action adds the result's gradient into a view of the input
/// gradient permuted by the same <paramref name="dims"/> and disposes the result.
/// </summary>
/// <param name="w">Input tensor.</param>
/// <param name="dims">Dimension order passed to <c>Tensor.Permute</c>.</param>
/// <returns>The result tensor created by the factory.</returns>
public IWeightTensor Permute(IWeightTensor w, params int[] dims)
{
    var source = w as WeightTensor;

    // NOTE(review): the result is created with the input's (un-permuted) Sizes and
    // its TWeight is then replaced; confirm the TWeight setter handles this.
    WeightTensor permuted = m_weightTensorFactory.CreateWeightTensor(source.Sizes, m_deviceId, name: $"{GetHashString(w.Name)}.Permute");
    VisualizeNodes(w, permuted);

    using (var weightView = source.TWeight.Permute(dims))
    {
        permuted.TWeight = Ops.AsContiguous(weightView);
    }

    if (!m_needsBackprop)
    {
        return permuted;
    }

    Action backward = () =>
    {
        using (var gradientView = source.TGradient.Permute(dims))
        {
            Ops.Add(gradientView, gradientView, permuted.TGradient);
        }

        permuted.Dispose();
    };
    this.m_backprop.Add(backward);

    return permuted;
}
/// <summary>
/// Wraps a narrowed view (dimension 0, start <paramref name="ix"/>, length
/// <paramref name="num"/>) of <paramref name="w"/>'s weights — and of its gradient
/// when one exists — in a new tensor, and increments the source's
/// <c>RowToBeUpdated</c> counter for each row index in that range.
/// </summary>
/// <param name="w">Source tensor.</param>
/// <param name="ix">First row index.</param>
/// <param name="num">Number of rows.</param>
/// <returns>A tensor with <paramref name="num"/> rows built over the narrowed views.</returns>
public IWeightMatrix PeekRow(IWeightMatrix w, int ix, int num = 1)
{
    WeightTensor source = w as WeightTensor;

    var weightRows = source.TWeight.Narrow(0, ix, num);

    Tensor gradientRows = null;
    if (source.TGradient != null)
    {
        gradientRows = source.TGradient.Narrow(0, ix, num);
    }

    var rows = weightTensorFactory.CreateWeightTensor(num, source.Columns, weightRows, gradientRows);

    lock (locker)
    {
        for (int offset = 0; offset < num; offset++)
        {
            int rowIndex = ix + offset;
            if (source.RowToBeUpdated.ContainsKey(rowIndex))
            {
                source.RowToBeUpdated[rowIndex]++;
            }
            else
            {
                source.RowToBeUpdated.Add(rowIndex, 1);
            }
        }
    }

    return rows;
}
/// <summary>
/// Computes a softmax of <paramref name="w"/> in three steps: <c>Ops.ExpSub</c> with
/// the value returned by <c>Ops.MaxAll</c>, <c>Ops.SumAll</c>, then scaling by the
/// reciprocal of that sum. The normalisation therefore uses a single sum for the
/// whole tensor. When back-propagation is enabled, the matching backward action is
/// queued.
/// </summary>
/// <param name="w">Input tensor.</param>
/// <returns>A new tensor with the same row and column counts as the input.</returns>
public IWeightMatrix Softmax(IWeightMatrix w)
{
    WeightTensor m = w as WeightTensor;
    var res = weightTensorFactory.CreateWeightTensor(m.Rows, m.Columns, deviceId);

    var maxval = Ops.MaxAll(m.TWeight);
    Ops.ExpSub(res.TWeight, m.TWeight, maxval);
    float s = Ops.SumAll(res.TWeight);
    Ops.Mul(res.TWeight, res.TWeight, 1.0f / s);

    if (this.needs_backprop)
    {
        Action backward = () =>
        {
            // tTmp = res.TGradient * res.TWeight; the using scope releases it even
            // if one of the following operations throws.
            using (Tensor tTmp = Ops.Mul(null, res.TGradient, res.TWeight))
            {
                Ops.Add(m.TGradient, m.TGradient, tTmp);
                float ss = Ops.SumAll(tTmp);
                Ops.AddMulV(m.TGradient, m.TGradient, res.TWeight, -ss);
            }
        };
        this.backprop.Add(backward);
    }

    return res;
}
/// <summary>
/// Creates a (row x column) tensor filled with sinusoidal position values: for
/// position <c>p</c> and timescale index <c>i</c>, the first half of each row holds
/// <c>sin(p * exp(-i * increment))</c> and the second half the matching cosine.
/// The tensor is added to <c>weights</c> before being returned.
/// </summary>
/// <param name="row">Number of positions (rows).</param>
/// <param name="column">Width of each row; half of it is the number of timescales.</param>
/// <param name="deviceId">Device identifier forwarded to the tensor constructor.</param>
/// <param name="name">Tensor name.</param>
/// <param name="isTrainable">Forwarded to the tensor constructor.</param>
/// <returns>The populated tensor.</returns>
public WeightTensor BuildPositionWeightTensor(int row, int column, int deviceId, string name = "", bool isTrainable = false)
{
    WeightTensor t = new WeightTensor(new long[2] { row, column }, deviceId, name: name, isTrainable: isTrainable);

    double numTimescales = (float)column / 2;

    // With a single timescale (column == 2) the divisor is exactly zero; dividing
    // would give +Infinity and 0 * -Infinity = NaN for every entry. Only i == 0 is
    // visited in that case, so any finite increment yields the intended timescale 1.
    double timescaleSteps = numTimescales - 1.0f;
    double logTimescaleIncrement = timescaleSteps == 0.0 ? 0.0 : Math.Log(10000.0f) / timescaleSteps;

    float[] posWeights = new float[row * column];

    for (int p = 0; p < row; ++p)
    {
        for (int i = 0; i < numTimescales; ++i)
        {
            float v = (float)(p * Math.Exp(i * -logTimescaleIncrement));
            posWeights[p * column + i] = (float)Math.Sin(v);
            posWeights[p * column + (int)numTimescales + i] = (float)Math.Cos(v);
        }
    }

    t.TWeight.CopyFrom(posWeights);
    weights.Add(t);

    return t;
}
/// <summary>
/// Views <paramref name="m"/>'s weights as (Rows / batchSize, batchSize, Columns),
/// swaps the first two dimensions, makes the result contiguous and views it back as
/// (Rows, Columns). When back-propagation is enabled, the queued backward action
/// adds the result's gradient, viewed and permuted the opposite way, into the input
/// gradient and disposes the result.
/// </summary>
/// <param name="m">Input tensor.</param>
/// <param name="batchSize">Batch size used to split the row dimension.</param>
/// <returns>The result tensor created by the factory.</returns>
public IWeightMatrix PermuteBatch(IWeightMatrix m, int batchSize)
{
    WeightTensor t = m as WeightTensor;
    var res = weightTensorFactory.CreateWeightTensor(m.Rows, m.Columns, deviceId);

    int sizeEveryBatch = m.Rows / batchSize;

    // Each intermediate tensor handle is released once the final view is taken;
    // previously the chained call left all of them undisposed.
    // NOTE(review): this relies on views keeping the shared storage alive after the
    // handle they were derived from is disposed — confirm against Tensor.View.
    using (var batched = t.TWeight.View(sizeEveryBatch, batchSize, m.Columns))
    using (var permuted = batched.Permute(1, 0, 2))
    using (var contiguous = Ops.AsContiguous(permuted))
    {
        res.TWeight = contiguous.View(m.Rows, m.Columns);
    }

    if (this.needs_backprop)
    {
        Action backward = () =>
        {
            using (var g = t.TGradient.View(sizeEveryBatch, batchSize, m.Columns))
            using (var resGradient = res.TGradient.View(batchSize, sizeEveryBatch, m.Columns))
            using (var t2 = resGradient.Permute(1, 0, 2))
            {
                Ops.Add(g, g, t2);
            }

            res.Dispose();
        };
        this.backprop.Add(backward);
    }

    return res;
}
/// <summary>
/// Wraps a narrowed view (dimension 0, start <paramref name="ix"/>, length
/// <paramref name="num"/>) of <paramref name="w"/>'s weights — and of its gradient
/// when one exists — in a new tensor, and increments the source's
/// <c>RowToBeUpdated</c> counter for each row index in that range. When
/// back-propagation is enabled, a backward action that disposes the returned
/// tensor is queued.
/// </summary>
/// <param name="w">Source tensor.</param>
/// <param name="ix">First row index.</param>
/// <param name="num">Number of rows.</param>
/// <returns>A tensor with <paramref name="num"/> rows built over the narrowed views.</returns>
public IWeightMatrix PeekRow(IWeightMatrix w, int ix, int num = 1)
{
    WeightTensor source = w as WeightTensor;

    var weightRows = source.TWeight.Narrow(0, ix, num);

    Tensor gradientRows = null;
    if (source.TGradient != null)
    {
        gradientRows = source.TGradient.Narrow(0, ix, num);
    }

    var rows = weightTensorFactory.CreateWeightTensor(num, source.Columns, weightRows, gradientRows);

    lock (locker)
    {
        for (int offset = 0; offset < num; offset++)
        {
            int rowIndex = ix + offset;
            if (source.RowToBeUpdated.ContainsKey(rowIndex))
            {
                source.RowToBeUpdated[rowIndex]++;
            }
            else
            {
                source.RowToBeUpdated.Add(rowIndex, 1);
            }
        }
    }

    if (this.needs_backprop)
    {
        // Nothing to accumulate: the backward step only disposes the returned tensor.
        Action backward = () =>
        {
            rows.Dispose();
        };
        this.backprop.Add(backward);
    }

    return rows;
}
/// <summary>
/// Splits <paramref name="w"/> along dimension 1 into consecutive column ranges of
/// the given <paramref name="sizes"/>. Each part wraps narrowed views of the source
/// weights and, when the source has a gradient tensor, of its gradient. When
/// back-propagation is enabled, a backward action that disposes every part is queued.
/// </summary>
/// <param name="w">Source tensor.</param>
/// <param name="sizes">Column count of each part, in order.</param>
/// <returns>One tensor per entry in <paramref name="sizes"/>.</returns>
public List<IWeightMatrix> SplitColumns2(IWeightMatrix w, params int[] sizes)
{
    var m = w as WeightTensor;
    List<IWeightMatrix> resList = new List<IWeightMatrix>();

    int x = 0;
    foreach (int size in sizes)
    {
        Tensor weightPart = m.TWeight.Narrow(1, x, size);

        // The gradient may be absent (PeekRow treats it as nullable); pass null on
        // to the factory instead of dereferencing it.
        Tensor gradientPart = m.TGradient != null ? m.TGradient.Narrow(1, x, size) : null;

        WeightTensor res = weightTensorFactory.CreateWeightTensor(m.Rows, size, weightPart, gradientPart);
        resList.Add(res);

        x += size;
    }

    if (this.needs_backprop)
    {
        Action backward = () =>
        {
            foreach (var item in resList)
            {
                item.Dispose();
            }
        };
        this.backprop.Add(backward);
    }

    return resList;
}
/// <summary>
/// Runs <c>Ops.Softmax</c> on <paramref name="w"/>'s weights. With
/// <paramref name="inPlace"/> the result is obtained from <c>CopyWeightsRef</c> on
/// the input; otherwise a new tensor with the input's sizes is created. When
/// back-propagation is enabled and <paramref name="runGradients"/> is true, a
/// backward action is queued that calls <c>AddSoftmaxGradient</c> on the input and
/// disposes the result.
/// </summary>
/// <param name="w">Input tensor.</param>
/// <param name="runGradients">Whether to queue the backward action.</param>
/// <param name="inPlace">Whether the result should reference the input's weights.</param>
/// <returns>The result tensor.</returns>
public IWeightTensor Softmax(IWeightTensor w, bool runGradients = true, bool inPlace = false)
{
    WeightTensor input = w as WeightTensor;

    WeightTensor output = inPlace
        ? input.CopyWeightsRef($"{GetHashString(w.Name)}.Softmax")
        : m_weightTensorFactory.CreateWeightTensor(input.Sizes, m_deviceId, name: $"{GetHashString(w.Name)}.Softmax");

    VisualizeNodes(w, output);

    Ops.Softmax(output.TWeight, input.TWeight);

    if (!(m_needsBackprop && runGradients))
    {
        return output;
    }

    Action backward = () =>
    {
        if (inPlace)
        {
            // In-place mode: the input's gradient is replaced by a reference copy
            // of the result's gradient before the softmax gradient is applied.
            input.TGradient = output.TGradient.CopyRef();
        }

        input.AddSoftmaxGradient(output, inPlace);
        output.Dispose();
    };
    this.m_backprop.Add(backward);

    return output;
}
/// <summary>
/// Creates a new (row x column) tensor on the given device and records it in the
/// <c>buffer</c> bucket keyed by row count and then column count.
/// </summary>
/// <param name="row">Row count.</param>
/// <param name="column">Column count.</param>
/// <param name="deviceId">Device identifier forwarded to the tensor constructor.</param>
/// <param name="cleanWeights">When true, <c>ClearWeight</c> is called on the new tensor.</param>
/// <returns>The newly created tensor.</returns>
public WeightTensor CreateWeightTensor(int row, int column, int deviceId, bool cleanWeights = false)
{
    var bucketsByColumn = buffer.GetOrAdd(row, rowKey => new ConcurrentDictionary<int, WeightTensorList>());
    var bucket = bucketsByColumn.GetOrAdd(column, columnKey => new WeightTensorList());

    // NOTE(review): an earlier pooling scheme (reuse of bucket entries by index,
    // guarded by a lock) was disabled here; a fresh tensor is always allocated and
    // the Add below is unsynchronised — confirm WeightTensors tolerates concurrent Add.
    WeightTensor created = new WeightTensor(row, column, deviceId);
    bucket.WeightTensors.Add(created);

    if (cleanWeights)
    {
        created.ClearWeight();
    }

    return created;
}
/// <summary>
/// Builds one tensor from two parallel lists: entry <c>i</c> of
/// <paramref name="wl1"/> is copied into the left column range and entry <c>i</c>
/// of <paramref name="wl2"/> into the right column range, each at row block
/// <c>i</c>. When back-propagation is enabled, the queued backward action adds the
/// matching gradient regions back into every entry and disposes the result.
/// </summary>
/// <param name="wl1">Tensors for the left column range; must be non-empty.</param>
/// <param name="wl2">Tensors for the right column range; indexed in parallel with <paramref name="wl1"/>.</param>
/// <returns>The concatenated tensor.</returns>
public IWeightMatrix ConcatRowColumn(List<IWeightMatrix> wl1, List<IWeightMatrix> wl2)
{
    int sx = wl1[0].Rows * wl1.Count;
    int sy = wl1[0].Columns + wl2[0].Columns;

    var res = weightTensorFactory.CreateWeightTensor(sx, sy, deviceId);

    using (var resTWC1 = res.TWeight.Narrow(1, 0, wl1[0].Columns))
    using (var resTWC2 = res.TWeight.Narrow(1, wl1[0].Columns, wl2[0].Columns))
    {
        for (int i = 0; i < wl1.Count; i++)
        {
            WeightTensor m1 = wl1[i] as WeightTensor;
            WeightTensor m2 = wl2[i] as WeightTensor;

            using (var resTWC1R = resTWC1.Narrow(0, i * m1.Rows, m1.Rows))
            {
                Ops.Copy(resTWC1R, m1.TWeight);
            }

            using (var resTWC2R = resTWC2.Narrow(0, i * m2.Rows, m2.Rows))
            {
                Ops.Copy(resTWC2R, m2.TWeight);
            }
        }
    }

    if (this.needs_backprop)
    {
        Action backward = () =>
        {
            using (var res1 = res.TGradient.Narrow(1, 0, wl1[0].Columns))
            using (var res2 = res.TGradient.Narrow(1, wl1[0].Columns, wl2[0].Columns))
            {
                for (int i = 0; i < wl1.Count; i++)
                {
                    WeightTensor m1 = wl1[i] as WeightTensor;
                    WeightTensor m2 = wl2[i] as WeightTensor;

                    using (var resTGC1R = res1.Narrow(0, i * m1.Rows, m1.Rows))
                    {
                        Ops.Add(m1.TGradient, m1.TGradient, resTGC1R);
                    }

                    // Slice with m2's own row count so the region matches the one
                    // the forward pass copied m2's weights into.
                    using (var resTGC2R = res2.Narrow(0, i * m2.Rows, m2.Rows))
                    {
                        Ops.Add(m2.TGradient, m2.TGradient, resTGC2R);
                    }
                }
            }

            res.Dispose();
        };
        this.backprop.Add(backward);
    }

    return res;
}
/// <summary>
/// Unfolds <paramref name="m"/> along dimension 0 with size and step
/// <paramref name="n"/> and returns <paramref name="n"/> tensors, the i-th wrapping
/// <c>Select(2, i)</c> of the unfolded weights (and of the unfolded gradient when
/// <paramref name="gradient"/> is true). When back-propagation is enabled and
/// <paramref name="gradient"/> is true, a backward action that disposes every
/// returned tensor is queued.
/// </summary>
/// <param name="m">Source tensor.</param>
/// <param name="n">Unfold size and step; also the number of tensors returned.</param>
/// <param name="gradient">Whether the returned tensors also wrap gradient views.</param>
/// <returns>The list of <paramref name="n"/> tensors.</returns>
/// <exception cref="InvalidOperationException">
/// A created tensor's row count does not match dimension 0 of its weight (or gradient) view.
/// </exception>
public List<IWeightMatrix> UnFolderRow(IWeightMatrix m, int n, bool gradient = true)
{
    List<IWeightMatrix> resList = new List<IWeightMatrix>();
    WeightTensor t = m as WeightTensor;

    // The unfolded views are only needed while slicing; using releases them even
    // when the shape validation below throws. A null resource is allowed by using.
    using (Tensor tW = t.TWeight.Unfold(0, n, n))
    using (Tensor tG = gradient ? t.TGradient.Unfold(0, n, n) : null)
    {
        for (int i = 0; i < n; i++)
        {
            Tensor weightSlice = tW.Select(2, i);
            Tensor gradientSlice = gradient ? tG.Select(2, i) : null;

            WeightTensor res = weightTensorFactory.CreateWeightTensor(m.Rows / n, m.Columns, weightSlice, gradientSlice);

            bool weightMismatch = res.Rows != res.TWeight.Sizes[0];
            if (weightMismatch || (gradient && res.Rows != res.TGradient.Sizes[0]))
            {
                throw new InvalidOperationException("Invalide unfolder");
            }

            resList.Add(res);
        }
    }

    if (this.needs_backprop && gradient)
    {
        Action backward = () =>
        {
            foreach (var item in resList)
            {
                item.Dispose();
            }
        };
        this.backprop.Add(backward);
    }

    return resList;
}
/// <summary>
/// Creates a tensor with this tensor's sizes and device whose weight field is set
/// to <c>CopyRef()</c> of this tensor's weight.
/// </summary>
/// <param name="name">Name given to the new tensor.</param>
/// <returns>The new tensor.</returns>
public WeightTensor CopyWeightsRef(string name)
{
    // The initializer runs after the constructor, so the assignment order matches
    // a separate statement; the right-hand side reads this instance's weight.
    return new WeightTensor(Sizes, DeviceId, name)
    {
        m_TWeight = this.m_TWeight.CopyRef()
    };
}
/// <summary>
/// Wraps the supplied weight and gradient tensors in a new <c>WeightTensor</c> and
/// registers it in <c>weights</c>.
/// </summary>
/// <param name="row">Row count.</param>
/// <param name="column">Column count.</param>
/// <param name="w">Weight tensor to wrap.</param>
/// <param name="g">Gradient tensor to wrap.</param>
/// <returns>The new tensor.</returns>
public WeightTensor CreateWeightTensor(int row, int column, Tensor w, Tensor g)
{
    var wrapped = new WeightTensor(row, column, w, g);
    weights.Add(wrapped);

    return wrapped;
}
/// <summary>
/// Wraps the supplied weight tensor in a new <c>WeightTensor</c> for the given
/// device and registers it in <c>weights</c>.
/// </summary>
/// <param name="row">Row count.</param>
/// <param name="column">Column count.</param>
/// <param name="deviceId">Device identifier forwarded to the constructor.</param>
/// <param name="w">Weight tensor to wrap.</param>
/// <param name="gradient">Forwarded to the constructor.</param>
/// <returns>The new tensor.</returns>
public WeightTensor CreateWeightTensor(int row, int column, int deviceId, Tensor w, bool gradient = true)
{
    var wrapped = new WeightTensor(row, column, w, deviceId, gradient);
    weights.Add(wrapped);

    return wrapped;
}
/// <summary>
/// Wraps the supplied weight and gradient tensors in a new <c>WeightTensor</c> and
/// registers it in <c>weights</c>.
/// </summary>
/// <param name="row">Row count.</param>
/// <param name="column">Column count.</param>
/// <param name="w">Weight tensor to wrap.</param>
/// <param name="g">Gradient tensor to wrap.</param>
/// <returns>The new tensor.</returns>
public WeightTensor CreateWeightTensor(int row, int column, Tensor w, Tensor g)
{
    var wrapped = new WeightTensor(row, column, w, g);

    // NOTE(review): a lock around this Add (and its locker field) was disabled;
    // confirm weights is safe for concurrent Add if this factory is shared.
    weights.Add(wrapped);

    return wrapped;
}
/// <summary>
/// Replaces this tensor's gradient with the weight tensor of <paramref name="src"/>
/// and then sets the source's weight to null.
/// </summary>
/// <param name="src">Tensor whose weight becomes this tensor's gradient.</param>
public void SetGradientByWeight(IWeightMatrix src)
{
    WeightTensor m = src as WeightTensor;

    // A gradient may not exist yet; only dispose one that does.
    if (TGradient != null)
    {
        TGradient.Dispose();
    }

    // The tensor itself is moved rather than copied, so the source must stop
    // referencing it afterwards.
    TGradient = m.TWeight;
    m.TWeight = null;
}
/// <summary>
/// Disposes this tensor's current gradient, if any, and sets it to
/// <c>CopyRef()</c> of <paramref name="src"/>'s weight.
/// </summary>
/// <param name="src">Tensor whose weight is referenced as the new gradient.</param>
public void CopyWeightsToGradients(IWeightTensor src)
{
    WeightTensor source = src as WeightTensor;

    bool hasGradient = m_TGradient != null;
    if (hasGradient)
    {
        m_TGradient.Dispose();
    }

    m_TGradient = source.TWeight.CopyRef();
}
/// <summary>
/// Wraps <c>Transpose()</c> of <paramref name="w"/>'s weight and gradient tensors
/// in a new tensor whose row and column counts are swapped. No backward action is
/// registered.
/// </summary>
/// <param name="w">Source tensor.</param>
/// <returns>The tensor built over the transposed weight and gradient.</returns>
public IWeightMatrix Transpose2(IWeightMatrix w)
{
    WeightTensor source = w as WeightTensor;

    var transposedWeight = source.TWeight.Transpose();
    var transposedGradient = source.TGradient.Transpose();

    return weightTensorFactory.CreateWeightTensor(source.Columns, source.Rows, transposedWeight, transposedGradient);
}
/// <summary>
/// Views both operands and the result as 3-D tensors whose first dimension is
/// <paramref name="batchSize"/> and calls <c>Ops.AddmmBatch</c> with the source term
/// scaled by 0. When back-propagation is enabled, the queued backward action
/// accumulates gradients into both operands through the same batched views and
/// disposes the result.
/// </summary>
/// <param name="m1">Left operand; its row count is used for the result.</param>
/// <param name="m2">Right operand; its column count is used for the result.</param>
/// <param name="batchSize">Size of the leading batch dimension of the views.</param>
/// <returns>The result tensor created by the factory.</returns>
public IWeightMatrix MulBatch(IWeightMatrix m1, IWeightMatrix m2, int batchSize)
{
    WeightTensor t1 = m1 as WeightTensor;
    WeightTensor t2 = m2 as WeightTensor;

    var n = t1.Rows;
    var d = t2.Columns;

    WeightTensor res = weightTensorFactory.CreateWeightTensor(n, d, deviceId);

    // These two views outlive the forward pass when back-propagation is enabled:
    // the backward action reuses them and is responsible for disposing them.
    Tensor t1W = t1.TWeight.View(batchSize, t1.Rows / batchSize, t1.Columns);
    Tensor t2W = t2.TWeight.View(batchSize, t2.Rows / batchSize, t2.Columns);

    using (Tensor rW = res.TWeight.View(batchSize, n / batchSize, d))
    {
        Ops.AddmmBatch(rW, 0.0f, rW, 1.0f, t1W, t2W);
    }

    if (this.needs_backprop)
    {
        Action backward = () =>
        {
            try
            {
                res.ReleaseWeight();

                using (Tensor t1G = t1.TGradient.View(batchSize, t1.Rows / batchSize, t1.Columns))
                using (Tensor t2G = t2.TGradient.View(batchSize, t2.Rows / batchSize, t2.Columns))
                using (Tensor rG = res.TGradient.View(batchSize, n / batchSize, d))
                {
                    using (var tW2 = t2W.Transpose(1, 2))
                    {
                        Ops.AddmmBatch(t1G, 1.0f, t1G, 1.0f, rG, tW2);
                    }

                    using (var tW1 = t1W.Transpose(1, 2))
                    {
                        Ops.AddmmBatch(t2G, 1.0f, t2G, 1.0f, tW1, rG);
                    }
                }
            }
            finally
            {
                // Release the forward views even if gradient accumulation failed.
                t1W.Dispose();
                t2W.Dispose();
            }

            res.Dispose();
        };
        this.backprop.Add(backward);
    }
    else
    {
        t1W.Dispose();
        t2W.Dispose();
    }

    return res;
}
/// <summary>
/// Concatenates the weights of <paramref name="wl"/> along dimension 1. A single
/// input is returned as-is. When back-propagation is enabled, the queued backward
/// action hands each input its column range of the result's gradient via
/// <c>CopyOrAddGradient</c> and disposes the result.
/// </summary>
/// <param name="wl">Tensors to concatenate, in column order.</param>
/// <returns>The concatenated tensor, or <c>wl[0]</c> when only one tensor is given.</returns>
public IWeightMatrix ConcatColumns(params IWeightMatrix[] wl)
{
    if (wl.Length == 1)
    {
        return wl[0];
    }

    List<Tensor> twl = new List<Tensor>();
    int sx = 0;
    int sy = 0;

    foreach (IWeightMatrix item in wl)
    {
        WeightTensor m = item as WeightTensor;
        sx = m.Rows;
        sy += m.Columns;
        twl.Add(m.TWeight);
    }

    var res = weightTensorFactory.CreateWeightTensor(sx, sy, deviceId);
    Ops.Concat(res.TWeight, 1, twl.ToArray());

    if (this.needs_backprop)
    {
        Action backward = () =>
        {
            res.ReleaseWeight();

            // A local offset is used instead of resetting the captured column total.
            int columnOffset = 0;
            foreach (IWeightMatrix item in wl)
            {
                WeightTensor m = item as WeightTensor;

                // using releases the narrowed view even if the gradient update throws.
                using (Tensor tTmp = res.TGradient.Narrow(1, columnOffset, m.Columns))
                {
                    m.CopyOrAddGradient(tTmp);
                }

                columnOffset += m.Columns;
            }

            res.Dispose();
        };
        this.backprop.Add(backward);
    }

    return res;
}
/// <summary>
/// Disposes this tensor's current gradient, if any, takes <paramref name="src"/>'s
/// weight tensor as the new gradient, and clears the source's weight field.
/// </summary>
/// <param name="src">Tensor whose weight becomes this tensor's gradient.</param>
public void SetGradientByWeight(IWeightTensor src)
{
    WeightTensor donor = src as WeightTensor;

    bool hasGradient = m_TGradient != null;
    if (hasGradient)
    {
        m_TGradient.Dispose();
    }

    // The tensor is moved, not copied: the donor's field is cleared afterwards.
    m_TGradient = donor.TWeight;
    donor.m_TWeight = null;
}
/// <summary>
/// Concatenates the weights of <paramref name="wl"/> along dimension 1. A single
/// input is returned as-is. When back-propagation is enabled, the queued backward
/// action hands each input its column range of the result's gradient via
/// <c>CopyOrAddGradient</c> and disposes the result.
/// </summary>
/// <param name="wl">Tensors to concatenate, in column order.</param>
/// <returns>The concatenated tensor, or <c>wl[0]</c> when only one tensor is given.</returns>
public IWeightTensor ConcatColumns(params IWeightTensor[] wl)
{
    if (wl.Length == 1)
    {
        return wl[0];
    }

    List<string> inputNames = new List<string>();
    List<Tensor> inputWeights = new List<Tensor>();
    int rowCount = 0;
    int totalColumns = 0;

    for (int index = 0; index < wl.Length; index++)
    {
        IWeightTensor item = wl[index];
        WeightTensor tensor = item as WeightTensor;

        // The row count of the last input wins; the column counts accumulate.
        rowCount = tensor.Rows;
        totalColumns += tensor.Columns;
        inputWeights.Add(tensor.TWeight);
        inputNames.Add(item.Name);
    }

    string srcNames = String.Join("_", inputNames);
    var concatenated = m_weightTensorFactory.CreateWeightTensor(rowCount, totalColumns, m_deviceId, name: $"{GetHashString(srcNames)}.ConcatColumns");

    VisualizeNodes(wl, concatenated);

    Ops.Concat(concatenated.TWeight, 1, inputWeights.ToArray());

    if (!this.m_needsBackprop)
    {
        return concatenated;
    }

    Action backward = () =>
    {
        concatenated.ReleaseWeight();

        int columnOffset = 0;
        foreach (IWeightTensor item in wl)
        {
            WeightTensor tensor = item as WeightTensor;
            using (Tensor gradientSlice = concatenated.TGradient.Narrow(1, columnOffset, tensor.Columns))
            {
                tensor.CopyOrAddGradient(gradientSlice);
                columnOffset += tensor.Columns;
            }
        }

        concatenated.Dispose();
    };
    this.m_backprop.Add(backward);

    return concatenated;
}
/// <summary>
/// Concatenates the weights of <paramref name="wl"/> along dimension 0. A single
/// input is returned as-is. When back-propagation is enabled, the queued backward
/// action hands each input its row range of the result's gradient via
/// <c>CopyOrAddGradient</c> and disposes the result.
/// </summary>
/// <param name="wl">Tensors to concatenate, in row order.</param>
/// <returns>The concatenated tensor, or <c>wl[0]</c> when the list has one entry.</returns>
public IWeightTensor ConcatRows(List<IWeightTensor> wl)
{
    if (wl.Count == 1)
    {
        return wl[0];
    }

    List<string> inputNames = new List<string>();
    List<Tensor> inputWeights = new List<Tensor>();
    int totalRows = 0;
    int columnCount = 0;

    for (int index = 0; index < wl.Count; index++)
    {
        IWeightTensor item = wl[index];
        WeightTensor tensor = item as WeightTensor;

        // The row counts accumulate; the column count of the last input wins.
        totalRows += tensor.Rows;
        columnCount = tensor.Columns;
        inputWeights.Add(tensor.TWeight);
        inputNames.Add(item.Name);
    }

    var wlName = String.Join("_", inputNames);
    var concatenated = m_weightTensorFactory.CreateWeightTensor(totalRows, columnCount, m_deviceId, name: $"{GetHashString(wlName)}.ConcatRows");

    VisualizeNodes(wl, concatenated);

    Ops.Concat(concatenated.TWeight, 0, inputWeights.ToArray());

    if (!m_needsBackprop)
    {
        return concatenated;
    }

    Action backward = () =>
    {
        concatenated.ReleaseWeight();

        int rowOffset = 0;
        foreach (IWeightTensor item in wl)
        {
            WeightTensor tensor = item as WeightTensor;
            using (var gradientSlice = concatenated.TGradient.Narrow(0, rowOffset, tensor.Rows))
            {
                tensor.CopyOrAddGradient(gradientSlice);
                rowOffset += tensor.Rows;
            }
        }

        concatenated.Dispose();
    };
    this.m_backprop.Add(backward);

    return concatenated;
}
/// <summary>
/// Creates a new (row x column) tensor on the given device, optionally calls
/// <c>ClearWeight</c> on it, and registers it in <c>weights</c>.
/// </summary>
/// <param name="row">Row count.</param>
/// <param name="column">Column count.</param>
/// <param name="deviceId">Device identifier forwarded to the constructor.</param>
/// <param name="cleanWeights">When true, <c>ClearWeight</c> is called before registration.</param>
/// <returns>The new tensor.</returns>
public WeightTensor CreateWeightTensor(int row, int column, int deviceId, bool cleanWeights = false)
{
    var created = new WeightTensor(row, column, deviceId);

    if (cleanWeights)
    {
        created.ClearWeight();
    }

    weights.Add(created);

    return created;
}
/// <summary>
/// Creates a new named tensor with the given sizes on the given device, optionally
/// calls <c>ClearWeight</c> on it, and registers it in <c>weights</c>.
/// </summary>
/// <param name="sizes">Dimension sizes forwarded to the constructor.</param>
/// <param name="deviceId">Device identifier forwarded to the constructor.</param>
/// <param name="cleanWeights">When true, <c>ClearWeight</c> is called before registration.</param>
/// <param name="name">Tensor name.</param>
/// <returns>The new tensor.</returns>
public WeightTensor CreateWeightTensor(long[] sizes, int deviceId, bool cleanWeights = false, string name = "")
{
    var created = new WeightTensor(sizes, deviceId, name);

    if (cleanWeights)
    {
        created.ClearWeight();
    }

    weights.Add(created);

    return created;
}
/// <summary>
/// Applies the sigmoid gradient of <paramref name="src"/> to this tensor's
/// gradient: when a gradient already exists it is updated through
/// <c>Ops.AddSigmoidD</c>; otherwise one is allocated with the shape of the source
/// weights and written through <c>Ops.SigmoidD</c>.
/// </summary>
/// <param name="src">Tensor supplying the weights and gradient passed to the sigmoid-gradient ops.</param>
public void AddSigmoidGradient(WeightTensor src)
{
    if (m_TGradient != null)
    {
        Ops.AddSigmoidD(m_TGradient, m_TGradient, src.TWeight, src.TGradient);
        return;
    }

    // No gradient yet: fetch the allocator for this device, allocate the gradient
    // and write the first contribution directly.
    allocator = TensorAllocator.Allocator(DeviceId);
    m_TGradient = new Tensor(allocator, DType.Float32, src.TWeight.Sizes);
    Ops.SigmoidD(m_TGradient, src.TWeight, src.TGradient);
}