/// <summary>
/// Affine transform: multiplies <paramref name="m1"/> by <paramref name="m2"/> and adds
/// <paramref name="mbias"/>, whose weight is expanded to the shape of the product.
/// </summary>
/// <param name="m1">Left operand; its row count is the row count of the result.</param>
/// <param name="m2">Right operand; its column count is the column count of the result.</param>
/// <param name="mbias">Bias operand; its weight is expanded to (m1.Rows, m2.Columns) before use.</param>
/// <returns>A newly created tensor of size (m1.Rows, m2.Columns) holding the result.</returns>
/// <exception cref="ArgumentNullException">Any of the three operands is null.</exception>
/// <exception cref="ArgumentException">Any operand is not a <see cref="WeightTensor"/>.</exception>
public IWeightTensor Affine(IWeightTensor m1, IWeightTensor m2, IWeightTensor mbias)
{
    // The single-string ArgumentNullException constructor takes the parameter NAME,
    // so pass nameof(...) first and the human-readable message second.
    if (m1 == null)
    {
        throw new ArgumentNullException(nameof(m1), "m1 tensor is null");
    }

    if (m2 == null)
    {
        throw new ArgumentNullException(nameof(m2), "m2 tensor is null");
    }

    if (mbias == null)
    {
        throw new ArgumentNullException(nameof(mbias), "mbias tensor is null");
    }

    WeightTensor t1 = m1 as WeightTensor;
    WeightTensor t2 = m2 as WeightTensor;
    WeightTensor t3 = mbias as WeightTensor;

    // A failed 'as' cast would otherwise surface later as an opaque NullReferenceException.
    if (t1 == null || t2 == null || t3 == null)
    {
        throw new ArgumentException("Affine requires every operand to be a WeightTensor instance.");
    }

    var n = t1.Rows;
    var d = t2.Columns;

    WeightTensor res = m_weightTensorFactory.CreateWeightTensor(n, d, m_deviceId, name: $"{GetHashString(m1.Name, m2.Name, mbias.Name)}.Affine");
    VisualizeNodes(new IWeightTensor[] { m1, m2, mbias }, res);

    // Forward: the expanded bias is the additive source, t1 and t2 are the matrix factors.
    using (var t3WExp = t3.TWeight.Expand(n, d))
    {
        Ops.Addmm(res.TWeight, 1.0f, t3WExp, 1.0f, t1.TWeight, t2.TWeight);
    }

    if (m_needsBackprop)
    {
        Action backward = () =>
        {
            res.ReleaseWeight();

            // NOTE(review): the bias gradient is accumulated through a view expanded from
            // t3.TGradient; whether the broadcast rows reduce correctly into the bias
            // depends on how Ops.Add treats expanded views - confirm.
            using (var t3G = t3.TGradient.Expand(n, d))
            {
                Ops.Add(t3G, t3G, res.TGradient);
            }

            // Gradient for m1: accumulate res.TGradient times the transpose of m2's weight.
            using (var tW2 = t2.TWeight.Transpose())
            {
                Ops.Addmm(t1.TGradient, 1.0f, t1.TGradient, 1.0f, res.TGradient, tW2);
            }

            // Gradient for m2: accumulate the transpose of m1's weight times res.TGradient.
            using (var tW1 = t1.TWeight.Transpose())
            {
                Ops.Addmm(t2.TGradient, 1.0f, t2.TGradient, 1.0f, tW1, res.TGradient);
            }

            res.Dispose();
        };
        this.m_backprop.Add(backward);
    }

    return res;
}
/// <summary>
/// Batched matrix multiplication. The weights of <paramref name="m1"/> and <paramref name="m2"/>
/// are viewed as (batchSize, Rows / batchSize, Columns) and multiplied batch by batch.
/// </summary>
/// <param name="m1">Left operand; its rows are split evenly into <paramref name="batchSize"/> groups.</param>
/// <param name="m2">Right operand; its rows are split evenly into <paramref name="batchSize"/> groups.</param>
/// <param name="batchSize">Number of batches used for the 3-D views. Rows are divided by it with integer division.</param>
/// <returns>A newly created tensor of size (m1.Rows, m2.Columns) holding the batched product.</returns>
public IWeightMatrix MulBatch(IWeightMatrix m1, IWeightMatrix m2, int batchSize)
{
    WeightTensor t1 = m1 as WeightTensor;
    WeightTensor t2 = m2 as WeightTensor;
    var n = t1.Rows;
    var d = t2.Columns;

    WeightTensor res = weightTensorFactory.CreateWeightTensor(n, d, deviceId);

    // These views are reused by the backward pass, so they cannot live in a plain 'using'.
    Tensor t1W = t1.TWeight.View(batchSize, t1.Rows / batchSize, t1.Columns);
    Tensor t2W = t2.TWeight.View(batchSize, t2.Rows / batchSize, t2.Columns);
    bool viewsHandedToBackward = false;

    try
    {
        using (Tensor rW = res.TWeight.View(batchSize, n / batchSize, d))
        {
            Ops.AddmmBatch(rW, 0.0f, rW, 1.0f, t1W, t2W);
        }

        if (this.needs_backprop)
        {
            Action backward = () =>
            {
                try
                {
                    res.ReleaseWeight();

                    using (Tensor t1G = t1.TGradient.View(batchSize, t1.Rows / batchSize, t1.Columns))
                    using (Tensor t2G = t2.TGradient.View(batchSize, t2.Rows / batchSize, t2.Columns))
                    using (Tensor rG = res.TGradient.View(batchSize, n / batchSize, d))
                    {
                        // Gradient for m1: accumulate rG times the per-batch transpose of m2's weight.
                        using (Tensor tW2 = t2W.Transpose(1, 2))
                        {
                            Ops.AddmmBatch(t1G, 1.0f, t1G, 1.0f, rG, tW2);
                        }

                        // Gradient for m2: accumulate the per-batch transpose of m1's weight times rG.
                        using (Tensor tW1 = t1W.Transpose(1, 2))
                        {
                            Ops.AddmmBatch(t2G, 1.0f, t2G, 1.0f, tW1, rG);
                        }
                    }
                }
                finally
                {
                    // The forward views are owned by this closure; release them even on failure.
                    t1W.Dispose();
                    t2W.Dispose();
                }

                res.Dispose();
            };
            this.backprop.Add(backward);
            viewsHandedToBackward = true;
        }
    }
    finally
    {
        // Without a registered backward step (or if the forward op threw), nobody else
        // will release the weight views.
        if (!viewsHandedToBackward)
        {
            t1W.Dispose();
            t2W.Dispose();
        }
    }

    return res;
}
/// <summary>
/// Batched matrix multiplication scaled by <paramref name="alpha"/>. The operand weights are
/// used as 3-D tensors (their Sizes[0..2] are read) and the product is written into a 2-D
/// result viewed as (batchSize, m1.Sizes[1], m2.Sizes[2]).
/// </summary>
/// <param name="m1">Left operand.</param>
/// <param name="m2">Right operand.</param>
/// <param name="batchSize">Leading dimension of the result view; presumably equal to Sizes[0] of both operands - confirm against callers.</param>
/// <param name="alpha">Scale factor applied to the product in the forward pass and to both operand gradients in the backward pass.</param>
/// <returns>A newly created tensor of size (batchSize * m1.Sizes[1], m2.Sizes[2]).</returns>
public IWeightTensor MulBatch(IWeightTensor m1, IWeightTensor m2, int batchSize, float alpha = 1.0f)
{
    WeightTensor t1 = m1 as WeightTensor;
    WeightTensor t2 = m2 as WeightTensor;

    WeightTensor res = m_weightTensorFactory.CreateWeightTensor((int)(batchSize * t1.TWeight.Sizes[1]), (int)t2.TWeight.Sizes[2], m_deviceId, name: $"{GetHashString(m1.Name, m2.Name)}.MulBatch");
    VisualizeNodes(new IWeightTensor[] { m1, m2 }, res);

    Tensor t1W = t1.TWeight;
    Tensor t2W = t2.TWeight;

    using (Tensor rW = res.TWeight.View(batchSize, t1.TWeight.Sizes[1], t2.TWeight.Sizes[2]))
    {
        Ops.AddmmBatch(rW, 0.0f, rW, alpha, t1W, t2W);
    }

    if (m_needsBackprop)
    {
        Action backward = () =>
        {
            res.ReleaseWeight();

            using (Tensor rG = res.TGradient.View(batchSize, t1.TWeight.Sizes[1], t2.TWeight.Sizes[2]))
            {
                // The forward product is scaled by alpha, so both operand gradients must carry
                // the same factor; a hard-coded 1.0f is only correct when alpha == 1.
                using (Tensor t1G = t1.TGradient.View(t1.TWeight.Sizes[0], t1.TWeight.Sizes[1], t1.TWeight.Sizes[2]))
                {
                    using (var tW2 = t2W.Transpose(1, 2))
                    {
                        Ops.AddmmBatch(t1G, 1.0f, t1G, alpha, rG, tW2);
                    }
                }

                using (Tensor t2G = t2.TGradient.View(t2.TWeight.Sizes[0], t2.TWeight.Sizes[1], t2.TWeight.Sizes[2]))
                {
                    using (var tW1 = t1W.Transpose(1, 2))
                    {
                        Ops.AddmmBatch(t2G, 1.0f, t2G, alpha, tW1, rG);
                    }
                }
            }

            res.Dispose();
        };
        this.m_backprop.Add(backward);
    }

    return res;
}
/// <summary>
/// Applies dropout to <paramref name="V"/> by multiplying its weight element-wise with a
/// noise tensor obtained from BuildRandomTensor.
/// </summary>
/// <param name="V">Input tensor. Returned unchanged when <paramref name="drop_prob"/> is 0.</param>
/// <param name="batchSize">Forwarded to BuildRandomTensor when building the noise tensor.</param>
/// <param name="drop_prob">Drop probability; the noise tensor is built with p = 1 - drop_prob.</param>
/// <param name="inPlace">When true the result comes from <c>CopyWeightsRef</c> on the input (presumably sharing its weight storage - confirm); otherwise a new tensor of the same sizes is created.</param>
/// <returns>The input itself when <paramref name="drop_prob"/> is 0, otherwise the masked result tensor.</returns>
public IWeightTensor Dropout(IWeightTensor V, int batchSize, float drop_prob, bool inPlace = false)
{
    if (drop_prob == 0)
    {
        return V;
    }

    // Generate noise tensor
    float p = 1.0f - drop_prob;
    Tensor noise = BuildRandomTensor(V.Rows, V.Columns, batchSize, p);

    var w = V as WeightTensor;
    WeightTensor res = null;
    if (inPlace)
    {
        res = w.CopyWeightsRef($"{GetHashString(V.Name)}.Dropout");
    }
    else
    {
        res = m_weightTensorFactory.CreateWeightTensor(w.Sizes, m_deviceId, name: $"{GetHashString(V.Name)}.Dropout");
    }

    VisualizeNodes(V, res);

    Ops.Mul(res.TWeight, w.TWeight, noise);

    if (m_needsBackprop)
    {
        Action backward = () =>
        {
            res.ReleaseWeight();

            if (inPlace)
            {
                w.TGradient = res.TGradient.CopyRef();
            }

            w.AddMulGradient(noise, res.TGradient, inPlace);

            res.Dispose();
            noise.Dispose();
        };
        this.m_backprop.Add(backward);
    }
    else
    {
        // No backward step will ever run to release the noise tensor, so free it now
        // instead of leaking it.
        noise.Dispose();
    }

    return res;
}
/// <summary>
/// Fused multiply-add: multiplies <paramref name="m1"/> by <paramref name="m2"/> and adds
/// <paramref name="m3"/> in a single <c>Ops.Addmm</c> call.
/// </summary>
/// <param name="m1">Left operand; its row count is the row count of the result.</param>
/// <param name="m2">Right operand; its column count is the column count of the result.</param>
/// <param name="m3">Additive operand passed to Addmm as the source term.</param>
/// <returns>A newly created tensor of size (m1.Rows, m2.Columns) holding the result.</returns>
public IWeightMatrix MulAdd(IWeightMatrix m1, IWeightMatrix m2, IWeightMatrix m3)
{
    WeightTensor t1 = m1 as WeightTensor;
    WeightTensor t2 = m2 as WeightTensor;
    WeightTensor t3 = m3 as WeightTensor;

    var n = t1.Rows;
    var d = t2.Columns;

    WeightTensor res = weightTensorFactory.CreateWeightTensor(n, d, deviceId);
    Ops.Addmm(res.TWeight, 1.0f, t3.TWeight, 1.0f, t1.TWeight, t2.TWeight);

    if (this.needs_backprop)
    {
        Action backward = () =>
        {
            res.ReleaseWeight();

            // The additive operand receives the result's gradient via CopyOrAddGradient.
            t3.CopyOrAddGradient(res);

            // 'using' guarantees the transposed views are released even if Addmm throws.
            using (var tW2 = t2.TWeight.Transpose())
            {
                Ops.Addmm(t1.TGradient, 1.0f, t1.TGradient, 1.0f, res.TGradient, tW2);
            }

            using (var tW1 = t1.TWeight.Transpose())
            {
                Ops.Addmm(t2.TGradient, 1.0f, t2.TGradient, 1.0f, tW1, res.TGradient);
            }

            res.Dispose();
        };
        this.backprop.Add(backward);
    }

    return res;
}
/// <summary>
/// Computes the product of <paramref name="m1"/> and <paramref name="m2"/> plus
/// <paramref name="m3"/> with one <c>Ops.Addmm</c> call and, when back-propagation is
/// enabled, registers the matching gradient step.
/// </summary>
/// <param name="m1">Left factor; supplies the row count of the result.</param>
/// <param name="m2">Right factor; supplies the column count of the result.</param>
/// <param name="m3">Addend handed to Addmm as the source term.</param>
/// <returns>A newly created tensor of size (m1.Rows, m2.Columns).</returns>
public IWeightTensor MulAdd(IWeightTensor m1, IWeightTensor m2, IWeightTensor m3)
{
    WeightTensor lhs = m1 as WeightTensor;
    WeightTensor rhs = m2 as WeightTensor;
    WeightTensor addend = m3 as WeightTensor;

    int rowCount = lhs.Rows;
    int columnCount = rhs.Columns;

    string resultName = $"{GetHashString(m1.Name, m2.Name, m3.Name)}.MulAdd";
    WeightTensor output = m_weightTensorFactory.CreateWeightTensor(rowCount, columnCount, m_deviceId, name: resultName);
    VisualizeNodes(new IWeightTensor[] { m1, m2, m3 }, output);

    Ops.Addmm(output.TWeight, 1.0f, addend.TWeight, 1.0f, lhs.TWeight, rhs.TWeight);

    // Nothing more to do when gradients are not being tracked.
    if (!this.m_needsBackprop)
    {
        return output;
    }

    Action propagate = () =>
    {
        output.ReleaseWeight();

        // The addend receives the output gradient via CopyOrAddGradient.
        addend.CopyOrAddGradient(output);

        // Left factor: accumulate output gradient times the transposed right weight.
        using (var rhsTransposed = rhs.TWeight.Transpose())
        {
            Ops.Addmm(lhs.TGradient, 1.0f, lhs.TGradient, 1.0f, output.TGradient, rhsTransposed);
        }

        // Right factor: accumulate the transposed left weight times the output gradient.
        using (var lhsTransposed = lhs.TWeight.Transpose())
        {
            Ops.Addmm(rhs.TGradient, 1.0f, rhs.TGradient, 1.0f, lhsTransposed, output.TGradient);
        }

        output.Dispose();
    };
    this.m_backprop.Add(propagate);

    return output;
}