/// <summary>
/// Applies dropout to <paramref name="V"/>: multiplies the input element-wise by a
/// random mask built with keep-probability p = 1 - <paramref name="drop_prob"/>.
/// NOTE(review): assumes BuildRandomTensor yields a Bernoulli(p) mask, possibly
/// pre-scaled by 1/p (inverted dropout) — confirm against its implementation.
/// </summary>
/// <param name="V">Input matrix; must be a WeightTensor.</param>
/// <param name="drop_prob">Probability of zeroing an element.</param>
/// <returns>A new weight tensor holding the masked values.</returns>
public IWeightMatrix Dropout(IWeightMatrix V, float drop_prob)
{
    float p = 1.0f - drop_prob;
    var w = V as WeightTensor;
    var res = weightTensorFactory.CreateWeightTensor(V.Rows, V.Columns, deviceId);

    Tensor noise = BuildRandomTensor(V.Rows, V.Columns, p);
    Ops.Mul(res.TWeight, w.TWeight, noise);

    if (this.needs_backprop)
    {
        Action backward = () =>
        {
            // dL/dV = dL/dres * mask: only elements kept in the forward pass get gradient.
            Ops.AddMul(w.TGradient, w.TGradient, res.TGradient, noise);
            noise.Dispose();
            res.Dispose();
        };
        this.backprop.Add(backward);
    }
    else
    {
        // FIX: without backprop the backward action never runs, so the mask tensor
        // previously leaked. Release it as soon as the forward product is computed.
        noise.Dispose();
    }

    return (res);
}
/// <summary>
/// Fused element-wise operation: res = w1 ⊙ w2 + w3 ⊙ w4 (single kernel via Ops.MulMulAdd).
/// The result takes its shape from <paramref name="w1"/>.
/// </summary>
/// <returns>A new weight tensor holding the fused product-sum.</returns>
public IWeightMatrix EltMulMulAdd(IWeightMatrix w1, IWeightMatrix w2, IWeightMatrix w3, IWeightMatrix w4)
{
    WeightTensor a = w1 as WeightTensor;
    WeightTensor b = w2 as WeightTensor;
    WeightTensor c = w3 as WeightTensor;
    WeightTensor d = w4 as WeightTensor;

    var res = weightTensorFactory.CreateWeightTensor(a.Rows, a.Columns, deviceId);
    Ops.MulMulAdd(res.TWeight, a.TWeight, b.TWeight, c.TWeight, d.TWeight);

    if (this.needs_backprop)
    {
        Action backward = () =>
        {
            // Product rule per pair: d(a⊙b)/da = b, d(a⊙b)/db = a (same for c, d);
            // each factor accumulates its partner times the upstream gradient.
            Ops.AddMul(a.TGradient, a.TGradient, b.TWeight, res.TGradient);
            Ops.AddMul(b.TGradient, b.TGradient, a.TWeight, res.TGradient);
            Ops.AddMul(c.TGradient, c.TGradient, d.TWeight, res.TGradient);
            Ops.AddMul(d.TGradient, d.TGradient, c.TWeight, res.TGradient);
            res.Dispose();
        };
        this.backprop.Add(backward);
    }

    return (res);
}
/// <summary>
/// Accumulates the element-wise product w ⊙ g into this tensor's gradient,
/// lazily allocating the gradient buffer on first use.
/// </summary>
/// <param name="w">First factor; its Sizes determine the gradient's shape on allocation.</param>
/// <param name="g">Second factor.</param>
public void AddMulGradient(Tensor w, Tensor g)
{
    if (m_TGradient != null)
    {
        // Gradient already exists: accumulate on top of it.
        Ops.AddMul(m_TGradient, m_TGradient, w, g);
        return;
    }

    // First contribution: allocate and write the product directly (no accumulation needed).
    allocator = TensorAllocator.Allocator(DeviceId);
    m_TGradient = new Tensor(allocator, DType.Float32, w.Sizes);
    Ops.Mul(m_TGradient, w, g);
}
/// <summary>
/// Row-wise softmax: res[i,j] = exp(m[i,j] - rowMax_i) / sum_j exp(m[i,j] - rowMax_i).
/// Subtracting the per-row max keeps exp() numerically stable.
/// </summary>
/// <param name="w">Input matrix; must be a WeightTensor.</param>
/// <param name="bp">When false, skips registering the backward action even if the graph needs backprop.</param>
/// <returns>A new weight tensor holding the row-normalized probabilities.</returns>
public IWeightMatrix SoftmaxM(IWeightMatrix w, bool bp = true)
{
    WeightTensor m = w as WeightTensor;
    var res = weightTensorFactory.CreateWeightTensor(m.Rows, m.Columns, deviceId, new Tensor(TensorAllocator.Allocator(deviceId), DType.Float32, m.Rows, m.Columns), bp);

    // Scratch buffer: holds exp(m - rowMax) in the forward pass, and is reused
    // by the backward closure, so it is only disposed here when no backward runs.
    Tensor tTmp = new Tensor(TensorAllocator.Allocator(deviceId), DType.Float32, m.Rows, m.Columns);

    var maxval = Ops.Max(null, m.TWeight, 1);
    var maxvalM = maxval.Expand(m.Rows, m.Columns);
    Ops.ExpSub2(tTmp, m.TWeight, maxvalM);

    var sumV = Ops.Sum(null, tTmp, 1);
    var sumM = sumV.Expand(m.Rows, m.Columns);
    Ops.Div(res.TWeight, tTmp, sumM);

    maxval.Dispose();
    maxvalM.Dispose();
    sumV.Dispose();
    sumM.Dispose();

    if (this.needs_backprop && bp)
    {
        Action backward = () =>
        {
            // Softmax gradient: dm = y ⊙ dy - y ⊙ rowSum(y ⊙ dy)
            // where y = res.TWeight and dy = res.TGradient.
            Ops.Mul(tTmp, res.TGradient, res.TWeight);
            Ops.Add(m.TGradient, m.TGradient, tTmp);

            var ss = Ops.Sum(null, tTmp, 1);
            var ssN = Ops.Neg(null, ss);
            var ssM = ssN.Expand(m.Rows, m.Columns);
            Ops.AddMul(m.TGradient, m.TGradient, res.TWeight, ssM);

            tTmp.Dispose();
            ss.Dispose();
            ssM.Dispose();
            ssN.Dispose();
            // FIX: res was never disposed after its gradient was consumed, unlike the
            // sibling backward actions (Dropout, EltMulMulAdd) which release the result
            // tensor here. Dispose it to match the file's pattern and avoid a leak.
            res.Dispose();
        };
        this.backprop.Add(backward);
    }
    else
    {
        tTmp.Dispose();
    }

    return (res);
}
/// <summary>
/// Folds the element-wise product w ⊙ g into this tensor's gradient, allocating the
/// gradient buffer lazily on first use.
/// </summary>
/// <param name="w">First factor; its Sizes determine the gradient's shape on allocation.</param>
/// <param name="g">Second factor.</param>
/// <param name="inPlace">
/// When true and a gradient already exists, the product overwrites it instead of
/// being accumulated. Ignored on first use, where the product is written directly anyway.
/// </param>
public void AddMulGradient(Tensor w, Tensor g, bool inPlace = false)
{
    if (m_TGradient == null)
    {
        // First contribution: allocate and write the product directly.
        m_allocator = TensorAllocator.Allocator(DeviceId);
        m_TGradient = new Tensor(m_allocator, DType.Float32, w.Sizes);
        Ops.Mul(m_TGradient, w, g);
        return;
    }

    if (inPlace)
    {
        Ops.Mul(m_TGradient, w, g);                      // overwrite existing gradient
    }
    else
    {
        Ops.AddMul(m_TGradient, m_TGradient, w, g);      // accumulate onto it
    }
}