public IWeightTensor EltMul(IWeightTensor w1, IWeightTensor w2) { WeightTensor m1 = w1 as WeightTensor; WeightTensor m2 = w2 as WeightTensor; WeightTensor res = m_weightTensorFactory.CreateWeightTensor(m1.Sizes, m_deviceId, name: $"{GetHashString(w1.Name, w2.Name)}.EltMul", graphToBind: this); VisualizeNodes(new IWeightTensor[] { w1, w2 }, res); Ops.Mul(res.TWeight, m1.TWeight, m2.TWeight); if (m_needsBackprop) { Action backward = () => { res.ReleaseWeight(); m1.AddMulGradient(m2.TWeight, res.TGradient); m2.AddMulGradient(m1.TWeight, res.TGradient); res.Dispose(); }; m_backprop.Add(backward); m1.UnbindFromComputeGraph(); m2.UnbindFromComputeGraph(); } return(res); }
public IWeightTensor Dropout(IWeightTensor V, int batchSize, float drop_prob, bool inPlace = false) { if (drop_prob == 0 || !m_needsBackprop) { return(V); } // Generate noise tensor float p = 1.0f - drop_prob; Tensor noise = BuildRandomTensor(V.Rows, V.Columns, batchSize, p); WeightTensor w = V as WeightTensor; WeightTensor res = null; if (inPlace) { res = w.CopyWeightsRef($"{GetHashString(V.Name)}.Dropout"); } else { res = m_weightTensorFactory.CreateWeightTensor(w.Sizes, m_deviceId, name: $"{GetHashString(V.Name)}.Dropout", graphToBind: this); } VisualizeNodes(V, res); Ops.Mul(res.TWeight, w.TWeight, noise); Action backward = () => { res.ReleaseWeight(); if (inPlace) { w.TGradient = res.TGradient.CopyRef(); } w.AddMulGradient(noise, res.TGradient, inPlace); res.Dispose(); noise.Dispose(); }; m_backprop.Add(backward); return(res); }
/// <summary> /// Result = w1 * w2 + w3 * w4 /// </summary> /// <param name="w1"></param> /// <param name="w2"></param> /// <param name="w3"></param> /// <param name="w4"></param> /// <returns></returns> public IWeightTensor EltMulMulAdd(IWeightTensor w1, IWeightTensor w2, IWeightTensor w3, IWeightTensor w4) { WeightTensor m1 = w1 as WeightTensor; WeightTensor m2 = w2 as WeightTensor; WeightTensor m3 = w3 as WeightTensor; WeightTensor m4 = w4 as WeightTensor; WeightTensor res = m_weightTensorFactory.CreateWeightTensor(m1.Sizes, m_deviceId, name: $"{GetHashString(w1.Name, w2.Name, w3.Name, w4.Name)}.EltMulMulAdd", graphToBind: this); VisualizeNodes(new IWeightTensor[] { w1, w2, w3, w4 }, res); Ops.MulMulAdd(res.TWeight, m1.TWeight, m2.TWeight, m3.TWeight, m4.TWeight); if (m_needsBackprop) { Action backward = () => { res.ReleaseWeight(); m1.AddMulGradient(m2.TWeight, res.TGradient); m2.AddMulGradient(m1.TWeight, res.TGradient); m3.AddMulGradient(m4.TWeight, res.TGradient); m4.AddMulGradient(m3.TWeight, res.TGradient); res.Dispose(); }; m_backprop.Add(backward); // These tensors' weights will be used during back-propogation, so we unbind them from the computing graph m1.UnbindFromComputeGraph(); m2.UnbindFromComputeGraph(); m3.UnbindFromComputeGraph(); m4.UnbindFromComputeGraph(); } return(res); }