/// <summary>
/// Fused add + layer normalization: LayerNorm(src1 + src2).
/// </summary>
/// <param name="src1">First input tensor; the result is allocated with this tensor's sizes.</param>
/// <param name="src2">Second input tensor.</param>
/// <param name="alpha">Layer-norm parameter forwarded to <c>Ops.AddLayerNorm</c> (presumably the gain; confirm against the kernel).</param>
/// <param name="beta">Layer-norm parameter forwarded to <c>Ops.AddLayerNorm</c> (presumably the bias; confirm against the kernel).</param>
/// <param name="eps">Value forwarded unchanged to the forward and gradient kernels (presumably the numerical-stability epsilon).</param>
/// <returns>A newly created weight tensor that receives the output of <c>Ops.AddLayerNorm</c>.</returns>
public IWeightTensor AddLayerNorm(IWeightTensor src1, IWeightTensor src2, IWeightTensor alpha, IWeightTensor beta, float eps = 1e-09f)
{
    // The kernels work on the concrete tensor type, so down-cast every operand first.
    var lhs = src1 as WeightTensor;
    var rhs = src2 as WeightTensor;
    var gain = alpha as WeightTensor;
    var bias = beta as WeightTensor;

    var output = m_weightTensorFactory.CreateWeightTensor(
        lhs.Sizes,
        m_deviceId,
        name: $"{GetHashString(src1.Name, src2.Name, alpha.Name, beta.Name)}.AddLayerNorm");

    VisualizeNodes(new IWeightTensor[] { src1, src2, alpha, beta }, output);

    // Forward pass.
    Ops.AddLayerNorm(output.TWeight, lhs.TWeight, rhs.TWeight, gain.TWeight, bias.TWeight, eps);

    if (!m_needsBackprop)
    {
        return output;
    }

    // Backward pass: run the gradient kernel for all four operands, then release the output tensor.
    Action gradientStep = () =>
    {
        Ops.AddLayerNormGrad(
            lhs.TGradient, rhs.TGradient, gain.TGradient, bias.TGradient,
            output.TGradient, output.TWeight,
            lhs.TWeight, rhs.TWeight, gain.TWeight, bias.TWeight,
            eps);

        output.Dispose();
    };
    m_backprop.Add(gradientStep);

    lhs.UnbindFromComputeGraph();
    rhs.UnbindFromComputeGraph();
    gain.UnbindFromComputeGraph();
    bias.UnbindFromComputeGraph();

    return output;
}
/// <summary>
/// Fused add + layer normalization: LayerNorm(src1 + src2).
/// </summary>
/// <remarks>
/// <paramref name="alpha"/> and <paramref name="beta"/> are expanded to
/// (<c>src1.Rows</c>, <c>src1.Columns</c>) before being handed to the kernels.
/// The expanded weight tensors are disposed by this method when no backward step is
/// registered or when the forward pass throws; otherwise the backward step disposes them.
/// </remarks>
/// <param name="src1">First input tensor; the result is allocated with this tensor's sizes.</param>
/// <param name="src2">Second input tensor.</param>
/// <param name="alpha">Layer-norm parameter (presumably the gain; confirm against <c>Ops.AddLayerNorm</c>).</param>
/// <param name="beta">Layer-norm parameter (presumably the bias; confirm against <c>Ops.AddLayerNorm</c>).</param>
/// <param name="eps">Value forwarded unchanged to the forward and gradient kernels (presumably the numerical-stability epsilon).</param>
/// <returns>A newly created weight tensor that receives the output of <c>Ops.AddLayerNorm</c>.</returns>
public IWeightTensor AddLayerNorm(IWeightTensor src1, IWeightTensor src2, IWeightTensor alpha, IWeightTensor beta, float eps = 1e-09f)
{
    var src1T = src1 as WeightTensor;
    var src2T = src2 as WeightTensor;
    var alphaT = alpha as WeightTensor;
    var betaT = beta as WeightTensor;

    var alphaTWExp = alphaT.TWeight.Expand(src1.Rows, src1.Columns);
    var betaTWExp = betaT.TWeight.Expand(src1.Rows, src1.Columns);

    // Becomes true once the backward closure is registered; from then on the closure
    // is responsible for disposing the expanded weight tensors.
    var expandedWeightsHandedOff = false;
    try
    {
        var res = m_weightTensorFactory.CreateWeightTensor(
            src1T.Sizes,
            m_deviceId,
            name: $"{GetHashString(src1.Name, src2.Name, alpha.Name, beta.Name)}.AddLayerNorm");

        VisualizeNodes(new IWeightTensor[] { src1, src2, alpha, beta }, res);

        Ops.AddLayerNorm(res.TWeight, src1T.TWeight, src2T.TWeight, alphaTWExp, betaTWExp, eps);

        if (m_needsBackprop)
        {
            Action backward = () =>
            {
                try
                {
                    using (var alphaTGExp = alphaT.TGradient.Expand(src1.Rows, src1.Columns))
                    using (var betaTGExp = betaT.TGradient.Expand(src1.Rows, src1.Columns))
                    {
                        Ops.AddLayerNormGrad(
                            src1T.TGradient, src2T.TGradient, alphaTGExp, betaTGExp,
                            res.TGradient, res.TWeight,
                            src1T.TWeight, src2T.TWeight, alphaTWExp, betaTWExp,
                            eps);
                    }
                }
                finally
                {
                    // Release the expanded weights even if the gradient kernel throws.
                    alphaTWExp.Dispose();
                    betaTWExp.Dispose();
                }

                res.Dispose();
            };
            this.m_backprop.Add(backward);
            expandedWeightsHandedOff = true;
        }

        return res;
    }
    finally
    {
        // Covers both the no-backprop path and any exception thrown before hand-off.
        if (!expandedWeightsHandedOff)
        {
            alphaTWExp.Dispose();
            betaTWExp.Dispose();
        }
    }
}