Example #1
        /// <summary>
        /// Fused residual LayerNorm: computes LayerNorm(src1 + src2), scaled by alpha and shifted by beta.
        /// </summary>
        /// <param name="src1">The first input tensor.</param>
        /// <param name="src2">The second input tensor, added element-wise to src1; must have the same shape.</param>
        /// <param name="alpha">The learned per-feature scale (gain) parameters.</param>
        /// <param name="beta">The learned per-feature shift (bias) parameters.</param>
        /// <param name="eps">A small constant added to the variance for numerical stability.</param>
        /// <returns>The normalized output tensor, with the same shape as src1.</returns>
        public IWeightTensor AddLayerNorm(IWeightTensor src1, IWeightTensor src2, IWeightTensor alpha, IWeightTensor beta, float eps = 1e-09f)
        {
            WeightTensor src1T  = src1 as WeightTensor;
            WeightTensor src2T  = src2 as WeightTensor;
            WeightTensor alphaT = alpha as WeightTensor;
            WeightTensor betaT  = beta as WeightTensor;

            WeightTensor res = m_weightTensorFactory.CreateWeightTensor(src1T.Sizes, m_deviceId, name: $"{GetHashString(src1.Name, src2.Name, alpha.Name, beta.Name)}.AddLayerNorm");

            VisualizeNodes(new IWeightTensor[] { src1, src2, alpha, beta }, res);

            // Forward pass: one fused op sums the inputs, normalizes, then applies alpha and beta.
            Ops.AddLayerNorm(res.TWeight, src1T.TWeight, src2T.TWeight, alphaT.TWeight, betaT.TWeight, eps);
            if (m_needsBackprop)
            {
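                // Backward pass: one fused call computes gradients for src1, src2, alpha and beta.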
                Action backward = () =>
                {
                    Ops.AddLayerNormGrad(src1T.TGradient, src2T.TGradient, alphaT.TGradient, betaT.TGradient, res.TGradient, res.TWeight, src1T.TWeight, src2T.TWeight, alphaT.TWeight, betaT.TWeight, eps);

                    res.Dispose();
                };
                m_backprop.Add(backward);

                src1T.UnbindFromComputeGraph();
                src2T.UnbindFromComputeGraph();

                alphaT.UnbindFromComputeGraph();
                betaT.UnbindFromComputeGraph();
            }

            return res;
        }
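
For reference, the fused call is equivalent to composing the two operations separately. Here is a minimal unfused sketch; the graph variable g and the separate Add/LayerNorm methods are assumptions for illustration, not taken from the fragment itself:

        // Hypothetical unfused equivalent of AddLayerNorm, shown for clarity only.
        // Per row: res = alpha * (x - mean(x)) / sqrt(var(x) + eps) + beta, where x = src1 + src2.
        IWeightTensor sum = g.Add(src1, src2);                  // element-wise residual sum
        IWeightTensor res = g.LayerNorm(sum, alpha, beta, eps); // normalize, then scale and shift

The second fragment performs the same computation, but first expands alpha and beta to the full input shape:
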
        /// <summary>
        /// Fused residual LayerNorm: computes LayerNorm(src1 + src2), scaled by alpha and shifted by beta.
        /// This variant expands alpha and beta to the full input shape before invoking the fused ops.
        /// </summary>
        /// <param name="src1">The first input tensor.</param>
        /// <param name="src2">The second input tensor, added element-wise to src1; must have the same shape.</param>
        /// <param name="alpha">The learned per-feature scale (gain) parameters.</param>
        /// <param name="beta">The learned per-feature shift (bias) parameters.</param>
        /// <param name="eps">A small constant added to the variance for numerical stability.</param>
        /// <returns>The normalized output tensor, with the same shape as src1.</returns>
        public IWeightTensor AddLayerNorm(IWeightTensor src1, IWeightTensor src2, IWeightTensor alpha, IWeightTensor beta, float eps = 1e-09f)
        {
            var src1T  = src1 as WeightTensor;
            var src2T  = src2 as WeightTensor;
            var alphaT = alpha as WeightTensor;
            var betaT  = beta as WeightTensor;

            // Broadcast the per-feature alpha/beta vectors up to the full (Rows x Columns)
            // shape expected by the fused kernel.
            var alphaTWExp = alphaT.TWeight.Expand(src1.Rows, src1.Columns);
            var betaTWExp  = betaT.TWeight.Expand(src1.Rows, src1.Columns);

            var res = m_weightTensorFactory.CreateWeightTensor(src1T.Sizes, m_deviceId, name: $"{GetHashString(src1.Name, src2.Name, alpha.Name, beta.Name)}.AddLayerNorm");

            VisualizeNodes(new IWeightTensor[] { src1, src2, alpha, beta }, res);

            Ops.AddLayerNorm(res.TWeight, src1T.TWeight, src2T.TWeight, alphaTWExp, betaTWExp, eps);
            if (m_needsBackprop)
            {
                Action backward = () =>
                {
                    // Expand the parameter gradients the same way, so the fused backward
                    // kernel sees shapes that match the forward pass.
                    using (var alphaTGExp = alphaT.TGradient.Expand(src1.Rows, src1.Columns))
                    {
                        using (var betaTGExp = betaT.TGradient.Expand(src1.Rows, src1.Columns))
                        {
                            Ops.AddLayerNormGrad(src1T.TGradient, src2T.TGradient, alphaTGExp, betaTGExp, res.TGradient, res.TWeight, src1T.TWeight, src2T.TWeight, alphaTWExp, betaTWExp, eps);
                        }
                    }

                    alphaTWExp.Dispose();
                    betaTWExp.Dispose();

                    res.Dispose();
                };
                m_backprop.Add(backward);
            }
            else
            {
                alphaTWExp.Dispose();
                betaTWExp.Dispose();
            }

            return res;
        }
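
A hedged usage sketch: how a fused add-plus-LayerNorm is typically invoked for a post-sub-layer residual connection in a transformer block. The wrapper method, the graph parameter g, and the m_alpha/m_beta fields are illustrative assumptions, not part of the fragments above:

        // Hypothetical caller: applies "LayerNorm(x + Sublayer(x))" as a single fused graph op.
        public IWeightTensor ResidualNorm(IComputeGraph g, IWeightTensor input, IWeightTensor sublayerOut)
        {
            // m_alpha and m_beta hold the layer's learned gain and bias vectors.
            return g.AddLayerNorm(input, sublayerOut, m_alpha, m_beta, 1e-09f);
        }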