示例#1
0
        /// <summary>
        /// Fills <paramref name="gradienta"/> from the flat float buffers of <paramref name="s"/> and
        /// <paramref name="b"/>. Which operand is treated as the shorter, repeated one is decided purely
        /// by comparing total element counts.
        /// </summary>
        /// <remarks>
        /// NOTE(review): presumably this is the backward pass of an element-wise multiply with
        /// broadcasting, where <paramref name="s"/> carries the incoming gradient - confirm with callers.
        /// The vectorized helpers are assumed to compute <c>dst = x * y</c> (ElementWiseMultiplyAVX) and
        /// <c>dst = x * y + acc</c> (ElementWiseFMA) over the given element count - verify against
        /// VectorizationFloat.
        /// </remarks>
        /// <param name="gradienta">Destination tensor; its buffer is written in place.</param>
        /// <param name="s">Tensor whose total size must match either <paramref name="a"/> or <paramref name="b"/>.</param>
        /// <param name="a">Only its total size is consulted here.</param>
        /// <param name="b">Second factor of every product.</param>
        /// <exception cref="Exception">
        /// Thrown when the total size of <paramref name="s"/> equals neither that of <paramref name="a"/>
        /// nor that of <paramref name="b"/>.
        /// </exception>
        public static void MultiplyFloat32_GetGradientA(Tensor gradienta, Tensor s, Tensor a, Tensor b)
        {
            var sSize = s.Shape.TotalSize;
            var aSize = a.Shape.TotalSize;
            var bSize = b.Shape.TotalSize;

            bool sMatchesA = sSize == aSize;
            if (!sMatchesA && sSize != bSize)
            {
                throw new Exception("Impossible reagion MultiplyFloat32_GetGradientA!");
            }

            if (sMatchesA)
            {
                // b is the shorter buffer: walk s in b-sized chunks, multiplying each chunk by b from its start.
                long covered = sSize / bSize * bSize;

                float* sData = (float *)s.Base.Array;
                float* bData = (float *)b.Base.Array;
                float* gData = (float *)gradienta.Base.Array;

                for (long offset = 0; offset < covered; offset += bSize)
                {
                    VectorizationFloat.ElementWiseMultiplyAVX(sData + offset, bData, gData + offset, bSize);
                }

                // Leftover elements when s is not an exact multiple of b.
                if (covered < sSize)
                {
                    VectorizationFloat.ElementWiseMultiplyAVX(sData + covered, bData, gData + covered, sSize - covered);
                }

                return;
            }

            // s matches b: every a-sized chunk of s and b is combined and folded into the start of gradienta.
            {
                long covered = sSize / aSize * aSize;

                float* sData = (float *)s.Base.Array;
                float* bData = (float *)b.Base.Array;
                float* gData = (float *)gradienta.Base.Array;

                if (covered > 0)
                {
                    var gSize = gradienta.Shape.TotalSize;

                    // The first chunk overwrites the destination; the remaining chunks accumulate onto it.
                    VectorizationFloat.ElementWiseMultiplyAVX(sData, bData, gData, gSize);

                    for (long offset = aSize; offset < covered; offset += aSize)
                    {
                        VectorizationFloat.ElementWiseFMA(sData + offset, bData + offset, gData, gData, gSize);
                    }
                }

                // A partial trailing chunk is accumulated onto the leading elements of the destination.
                if (covered < sSize)
                {
                    VectorizationFloat.ElementWiseFMA(sData + covered, bData + covered, gData, gData, sSize - covered);
                }
            }
        }