/// <summary>
/// Fills <paramref name="gradienta"/> from the element-wise product of <paramref name="s"/> and
/// <paramref name="b"/>, for the two size relationships this routine supports.
/// </summary>
/// <remarks>
/// Case 1 - <c>s</c> has as many elements as <c>a</c>: the buffer of <c>b</c> is reused from its
/// start for every consecutive run of <c>b.Shape.TotalSize</c> elements of <c>s</c>, and each
/// product is written at the same offset in <c>gradienta</c>. A shorter trailing run is handled
/// by one extra call.
/// Case 2 - <c>s</c> has as many elements as <c>b</c>: <c>s</c> and <c>b</c> are walked together in
/// runs of <c>a.Shape.TotalSize</c> elements. The first run is multiplied straight into
/// <c>gradienta</c>; every later run goes through <c>ElementWiseFMA</c> with <c>gradienta</c>
/// passed as both the third and fourth pointer argument.
/// NOTE(review): presumably ElementWiseFMA computes x * y + z into the output, which would make
/// case 2 a sum over runs - confirm against VectorizationFloat.
/// NOTE(review): <c>s</c> looks like the upstream gradient of an element-wise multiply of
/// <c>a</c> and <c>b</c> - confirm with callers.
/// </remarks>
/// <param name="gradienta">Destination tensor; its float buffer is overwritten.</param>
/// <param name="s">Tensor whose float buffer is multiplied with that of <paramref name="b"/>.</param>
/// <param name="a">Only its total element count is read here.</param>
/// <param name="b">Second factor of the product.</param>
/// <exception cref="Exception">
/// Thrown when the element count of <paramref name="s"/> matches neither <paramref name="a"/>
/// nor <paramref name="b"/>.
/// </exception>
public static void MultiplyFloat32_GetGradientA(Tensor gradienta, Tensor s, Tensor a, Tensor b)
{
    var sTotal = s.Shape.TotalSize;
    var aTotal = a.Shape.TotalSize;
    var bTotal = b.Shape.TotalSize;

    // Reject unsupported size combinations before doing any work.
    if (sTotal != aTotal && sTotal != bTotal)
    {
        throw new Exception("Impossible reagion MultiplyFloat32_GetGradientA!");
    }

    // When s matches a (checked first), the run length comes from b; otherwise from a.
    bool sameSizeAsA = sTotal == aTotal;
    var period = sameSizeAsA ? bTotal : aTotal;

    // Largest multiple of the run length that fits in s; anything beyond it is the tail.
    long wholeRuns = sTotal / period * period;

    // Nothing to process: no kernel call is issued and the buffers are never touched.
    if (sTotal == 0)
    {
        return;
    }

    float* sPtr = (float*)s.Base.Array;
    float* bPtr = (float*)b.Base.Array;
    float* gradPtr = (float*)gradienta.Base.Array;

    if (sameSizeAsA)
    {
        // Case 1: multiply each run of s by b (always read from its start).
        long offset = 0;
        while (offset < wholeRuns)
        {
            VectorizationFloat.ElementWiseMultiplyAVX(sPtr + offset, bPtr, gradPtr + offset, bTotal);
            offset += bTotal;
        }

        if (wholeRuns < sTotal)
        {
            VectorizationFloat.ElementWiseMultiplyAVX(sPtr + wholeRuns, bPtr, gradPtr + wholeRuns, sTotal - wholeRuns);
        }

        return;
    }

    // Case 2: every run of s * b lands on the start of gradienta.
    if (wholeRuns > 0)
    {
        var gradLength = gradienta.Shape.TotalSize;

        // The first run initialises the destination with a plain multiply...
        VectorizationFloat.ElementWiseMultiplyAVX(sPtr, bPtr, gradPtr, gradLength);

        // ...and the remaining whole runs are folded in through the FMA kernel.
        for (long offset = aTotal; offset < wholeRuns; offset += aTotal)
        {
            VectorizationFloat.ElementWiseFMA(sPtr + offset, bPtr + offset, gradPtr, gradPtr, gradLength);
        }
    }

    if (wholeRuns < sTotal)
    {
        // The tail only covers the leading (sTotal - wholeRuns) elements of the destination.
        VectorizationFloat.ElementWiseFMA(sPtr + wholeRuns, bPtr + wholeRuns, gradPtr, gradPtr, sTotal - wholeRuns);
    }
}