/// <summary>
/// Produces the float32 softmax backward tensor from <paramref name="s"/> and <paramref name="sm"/>.
/// </summary>
/// <remarks>
/// The result starts as a clone of <paramref name="s"/>. The buffers are walked in consecutive
/// groups whose length is the last dimension of <paramref name="sm"/>. For each group the value
/// returned by <c>SumOfProduction</c> over the matching slices of <paramref name="s"/> and
/// <paramref name="sm"/> is negated and handed to <c>ElementWiseAddAVX</c> on the result slice.
/// Finally the whole result buffer is passed through <c>ElementWiseMultiplyAVX</c> together with
/// <paramref name="sm"/>.
/// NOTE(review): presumably this evaluates sm * (s - sum(s * sm)) per group, i.e. the usual
/// softmax Jacobian-vector product; confirm against the VectorizationFloat helpers.
/// NOTE(review): nothing here checks that both tensors hold the same number of elements or that
/// the total size is a multiple of the group length; callers are assumed to guarantee that.
/// </remarks>
/// <param name="s">Tensor that is cloned and combined with <paramref name="sm"/> (presumably the upstream gradient).</param>
/// <param name="sm">Tensor whose last dimension defines the group length (presumably the softmax output).</param>
/// <returns>A new tensor produced by <c>Tensor.Clone(s)</c> and then updated in place.</returns>
public static Tensor SoftmaxFloat32_GetGradient_0(Tensor s, Tensor sm)
{
    Tensor gradient = Tensor.Clone(s);

    // Length of one group = size of the last dimension of sm.
    long rowLength = sm.Shape[sm.Shape.N - 1];
    long elementCount = gradient.Shape.TotalSize;

    // Base addresses of the three buffers, viewed as float data.
    float* upstream = (float *)s.Base.Array;
    float* softmax = (float *)sm.Base.Array;
    float* output = (float *)gradient.Base.Array;

    long offset = 0;
    while (offset < elementCount)
    {
        // The sum is read from the untouched input s, not from the buffer being rewritten.
        float weightedSum = VectorizationFloat.SumOfProduction(upstream + offset, softmax + offset, rowLength);

        // In-place update of the current group: source and destination are the same slice.
        VectorizationFloat.ElementWiseAddAVX(output + offset, -weightedSum, output + offset, rowLength);

        offset += rowLength;
    }

    // One pass over the complete buffer, again writing back in place.
    VectorizationFloat.ElementWiseMultiplyAVX(output, softmax, output, elementCount);

    return gradient;
}