// Verifies VectorizationFloat.ElementWiseMultiplyAVX on two identical ramps
// 0, 1, ..., 9999: the element-wise product must match i * i for every index
// according to ArrayEqual (a helper that is not visible in this part of the file).
public unsafe void Multiply()
{
    const int count = 10000;

    float[] left = new float[count];
    float[] right = new float[count];
    float[] expected = new float[count];
    for (int k = 0; k < count; k++)
    {
        left[k] = k;
        right[k] = k;
        // Integer product, converted to float on assignment.
        expected[k] = k * k;
    }

    float[] actual = new float[count];
    // Pin all three arrays so raw pointers can be handed to the vectorized kernel.
    fixed (float* a = left, b = right, y = actual)
    {
        VectorizationFloat.ElementWiseMultiplyAVX(a, b, y, actual.Length);
    }

    Assert.IsTrue(ArrayEqual(actual, expected));
}
/// <summary>
/// Builds a new tensor from <paramref name="s"/> and <paramref name="sm"/> by working on
/// consecutive groups whose length is the last dimension of <paramref name="sm"/>.
/// For each group it calls <c>VectorizationFloat.SumOfProduction</c> on the matching
/// slices of <paramref name="s"/> and <paramref name="sm"/>, then adds the negated result
/// to that group of the clone of <paramref name="s"/>. Finally the whole clone is
/// multiplied element-wise by <paramref name="sm"/>.
/// </summary>
/// <param name="s">Tensor that is cloned and used as the first operand; its buffer is read as float.</param>
/// <param name="sm">Tensor whose last dimension defines the group length; its buffer is read as float.</param>
/// <returns>The modified clone of <paramref name="s"/>.</returns>
/// <remarks>
/// NOTE(review): SumOfProduction is presumably a dot product, which would make the result
/// sm * (s - sum(s * sm)) per group - confirm against VectorizationFloat.
/// </remarks>
public static Tensor SoftmaxFloat32_GetGradient_0(Tensor s, Tensor sm)
{
    Tensor combined = Tensor.Clone(s);
    long rowLength = sm.Shape[sm.Shape.N - 1];

    long offset = 0;
    while (offset < combined.Shape.TotalSize)
    {
        float* upstreamRow = (float*)s.Base.Array + offset;
        float* softmaxRow = (float*)sm.Base.Array + offset;
        float* outputRow = (float*)combined.Base.Array + offset;

        float weightedSum = VectorizationFloat.SumOfProduction(upstreamRow, softmaxRow, rowLength);

        // In-place: the group is both the input and the destination of the add.
        VectorizationFloat.ElementWiseAddAVX(outputRow, -weightedSum, outputRow, rowLength);

        offset += rowLength;
    }

    // In-place element-wise multiply of the whole buffer by sm.
    VectorizationFloat.ElementWiseMultiplyAVX(
        (float*)combined.Base.Array,
        (float*)sm.Base.Array,
        (float*)combined.Base.Array,
        combined.Shape.TotalSize);

    return combined;
}
/// <summary>
/// Writes the element-wise product of <paramref name="a"/> and <paramref name="b"/> into
/// <paramref name="res"/>. The operand with fewer elements is reused from its start for
/// every tile of the larger operand; a trailing partial tile is handled separately.
/// </summary>
/// <param name="res">Destination tensor; its buffer is written as float.</param>
/// <param name="a">First operand; its buffer is read as float.</param>
/// <param name="b">Second operand; its buffer is read as float.</param>
public static void MultiplyFloat32(Tensor res, Tensor a, Tensor b)
{
    // "small" is the operand that gets repeated; on a size tie a stays the small one.
    Tensor small = a;
    Tensor large = b;
    if (a.Shape.TotalSize > b.Shape.TotalSize)
    {
        small = b;
        large = a;
    }

    // Number of destination elements covered by whole tiles of "small".
    long tiled = res.Shape.TotalSize / small.Shape.TotalSize * small.Shape.TotalSize;

    long offset = 0;
    while (offset < tiled)
    {
        VectorizationFloat.ElementWiseMultiplyAVX(
            (float*)small.Base.Array,
            (float*)large.Base.Array + offset,
            (float*)res.Base.Array + offset,
            small.Shape.TotalSize);
        offset += small.Shape.TotalSize;
    }

    // Leftover elements that do not fill a complete tile.
    long remainder = res.Shape.TotalSize - tiled;
    if (remainder > 0)
    {
        VectorizationFloat.ElementWiseMultiplyAVX(
            (float*)small.Base.Array,
            (float*)large.Base.Array + tiled,
            (float*)res.Base.Array + tiled,
            remainder);
    }
}
/// <summary>
/// Fills <paramref name="gradienta"/> from <paramref name="s"/> and <paramref name="b"/>.
/// Two layouts are supported, chosen by comparing total element counts:
/// <list type="bullet">
/// <item><description>
/// <paramref name="s"/> has as many elements as <paramref name="a"/>: each tile of
/// <paramref name="s"/> is multiplied element-wise by <paramref name="b"/> (reused from
/// its start for every tile) and written to the same offset of <paramref name="gradienta"/>.
/// </description></item>
/// <item><description>
/// <paramref name="s"/> has as many elements as <paramref name="b"/>: the first tile's
/// product of <paramref name="s"/> and <paramref name="b"/> initialises
/// <paramref name="gradienta"/>, and every later tile (including a trailing partial
/// one) is folded into it via <c>VectorizationFloat.ElementWiseFMA</c>.
/// </description></item>
/// </list>
/// </summary>
/// <param name="gradienta">Destination tensor; its buffer is written as float.</param>
/// <param name="s">Tensor multiplied with <paramref name="b"/>; its buffer is read as float.</param>
/// <param name="a">Only its total element count is used (branch selection and tile size).</param>
/// <param name="b">Second factor; its buffer is read as float.</param>
/// <exception cref="ArgumentException">
/// The total size of <paramref name="s"/> matches neither <paramref name="a"/> nor <paramref name="b"/>.
/// </exception>
/// <remarks>
/// NOTE(review): ElementWiseFMA(x, y, z, dst, n) is presumably dst = x * y + z - confirm
/// against VectorizationFloat.
/// </remarks>
public static void MultiplyFloat32_GetGradientA(Tensor gradienta, Tensor s, Tensor a, Tensor b)
{
    if (s.Shape.TotalSize == a.Shape.TotalSize)
    {
        // b is the repeated operand: multiply tile by tile, always reading b from its start.
        long go = s.Shape.TotalSize / b.Shape.TotalSize * b.Shape.TotalSize;
        for (long i = 0; i < go; i += b.Shape.TotalSize)
        {
            VectorizationFloat.ElementWiseMultiplyAVX(
                (float*)s.Base.Array + i,
                (float*)b.Base.Array,
                (float*)gradienta.Base.Array + i,
                b.Shape.TotalSize);
        }

        // Trailing partial tile.
        if (go < s.Shape.TotalSize)
        {
            VectorizationFloat.ElementWiseMultiplyAVX(
                (float*)s.Base.Array + go,
                (float*)b.Base.Array,
                (float*)gradienta.Base.Array + go,
                s.Shape.TotalSize - go);
        }
    }
    else if (s.Shape.TotalSize == b.Shape.TotalSize)
    {
        // a is the repeated operand: contributions of all tiles are reduced into gradienta.
        long go = s.Shape.TotalSize / a.Shape.TotalSize * a.Shape.TotalSize;

        // The first tile overwrites gradienta so no separate zero-fill is needed.
        if (go > 0)
        {
            VectorizationFloat.ElementWiseMultiplyAVX(
                (float*)s.Base.Array,
                (float*)b.Base.Array,
                (float*)gradienta.Base.Array,
                gradienta.Shape.TotalSize);
        }

        // Remaining whole tiles use gradienta as both the addend and the destination.
        for (long i = a.Shape.TotalSize; i < go; i += a.Shape.TotalSize)
        {
            VectorizationFloat.ElementWiseFMA(
                (float*)s.Base.Array + i,
                (float*)b.Base.Array + i,
                (float*)gradienta.Base.Array,
                (float*)gradienta.Base.Array,
                gradienta.Shape.TotalSize);
        }

        // Trailing partial tile only touches the leading part of gradienta.
        if (go < s.Shape.TotalSize)
        {
            VectorizationFloat.ElementWiseFMA(
                (float*)s.Base.Array + go,
                (float*)b.Base.Array + go,
                (float*)gradienta.Base.Array,
                (float*)gradienta.Base.Array,
                s.Shape.TotalSize - go);
        }
    }
    else
    {
        // A specific exception type and an actionable message instead of a bare Exception.
        throw new ArgumentException(
            "MultiplyFloat32_GetGradientA: the total size of s must equal the total size of a or b.",
            "s");
    }
}