/// <summary>
/// Fills <paramref name="res"/> by calling <c>ElementWiseSubtractAVX</c> with
/// <paramref name="a"/> as the first operand and <paramref name="b"/> as the second.
/// The operand with fewer elements is re-used for every consecutive chunk of the
/// larger one.
/// </summary>
/// <param name="res">Destination tensor; its total size decides how many elements are written.</param>
/// <param name="a">First operand passed to the subtraction kernel.</param>
/// <param name="b">Second operand passed to the subtraction kernel.</param>
public static void SubtractFloat32(Tensor res, Tensor a, Tensor b)
{
    float* first = (float*)a.Base.Array;
    float* second = (float*)b.Base.Array;
    float* output = (float*)res.Base.Array;
    long total = res.Shape.TotalSize;

    if (a.Shape.TotalSize > b.Shape.TotalSize)
    {
        // b is the smaller operand: apply it again to each b-sized chunk of a.
        long chunk = b.Shape.TotalSize;
        long covered = total / chunk * chunk;
        long offset = 0;
        while (offset < covered)
        {
            VectorizationFloat.ElementWiseSubtractAVX(first + offset, second, output + offset, chunk);
            offset += chunk;
        }

        // Trailing elements that do not fill a whole chunk.
        if (covered < total)
        {
            VectorizationFloat.ElementWiseSubtractAVX(first + covered, second, output + covered, total - covered);
        }
    }
    else
    {
        // a is the smaller (or equal) operand: apply it again to each a-sized chunk of b.
        long chunk = a.Shape.TotalSize;
        long covered = total / chunk * chunk;
        long offset = 0;
        while (offset < covered)
        {
            VectorizationFloat.ElementWiseSubtractAVX(first, second + offset, output + offset, chunk);
            offset += chunk;
        }

        // Trailing elements that do not fill a whole chunk.
        if (covered < total)
        {
            VectorizationFloat.ElementWiseSubtractAVX(first, second + covered, output + covered, total - covered);
        }
    }
}
/// <summary>
/// Computes the gradients of an addition in place: for every tensor in
/// <paramref name="gradients"/>, the upstream gradient <paramref name="s"/> is folded
/// chunk by chunk (chunk length = that gradient's total size) into the gradient buffer.
/// </summary>
/// <param name="gradients">Pre-allocated gradient tensors to overwrite.</param>
/// <param name="s">Upstream gradient.</param>
/// <param name="tensors">Operands of the addition; not read by this method.</param>
/// <remarks>
/// The first chunk is assigned and the remaining chunks are added, so the previous
/// contents of each gradient buffer do not leak into the result. The earlier code tested
/// <c>go == 0</c> inside a loop that only runs when <c>go &gt; 0</c>, which made the
/// assign branch unreachable and accumulated onto stale data.
/// NOTE(review): if a gradient is larger than <paramref name="s"/> the main loop does not
/// run and the tail call adds onto the existing buffer contents - confirm callers never
/// pass that combination.
/// </remarks>
public static void AddFloat32_GetGradients(Tensor[] gradients, Tensor s, Tensor[] tensors)
{
    for (int j = 0; j < gradients.Length; j++)
    {
        Tensor gradient = gradients[j];
        long go = s.Shape.TotalSize / gradient.Shape.TotalSize * gradient.Shape.TotalSize;
        for (long i = 0; i < go; i += gradient.Shape.TotalSize)
        {
            if (i == 0)
            {
                // First chunk initialises the buffer instead of accumulating onto it.
                VectorizationFloat.ElementWiseAssignAVX((float*)gradient.Base.Array, (float*)s.Base.Array + i, gradient.Shape.TotalSize);
            }
            else
            {
                VectorizationFloat.ElementWiseAddAVX((float*)s.Base.Array + i, (float*)gradient.Base.Array, (float*)gradient.Base.Array, gradient.Shape.TotalSize);
            }
        }

        // Trailing elements of s that do not fill a whole gradient-sized chunk.
        if (go < s.Shape.TotalSize)
        {
            VectorizationFloat.ElementWiseAddAVX((float*)s.Base.Array + go, (float*)gradient.Base.Array, (float*)gradient.Base.Array, s.Shape.TotalSize - go);
        }
    }
}
/// <summary>
/// Checks that the pointer overload of <c>ElementWiseAddAVX</c> taking two arrays
/// produces <c>i + i</c> for 10000 elements.
/// </summary>
public unsafe void Add()
{
    const int count = 10000;
    float[] left = new float[count];
    float[] right = new float[count];
    float[] expected = new float[count];
    for (int k = 0; k < count; k++)
    {
        left[k] = k;
        right[k] = k;
        expected[k] = k * 2;
    }

    float[] actual = new float[count];
    fixed (float* a = left, b = right, y = actual)
    {
        VectorizationFloat.ElementWiseAddAVX(a, b, y, actual.Length);
    }

    Assert.IsTrue(ArrayEqual(actual, expected));
}
/// <summary>
/// Forwards the buffers of <paramref name="res"/>, <paramref name="s"/> and
/// <paramref name="combined"/> together with the constant factor 2 to
/// <c>ElementWise_A_MultipliedBy_B_MultipliedBy_C</c>, processing
/// <c>res.Shape.TotalSize</c> elements.
/// </summary>
/// <remarks>
/// NOTE(review): presumably <paramref name="combined"/> receives the product
/// <c>res * s * 2</c> - confirm against the kernel's signature.
/// </remarks>
public static void Power2Float32_GetGradient_0(Tensor combined, Tensor s, Tensor res)
{
    VectorizationFloat.ElementWise_A_MultipliedBy_B_MultipliedBy_C(
        (float*)res.Base.Array,
        (float*)s.Base.Array,
        2,
        (float*)combined.Base.Array,
        res.Shape.TotalSize);
}
/// <summary>
/// Gradient of a matrix product with respect to its right operand B.
/// Derivative of B = Transpose(A) * s, where A is (m,k), B is (k,n) and s is (m,n).
/// </summary>
/// <param name="combinedright">Tensor whose buffer is handed to the kernel as the last argument.</param>
/// <param name="s">Upstream gradient.</param>
/// <param name="A">Left operand of the forward product.</param>
/// <param name="thisShape">Shape whose first two extents are passed as the dimensions of <paramref name="s"/>.</param>
/// <param name="term0">Not read by this method.</param>
/// <param name="term1">Not read by this method.</param>
public static void MatrixMultiplyFloat32_GetGradient_1(Tensor combinedright, Tensor s, Tensor A, Shape thisShape, Shape term0, Shape term1)
{
    float* leftOperand = (float*)A.Base.Array;
    float* upstream = (float*)s.Base.Array;
    float* target = (float*)combinedright.Base.Array;

    VectorizationFloat.TransposeAandMatrixMultiply(
        leftOperand, A.Shape[0], A.Shape[1],
        upstream, thisShape[0], thisShape[1],
        target);
}
/// <summary>
/// Gradient of a matrix product with respect to its left operand A.
/// Derivative of A = s * Transpose(B), where A is (m,k), B is (k,n) and s is (m,n).
/// </summary>
/// <param name="combinedleft">Tensor whose buffer is handed to the kernel as the last argument.</param>
/// <param name="s">Upstream gradient.</param>
/// <param name="B">Right operand of the forward product.</param>
/// <param name="thisShape">Shape whose first two extents are passed as the dimensions of <paramref name="s"/>.</param>
/// <param name="term0">Not read by this method.</param>
/// <param name="term1">Not read by this method.</param>
public static void MatrixMultiplyFloat32_GetGradient_0(Tensor combinedleft, Tensor s, Tensor B, Shape thisShape, Shape term0, Shape term1)
{
    float* upstream = (float*)s.Base.Array;
    float* rightOperand = (float*)B.Base.Array;
    float* target = (float*)combinedleft.Base.Array;

    VectorizationFloat.TransposeBandMatrixMultiply(
        upstream, (int)thisShape[0], (int)thisShape[1],
        rightOperand, (int)B.Shape[0], (int)B.Shape[1],
        target);
}
/// <summary>
/// Runs <c>VectorizationFloat.Sigmoid</c> over the inputs 1, 2, 3 repeated three times
/// and compares the output with pre-computed reference values.
/// </summary>
public unsafe void SigmoidTest()
{
    float[] input = { 1, 2, 3, 1, 2, 3, 1, 2, 3 };
    float[] expected =
    {
        0.731058359f, 0.8807941f, 0.95257f,
        0.731058359f, 0.8807941f, 0.95257f,
        0.731058359f, 0.8807941f, 0.95257f
    };

    float[] actual = new float[9];
    fixed (float* ptr_v1 = input, ptr_res = actual)
    {
        VectorizationFloat.Sigmoid(ptr_v1, ptr_res, input.LongLength);
    }

    Assert.IsTrue(ArrayEqual(actual, expected));
}
/// <summary>
/// Checks that <c>MakeNegativeAVX</c> writes the negated value of every input element.
/// </summary>
public unsafe void MakeNegative()
{
    float[] input = { 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3 };
    float[] expected = { -1, -2, -3, -1, -2, -3, -1, -2, -3, -1, -2, -3 };

    float[] actual = new float[input.Length];
    fixed (float* a = input, y = actual)
    {
        VectorizationFloat.MakeNegativeAVX(a, y, actual.Length);
    }

    Assert.IsTrue(ArrayEqual(actual, expected));
}
/// <summary>
/// Calls <c>VectorizationFloat.Softmax</c> on four values with the third argument set to 2.
/// The comparison against the reference values is currently disabled, so this only
/// verifies that the call completes.
/// </summary>
public unsafe void SoftmaxTest()
{
    float[] input = { 1, 2, 3, 1 };
    float[] actual = new float[4];

    fixed (float* ptr_v1 = input, ptr_res = actual)
    {
        VectorizationFloat.Softmax(ptr_v1, ptr_res, 2, input.Length);
    }

    // Reference values kept for the disabled assertion below.
    float[] expected = { 0.268932253f, 0.7310678f, 0.8808078f, 0.119192213f };

    // TODO: check the result, then re-enable:
    // Assert.IsTrue(ArrayEqual(actual, expected));
}
/// <summary>
/// Copies the contents of a three-dimensional array into this variable's weights.
/// </summary>
/// <param name="n">Source values; every dimension must match this variable's shape.</param>
/// <exception cref="Exception">
/// Thrown when the number of dimensions or any individual extent differs from
/// <c>this.Shape</c>.
/// </exception>
public unsafe void SetValue(float[,,] n)
{
    // Reject a rank mismatch up front: otherwise GetLength(i) throws an
    // IndexOutOfRangeException when the shape has more dimensions than the array,
    // and trailing array dimensions go unchecked when it has fewer.
    if (this.Shape.N != n.Rank)
    {
        throw new Exception("The Matrix should have the same dimensions with the Variable!");
    }

    for (int i = 0; i < this.Shape.N; i++)
    {
        if (this.Shape[i] != n.GetLength(i))
        {
            throw new Exception("The Matrix should have the same dimensions with the Variable!");
        }
    }

    // Pin the managed array while the raw copy runs.
    fixed (float* ptr = n)
    {
        VectorizationFloat.ElementWiseAssignAVX((float*)this.Weights.Array, ptr, this.Shape.TotalSize);
    }
}
/// <summary>
/// Fills an array with non-zero values, resets it with <c>ElementWiseSetValueAVX</c>
/// and expects it to equal a freshly allocated (all-zero) array of the same length.
/// </summary>
public void MatrixSetZero()
{
    const int length = 123;
    float[] zeros = new float[length];
    float[] values = new float[length];
    for (int k = 0; k < values.Length; k++)
    {
        values[k] = k;
    }

    VectorizationFloat.ElementWiseSetValueAVX(values, 0, values.Length);

    Assert.IsTrue(ArrayEqual(values, zeros));
}
/// <summary>
/// Calls the array overload of <c>ElementWiseAssignAVX</c> with an all-zero destination
/// and a filled source, then expects both arrays to be equal.
/// </summary>
public void Assigning()
{
    const int length = 123;
    float[] source = new float[length];
    for (int k = 0; k < source.Length; k++)
    {
        source[k] = k;
    }

    float[] destination = new float[length];
    VectorizationFloat.ElementWiseAssignAVX(destination, source, source.Length);

    Assert.IsTrue(ArrayEqual(source, destination));
}
/// <summary>
/// Checks the array-plus-scalar overload of <c>ElementWiseAddAVX</c>:
/// adding 2 to {1, 2, 3} must give {3, 4, 5}.
/// </summary>
public unsafe void Add2()
{
    float[] input = { 1, 2, 3 };
    float addend = 2f;
    float[] expected = { 3, 4, 5 };

    float[] actual = new float[input.Length];
    fixed (float* a = input, y = actual)
    {
        VectorizationFloat.ElementWiseAddAVX(a, addend, y, actual.Length);
    }

    Assert.IsTrue(ArrayEqual(actual, expected));
}
/// <summary>
/// Updates the weights of <paramref name="v"/> in place from the gradient
/// <paramref name="g"/>, passing
/// <c>-v.LearningRateMultiplier * Hyperparameters.LearningRate</c> as the scale factor
/// to <c>ElementWiseAddAVXBetaB</c>.
/// </summary>
/// <param name="v">Trainable whose weights are modified.</param>
/// <param name="g">Gradient; must have the same shape as the weights.</param>
/// <exception cref="DimensionIncompability">Thrown when the shapes differ.</exception>
public unsafe void UpdateWeights(Trainable v, Tensor g)
{
    // Debug tracing used to live here; it is disabled because Trainable has no UniqueId yet.
    if (!g.Shape.EqualShape(v.Weights.Shape))
    {
        throw new DimensionIncompability("The shapes of Variable and Gradient are different!");
    }

    float* weights = (float*)v.Weights.Array;
    float* gradient = (float*)g.Array;
    var scale = -v.LearningRateMultiplier * Hyperparameters.LearningRate;

    // The weights buffer is both an input and the destination of the kernel call.
    VectorizationFloat.ElementWiseAddAVXBetaB(weights, gradient, weights, v.Weights.Shape.TotalSize, scale);
}
/// <summary>
/// Builds the softmax gradient from the upstream gradient <paramref name="s"/> and the
/// softmax output <paramref name="sm"/>.
/// </summary>
/// <remarks>
/// Works on a clone of <paramref name="s"/>. For every group (group length = last
/// dimension of <paramref name="sm"/>) the value returned by <c>SumOfProduction</c> for
/// that group of <paramref name="s"/> and <paramref name="sm"/> is negated and passed as
/// the scalar addend for the group; afterwards the whole buffer is multiplied
/// element-wise by <paramref name="sm"/>.
/// </remarks>
/// <returns>The newly created gradient tensor.</returns>
public static Tensor SoftmaxFloat32_GetGradient_0(Tensor s, Tensor sm)
{
    Tensor combined = Tensor.Clone(s);

    float* upstream = (float*)s.Base.Array;
    float* softmax = (float*)sm.Base.Array;
    float* output = (float*)combined.Base.Array;

    long groupsize = sm.Shape[sm.Shape.N - 1];
    long total = combined.Shape.TotalSize;

    long start = 0;
    while (start < total)
    {
        float averageK = VectorizationFloat.SumOfProduction(upstream + start, softmax + start, groupsize);
        VectorizationFloat.ElementWiseAddAVX(output + start, -averageK, output + start, groupsize);
        start += groupsize;
    }

    VectorizationFloat.ElementWiseMultiplyAVX(output, softmax, output, total);
    return combined;
}
/// <summary>
/// Checks <c>ElementWiseIsEqualsAVX</c> for one pair of identical arrays and one pair
/// that differs in the last element.
/// </summary>
public void Equality()
{
    // Identical contents: the comparison must report true.
    float[] sameLeft = { 1, 2, 3 };
    float[] sameRight = { 1, 2, 3 };
    bool sameResult = VectorizationFloat.ElementWiseIsEqualsAVX(sameLeft, sameRight, sameLeft.Length);
    Assert.AreEqual(sameResult, true);

    // Last element differs: the comparison must report false.
    float[] differentLeft = { 1, 2, 2 };
    float[] differentRight = { 1, 2, 3 };
    bool differentResult = VectorizationFloat.ElementWiseIsEqualsAVX(differentLeft, differentRight, differentLeft.Length);
    Assert.AreEqual(differentResult, false);
}
/// <summary>
/// Calls <c>VectorizationFloat.Softmax</c> on 128 ones with the third argument set to 64.
/// The reference array (1/64 everywhere) is built but never compared, so this test only
/// verifies that the call completes.
/// </summary>
public unsafe void SoftmaxTest2()
{
    const int length = 128;

    float[] input = new float[length];
    for (int k = 0; k < input.Length; k++)
    {
        input[k] = 1;
    }

    float[] actual = new float[length];
    fixed (float* ptr_v1 = input, ptr_res = actual)
    {
        VectorizationFloat.Softmax(ptr_v1, ptr_res, 64, input.Length);
    }

    // Reference values; no assertion uses them yet.
    float[] expected = new float[length];
    for (int k = 0; k < expected.Length; k++)
    {
        expected[k] = 1f / 64;
    }
}
/// <summary>
/// Creates the gradients of an addition for every operand in <paramref name="tensors"/>.
/// </summary>
/// <param name="s">Upstream gradient.</param>
/// <param name="tensors">Operands of the addition.</param>
/// <param name="generateseperately">
/// When false, an operand whose total size equals that of <paramref name="s"/> receives
/// <paramref name="s"/> itself (shared by reference) instead of a new tensor.
/// </param>
/// <returns>One gradient tensor per operand, in the same order.</returns>
/// <remarks>
/// For operands that need their own tensor, <paramref name="s"/> is folded chunk by chunk
/// into a new buffer: the first chunk is assigned, later chunks are added. Two defects of
/// the earlier code are fixed here: the new tensor was never stored in the result array
/// (leaving null entries), and the assign branch tested <c>go == 0</c>, which is
/// unreachable inside a loop that only runs when <c>go &gt; 0</c>.
/// </remarks>
public static Tensor[] AddFloat32_GetGradients(Tensor s, Tensor[] tensors, bool generateseperately = false)
{
    Tensor[] gradients = new Tensor[tensors.Length];

    // Lookup of reusable gradients keyed by element count; only s is registered.
    Dictionary<long, Tensor> dict = new Dictionary<long, Tensor>();
    dict[s.Shape.TotalSize] = s;

    for (int j = 0; j < gradients.Length; j++)
    {
        Tensor a = tensors[j];

        Tensor shared;
        if (!generateseperately && dict.TryGetValue(a.Shape.TotalSize, out shared))
        {
            gradients[j] = shared;
            continue;
        }

        Tensor gradient = new Tensor(a.Shape.Clone(), TensorConfig.Host_Float32);
        long go = s.Shape.TotalSize / gradient.Shape.TotalSize * gradient.Shape.TotalSize;
        for (long i = 0; i < go; i += gradient.Shape.TotalSize)
        {
            if (i == 0)
            {
                // First chunk initialises the new buffer instead of accumulating onto it.
                VectorizationFloat.ElementWiseAssignAVX((float*)gradient.Base.Array, (float*)s.Base.Array + i, gradient.Shape.TotalSize);
            }
            else
            {
                VectorizationFloat.ElementWiseAddAVX((float*)s.Base.Array + i, (float*)gradient.Base.Array, (float*)gradient.Base.Array, gradient.Shape.TotalSize);
            }
        }

        // Trailing elements of s that do not fill a whole gradient-sized chunk.
        if (go < s.Shape.TotalSize)
        {
            VectorizationFloat.ElementWiseAddAVX((float*)s.Base.Array + go, (float*)gradient.Base.Array, (float*)gradient.Base.Array, s.Shape.TotalSize - go);
        }

        gradients[j] = gradient;
    }

    return gradients;
}
/// <summary>
/// Element-wise multiplication of <paramref name="a"/> and <paramref name="b"/> into
/// <paramref name="res"/>. The operand with fewer elements is re-used for every
/// consecutive chunk of the larger one.
/// </summary>
/// <param name="res">Destination tensor; its total size decides how many elements are written.</param>
/// <param name="a">First factor.</param>
/// <param name="b">Second factor.</param>
public static void MultiplyFloat32(Tensor res, Tensor a, Tensor b)
{
    // Order the operands so that "small" never has more elements than "large".
    Tensor small = a;
    Tensor large = b;
    if (a.Shape.TotalSize > b.Shape.TotalSize)
    {
        small = b;
        large = a;
    }

    float* smallData = (float*)small.Base.Array;
    float* largeData = (float*)large.Base.Array;
    float* output = (float*)res.Base.Array;

    long chunk = small.Shape.TotalSize;
    long total = res.Shape.TotalSize;
    long covered = total / chunk * chunk;

    long offset = 0;
    while (offset < covered)
    {
        VectorizationFloat.ElementWiseMultiplyAVX(smallData, largeData + offset, output + offset, chunk);
        offset += chunk;
    }

    // Trailing elements that do not fill a whole chunk.
    if (covered < total)
    {
        VectorizationFloat.ElementWiseMultiplyAVX(smallData, largeData + covered, output + covered, total - covered);
    }
}
/// <summary>
/// Checks the pointer overload of <c>DotProductFMA</c> against a scalar reference sum
/// for two identical vectors holding 0..100.
/// </summary>
/// <remarks>
/// The initialisation loop is closed before the <c>fixed</c> block. Previously its
/// closing brace was missing, which left the method unbalanced and re-declared
/// <c>i</c> inside the scope of the outer loop variable.
/// </remarks>
public unsafe void DotProductPointer()
{
    int size = 101;
    float[] v1 = new float[size];
    float[] v2 = new float[size];
    for (int i = 0; i < size; i++)
    {
        v1[i] = v2[i] = i;
    }

    fixed (float* ptr = v1, ptr2 = v2)
    {
        double res = VectorizationFloat.DotProductFMA(ptr, ptr2, size);

        // Scalar reference computed with a plain loop.
        double res2 = 0;
        for (int i = 0; i < size; i++)
        {
            res2 += v1[i] * v2[i];
        }

        Assert.AreEqual(res, res2);
    }
}
/// <summary>
/// Checks the array overload of <c>DotProductFMA</c> against a scalar reference sum
/// for two identical vectors holding 0..100.
/// </summary>
public void DotProduct()
{
    int size = 101;
    float[] left = new float[size];
    float[] right = new float[size];
    for (int k = 0; k < size; k++)
    {
        left[k] = right[k] = k;
    }

    // Scalar reference computed with a plain loop.
    double reference = 0;
    for (int k = 0; k < size; k++)
    {
        reference += left[k] * right[k];
    }

    double computed = VectorizationFloat.DotProductFMA(left, right, size);

    Assert.AreEqual(computed, reference);
}
/// <summary>
/// Folds the upstream gradient <paramref name="s"/> into <paramref name="gradienta"/>
/// chunk by chunk (chunk length = total size of <paramref name="gradienta"/>):
/// the first chunk is assigned, every following chunk is added.
/// </summary>
/// <param name="gradienta">Destination gradient buffer.</param>
/// <param name="s">Upstream gradient.</param>
/// <param name="a">Not read by this method.</param>
public static void SubtractFloat32_GetGradientA(Tensor gradienta, Tensor s, Tensor a)
{
    float* target = (float*)gradienta.Base.Array;
    float* upstream = (float*)s.Base.Array;

    long chunk = gradienta.Shape.TotalSize;
    long total = s.Shape.TotalSize;
    long covered = total / chunk * chunk;

    // First whole chunk overwrites the destination.
    if (covered > 0)
    {
        VectorizationFloat.ElementWiseAssignAVX(target, upstream, chunk);
    }

    // Remaining whole chunks accumulate onto it.
    for (long offset = chunk; offset < covered; offset += chunk)
    {
        VectorizationFloat.ElementWiseAddAVX(upstream + offset, target, target, chunk);
    }

    // Trailing elements of s that do not fill a whole chunk.
    if (covered < total)
    {
        VectorizationFloat.ElementWiseAddAVX(upstream + covered, target, target, total - covered);
    }
}
/// <summary>
/// Gradient of the expand operation: zeroes <paramref name="res"/> and accumulates the
/// elements of the upstream gradient <paramref name="s"/> into it.
/// </summary>
/// <param name="res">Destination gradient; reset to 0 before accumulation.</param>
/// <param name="s">Upstream gradient.</param>
/// <param name="thisShape">Shape iterated over in the general path.</param>
/// <param name="term0">Shape supplying the chunk size (fast path) and the <c>Multiplied</c> strides (general path).</param>
/// <param name="Multiplier">Per-dimension divisor applied to the coordinates.</param>
public static void ExpandFloat32_GetGradient_0(Tensor res, Tensor s, Shape thisShape, Shape term0, Shape Multiplier)
{
    res.SetValue(0);
    float* target = (float*)res.Base.Array;
    float* upstream = (float*)s.Base.Array;

    // Fast path for a two-entry multiplier whose second entry is 1:
    // add Multiplier[0] consecutive term0-sized chunks of s onto res.
    if (Multiplier.N == 2 && Multiplier[1] == 1)
    {
        for (int block = 0; block < Multiplier[0]; block++)
        {
            float* chunk = upstream + block * term0.TotalSize;
            VectorizationFloat.ElementWiseAddAVX(target, chunk, target, term0.TotalSize);
        }
        return;
    }

    // General path: walk every flat position of thisShape with a multi-dimensional counter.
    Index counter = new Index(thisShape);
    counter.SetZero();
    for (int flat = 0; flat < thisShape.TotalSize; flat++)
    {
        long destination = 0;
        for (int axis = counter.N - 1; axis >= 0; axis--)
        {
            // Carry into the next-higher axis once this axis has run past its extent.
            if (counter.Indices[axis] == thisShape[axis])
            {
                counter.Indices[axis] = 0;
                counter.Indices[axis - 1]++;
            }
            destination += (counter.Indices[axis] / Multiplier[axis]) * term0.Multiplied[axis + 1];
        }
        target[destination] += upstream[flat];
        counter.Indices[counter.N - 1]++;
    }
}
/// <summary>
/// Expand operation: fills <paramref name="res"/> by reading elements of
/// <paramref name="v"/> according to <paramref name="Multiplier"/>.
/// </summary>
/// <param name="res">Destination tensor; its shape drives the iteration in the general path.</param>
/// <param name="v">Source tensor.</param>
/// <param name="thisShape">Not read by this method.</param>
/// <param name="term0">Shape supplying the chunk size in the fast path.</param>
/// <param name="Multiplier">Per-dimension divisor applied to the coordinates.</param>
public static void ExpandFloat32(Tensor res, Tensor v, Shape thisShape, Shape term0, Shape Multiplier)
{
    float* output = (float*)res.Base.Array;
    float* source = (float*)v.Base.Array;

    // Fast path for a two-entry multiplier whose second entry is 1:
    // write the source buffer into Multiplier[0] consecutive term0-sized chunks of res.
    if (Multiplier.N == 2 && Multiplier[1] == 1)
    {
        for (int block = 0; block < Multiplier[0]; block++)
        {
            float* chunk = output + block * term0.TotalSize;
            VectorizationFloat.ElementWiseAssignAVX(chunk, source, term0.TotalSize);
        }
        return;
    }

    // General path: walk every flat position of res with a multi-dimensional counter.
    Index counter = new Index(res.Shape);
    for (int axis = 0; axis < counter.N; axis++)
    {
        counter.Indices[axis] = 0;
    }

    for (int flat = 0; flat < res.Shape.TotalSize; flat++)
    {
        long origin = 0;
        for (int axis = counter.N - 1; axis >= 0; axis--)
        {
            // Carry into the next-higher axis once this axis has run past its extent.
            if (counter.Indices[axis] == res.Shape[axis])
            {
                counter.Indices[axis] = 0;
                counter.Indices[axis - 1]++;
            }
            origin += (counter.Indices[axis] / Multiplier[axis]) * v.Shape.Multiplied[axis + 1];
        }
        output[flat] = source[origin];
        counter.Indices[counter.N - 1]++;
    }
}
/// <summary>
/// Gradient of an element-wise product with respect to operand <paramref name="a"/>.
/// </summary>
/// <param name="gradienta">Destination gradient buffer.</param>
/// <param name="s">Upstream gradient.</param>
/// <param name="a">Operand the gradient is taken for; only its total size is read.</param>
/// <param name="b">The other factor.</param>
/// <remarks>
/// Two layouts are handled. If <paramref name="s"/> has as many elements as
/// <paramref name="a"/>, <paramref name="b"/> is re-used for every b-sized chunk of
/// <paramref name="s"/> and the products are written chunk by chunk. Otherwise, if
/// <paramref name="s"/> has as many elements as <paramref name="b"/>, the per-chunk
/// products are folded into the single gradient buffer (first chunk via multiply,
/// later chunks via <c>ElementWiseFMA</c>).
/// </remarks>
/// <exception cref="Exception">Thrown when neither size relation holds.</exception>
public static void MultiplyFloat32_GetGradientA(Tensor gradienta, Tensor s, Tensor a, Tensor b)
{
    float* upstream = (float*)s.Base.Array;
    float* other = (float*)b.Base.Array;
    float* target = (float*)gradienta.Base.Array;
    long total = s.Shape.TotalSize;

    if (total == a.Shape.TotalSize)
    {
        long chunk = b.Shape.TotalSize;
        long covered = total / chunk * chunk;
        for (long offset = 0; offset < covered; offset += chunk)
        {
            VectorizationFloat.ElementWiseMultiplyAVX(upstream + offset, other, target + offset, chunk);
        }

        // Trailing elements that do not fill a whole chunk.
        if (covered < total)
        {
            VectorizationFloat.ElementWiseMultiplyAVX(upstream + covered, other, target + covered, total - covered);
        }
        return;
    }

    if (total != b.Shape.TotalSize)
    {
        throw new Exception("Impossible reagion MultiplyFloat32_GetGradientA!");
    }

    long step = a.Shape.TotalSize;
    long whole = total / step * step;

    // First whole chunk initialises the gradient buffer.
    if (whole > 0)
    {
        VectorizationFloat.ElementWiseMultiplyAVX(upstream, other, target, gradienta.Shape.TotalSize);
    }

    // Every further chunk is folded into the same buffer.
    for (long offset = step; offset < whole; offset += step)
    {
        VectorizationFloat.ElementWiseFMA(upstream + offset, other + offset, target, target, gradienta.Shape.TotalSize);
    }

    // Trailing elements that do not fill a whole chunk.
    if (whole < total)
    {
        VectorizationFloat.ElementWiseFMA(upstream + whole, other + whole, target, target, total - whole);
    }
}
/// <summary>
/// Checks the array-times-scalar overload of <c>ElementWiseMultiplyAVX</c>:
/// multiplying 0..999999 by 3 must give <c>i * 3</c> at every position.
/// </summary>
public unsafe void Multiply2()
{
    const int count = 1000000;
    float factor = 3f;

    float[] input = new float[count];
    float[] expected = new float[count];
    for (int k = 0; k < count; k++)
    {
        input[k] = k;
        expected[k] = k * 3;
    }

    float[] actual = new float[count];
    fixed (float* a = input, y = actual)
    {
        VectorizationFloat.ElementWiseMultiplyAVX(a, factor, y, actual.Length);
    }

    Assert.IsTrue(ArrayEqual(actual, expected));
}
/// <summary>
/// Forwards the buffers of <paramref name="gradient"/>, <paramref name="v"/> and
/// <paramref name="combined"/> (in that order) to
/// <c>ReluFloatGradientCalculation</c>, processing <c>combined.Shape.TotalSize</c> elements.
/// </summary>
/// <remarks>
/// NOTE(review): presumably <paramref name="combined"/> is the output buffer - confirm
/// against the kernel's signature.
/// </remarks>
public static void ReluFloat32_GetGradient_0(Tensor combined, Tensor gradient, Tensor v)
{
    float* gradientData = (float*)gradient.Base.Array;
    float* inputData = (float*)v.Base.Array;
    float* combinedData = (float*)combined.Base.Array;

    VectorizationFloat.ReluFloatGradientCalculation(gradientData, inputData, combinedData, combined.Shape.TotalSize);
}
/// <summary>
/// Delegates to <c>VectorizationFloat.MatrixMultiply</c>, passing
/// <paramref name="a"/> and <paramref name="b"/> as operands and
/// <paramref name="res"/> as the last argument.
/// </summary>
/// <param name="res">Tensor handed to the kernel as its third argument.</param>
/// <param name="a">First matrix operand.</param>
/// <param name="b">Second matrix operand.</param>
public static void MatrixMultiplyFloat32(Tensor res, Tensor a, Tensor b)
{
    VectorizationFloat.MatrixMultiply(a, b, res);
}
/// <summary>
/// Runs <c>VectorizationFloat.Sigmoid</c> with the buffer of <paramref name="v"/> as
/// input and the buffer of <paramref name="res"/> as output, over
/// <c>res.Shape.TotalSize</c> elements.
/// </summary>
/// <param name="res">Destination tensor.</param>
/// <param name="v">Source tensor.</param>
public static void SigmoidFloat32(Tensor res, Tensor v)
{
    float* input = (float*)v.Base.Array;
    float* output = (float*)res.Base.Array;

    VectorizationFloat.Sigmoid(input, output, res.Shape.TotalSize);
}
/// <summary>
/// Forwards the buffers of <paramref name="sigmo"/>, <paramref name="s"/> and
/// <paramref name="combined"/> (in that order) to
/// <c>ElementWise_A_MultipliedBy_1_Minus_A_MultipliedByB</c>, processing
/// <c>sigmo.Shape.TotalSize</c> elements.
/// </summary>
/// <remarks>
/// NOTE(review): presumably <paramref name="combined"/> receives
/// <c>sigmo * (1 - sigmo) * s</c> - confirm against the kernel's signature.
/// </remarks>
public static void SigmoidFloat32_GetGradient_0(Tensor combined, Tensor s, Tensor sigmo)
{
    float* sigmoidData = (float*)sigmo.Base.Array;
    float* upstream = (float*)s.Base.Array;
    float* combinedData = (float*)combined.Base.Array;

    VectorizationFloat.ElementWise_A_MultipliedBy_1_Minus_A_MultipliedByB(sigmoidData, upstream, combinedData, sigmo.Shape.TotalSize);
}