/// <summary>
/// Hardware test for Sse3.MoveLowAndDuplicate on single-precision floats.
/// Loads four floats, runs the intrinsic, stores the result and checks that the
/// output is (in[0], in[0], in[2], in[2]).
/// </summary>
/// <param name="args">Command-line arguments; not read by this test.</param>
/// <returns>
/// Pass when SSE3 is unavailable or the output matches the expectation; Fail otherwise.
/// </returns>
static unsafe int Main(string[] args)
{
    // Without SSE3 there is nothing to exercise, so the run is reported as a pass.
    if (!Sse3.IsSupported)
    {
        return Pass;
    }

    int outcome = Pass;

    using (TestTable<float> floatTable = new TestTable<float>(new float[4] { 1, -5, 100, 0 }, new float[4]))
    {
        var input = Sse.LoadVector128((float*)(floatTable.inArrayPtr));
        var duplicated = Sse3.MoveLowAndDuplicate(input);
        Unsafe.Write(floatTable.outArrayPtr, duplicated);

        // Lanes are compared bit-for-bit: the even input lanes must appear twice each in the output.
        bool lanesMatch =
            BitConverter.SingleToInt32Bits(floatTable.inArray[0]) == BitConverter.SingleToInt32Bits(floatTable.outArray[0])
            && BitConverter.SingleToInt32Bits(floatTable.inArray[0]) == BitConverter.SingleToInt32Bits(floatTable.outArray[1])
            && BitConverter.SingleToInt32Bits(floatTable.inArray[2]) == BitConverter.SingleToInt32Bits(floatTable.outArray[2])
            && BitConverter.SingleToInt32Bits(floatTable.inArray[2]) == BitConverter.SingleToInt32Bits(floatTable.outArray[3]);

        if (!lanesMatch)
        {
            // Dump the actual output lanes to help diagnose the mismatch.
            Console.WriteLine("Sse3 MoveLowAndDuplicate failed on float:");
            foreach (var lane in floatTable.outArray)
            {
                Console.Write(lane + ", ");
            }
            Console.WriteLine();
            outcome = Fail;
        }
    }

    return outcome;
}
/// <summary>
/// Computes the squared 2D distance between <paramref name="left"/> and <paramref name="right"/>:
/// with d = left - right, the result is d.X * d.X + d.Y * d.Y. The Z and W lanes of the inputs
/// do not contribute.
/// </summary>
/// <param name="left">First vector; only lanes 0 (X) and 1 (Y) affect the result.</param>
/// <param name="right">Second vector; only lanes 0 (X) and 1 (Y) affect the result.</param>
/// <returns>
/// On the SSE paths, a vector holding the squared distance in all four lanes. When no SSE
/// support is present the value comes from DistanceSquared2D_Software (not visible here;
/// presumably the same layout - confirm).
/// </returns>
public static Vector4F DistanceSquared2D(Vector4FParam1_3 left, Vector4FParam1_3 right)
{
    // SSE4.1 has a native dot product instruction, dpps.
    if (Sse41.IsSupported)
    {
        Vector4F diff = Sse.Subtract(left, right);

        // High nibble 0011: only lanes 0 and 1 are multiplied and summed.
        // Low nibble 1111: the sum is written to every lane of the result.
        const byte control = 0b_0011_1111;
        return Sse41.DotProduct(diff, diff, control);
    }
    // Without dpps the multiplication is still vectorized; the fastest way to sum
    // the products differs between SSE3 and SSE1.
    else if (Sse3.IsSupported)
    {
        Vector4F diff = Sse.Subtract(left, right);
        Vector4F mul = Sse.Multiply(diff, diff);

        // hadd(mul, mul) yields (X+Y, Z+W, X+Y, Z+W). Z and W only ever land in lanes 1 and 3,
        // which are discarded below, so they do not need to be masked to zero first.
        Vector4F sums = Sse3.HorizontalAdd(mul, mul);

        // MoveLowAndDuplicate maps (a, b, c, d) to (a, a, c, c); lanes 0 and 2 both hold X+Y,
        // so every lane of the result is X+Y.
        return Sse3.MoveLowAndDuplicate(sums);
    }
    else if (Sse.IsSupported)
    {
        Vector4F diff = Sse.Subtract(left, right);
        Vector4F mul = Sse.Multiply(diff, diff);

        // Broadcast Y*Y, add it into lane 0 only, then broadcast lane 0 (X*X + Y*Y) everywhere.
        Vector4F temp = Sse.Shuffle(mul, mul, Helpers.Shuffle(1, 1, 1, 1));
        mul = Sse.AddScalar(mul, temp);
        mul = Sse.Shuffle(mul, mul, Helpers.Shuffle(0, 0, 0, 0));
        return mul;
    }

    return DistanceSquared2D_Software(left, right);
}
/// <summary>
/// Divides <paramref name="vector"/> by the length of its (X, Y) part, i.e. by
/// sqrt(X * X + Y * Y). On the SSE paths every lane of the input is divided by that
/// length, and no guard against a zero length is applied.
/// </summary>
/// <param name="vector">Vector to normalize; only lanes 0 (X) and 1 (Y) contribute to the length.</param>
/// <returns>
/// The scaled vector. When no SSE support is present the value comes from
/// Normalize2D_Software (not visible here).
/// </returns>
public static VectorF Normalize2D(VectorFParam1_3 vector)
{
    #region Manual Inline
    // SSE4.1 has a native dot product instruction, dpps.
    if (Sse41.IsSupported)
    {
        // High nibble 0011: only lanes 0 and 1 are multiplied and summed.
        // Low nibble 1111: the sum is written to every lane of the result.
        const byte control = 0b_0011_1111;
        VectorF dp = Sse41.DotProduct(vector, vector, control);

        return Sse.Divide(vector, Sse.Sqrt(dp));
    }
    // Without dpps the multiplication is still vectorized; the fastest way to sum
    // the products differs between SSE3 and SSE1.
    else if (Sse3.IsSupported)
    {
        VectorF mul = Sse.Multiply(vector, vector);

        // hadd(mul, mul) yields (X+Y, Z+W, X+Y, Z+W). Z and W only ever land in lanes 1 and 3,
        // which are discarded below, so they do not need to be masked to zero first.
        VectorF sums = Sse3.HorizontalAdd(mul, mul);

        // MoveLowAndDuplicate maps (a, b, c, d) to (a, a, c, c); lanes 0 and 2 both hold X+Y,
        // so every lane of dp is X+Y.
        VectorF dp = Sse3.MoveLowAndDuplicate(sums);

        return Sse.Divide(vector, Sse.Sqrt(dp));
    }
    else if (Sse.IsSupported)
    {
        VectorF mul = Sse.Multiply(vector, vector);

        // Broadcast Y*Y, add it into lane 0 only, then broadcast lane 0 (X*X + Y*Y) everywhere.
        VectorF temp = Sse.Shuffle(mul, mul, Shuffle(1, 1, 1, 1));
        mul = Sse.AddScalar(mul, temp);
        mul = Sse.Shuffle(mul, mul, Shuffle(0, 0, 0, 0));

        return Sse.Divide(vector, Sse.Sqrt(mul));
    }
    #endregion

    return Normalize2D_Software(vector);
}
/// <summary>
/// Computes the 2D dot product of <paramref name="left"/> and <paramref name="right"/>:
/// left.X * right.X + left.Y * right.Y. The Z and W lanes of the inputs do not contribute.
/// </summary>
/// <param name="left">First operand; only lanes 0 (X) and 1 (Y) affect the result.</param>
/// <param name="right">Second operand; only lanes 0 (X) and 1 (Y) affect the result.</param>
/// <returns>
/// On the SSE paths, a vector holding the dot product in all four lanes. When no SSE
/// support is present the value comes from DotProduct2D_Software (not visible here;
/// presumably the same layout - confirm).
/// </returns>
public static Vector128<float> DotProduct2D(Vector128<float> left, Vector128<float> right)
{
    // SSE4.1 has a native dot product instruction, dpps.
    if (Sse41.IsSupported)
    {
        // High nibble 0011: only lanes 0 and 1 are multiplied and summed.
        // Low nibble 1111: the sum is written to every lane of the result.
        const byte control = 0b_0011_1111;
        return Sse41.DotProduct(left, right, control);
    }
    // Without dpps the multiplication is still vectorized; the fastest way to sum
    // the products differs between SSE3 and SSE1.
    else if (Sse3.IsSupported)
    {
        Vector128<float> mul = Sse.Multiply(left, right);

        // hadd(mul, mul) yields (X+Y, Z+W, X+Y, Z+W). Z and W only ever land in lanes 1 and 3,
        // which are discarded below, so no lane needs to be masked to zero first.
        Vector128<float> sums = Sse3.HorizontalAdd(mul, mul);

        // MoveLowAndDuplicate maps (a, b, c, d) to (a, a, c, c); lanes 0 and 2 both hold X+Y,
        // so every lane of the result is X+Y.
        return Sse3.MoveLowAndDuplicate(sums);
    }
    else if (Sse.IsSupported)
    {
        Vector128<float> mul = Sse.Multiply(left, right);

        // Broadcast lane Y, add it into lane 0 only, then broadcast lane 0 (X + Y) everywhere.
        Vector128<float> temp = Sse.Shuffle(mul, mul, ShuffleValues.YYYY);
        mul = Sse.AddScalar(mul, temp);
        mul = Sse.Shuffle(mul, mul, ShuffleValues.XXXX);
        return mul;
    }

    return DotProduct2D_Software(left, right);
}
/// <summary>
/// C-intrinsic-style alias that forwards to Sse3.MoveLowAndDuplicate: the even-indexed
/// lanes of <paramref name="source"/> are duplicated, mapping (X, Y, Z, W) to (X, X, Z, Z).
/// </summary>
/// <param name="source">Vector whose lanes 0 and 2 are duplicated.</param>
/// <returns>The vector produced by Sse3.MoveLowAndDuplicate.</returns>
public static Vector128<float> _mm_moveldup_ps(Vector128<float> source)
    => Sse3.MoveLowAndDuplicate(source);
/// <summary>
/// C-intrinsic-style alias that forwards to Sse3.MoveLowAndDuplicate: the even-indexed
/// lanes of <paramref name="a"/> are duplicated, mapping (X, Y, Z, W) to (X, X, Z, Z).
/// </summary>
/// <param name="a">Vector whose lanes 0 and 2 are duplicated.</param>
/// <returns>The vector produced by Sse3.MoveLowAndDuplicate.</returns>
public static __m128 _mm_moveldup_ps(__m128 a)
{
    return Sse3.MoveLowAndDuplicate(a);
}