Example #1
0
        /// <summary>
        /// Exercises <c>Sse3.MoveLowAndDuplicate</c> on a four-element float vector and
        /// verifies, bit for bit, that output lanes 0/1 equal input lane 0 and output
        /// lanes 2/3 equal input lane 2.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        /// <returns><c>Pass</c> when the check succeeds or SSE3 is unavailable; otherwise <c>Fail</c>.</returns>
        static unsafe int Main(string[] args)
        {
            // Without SSE3 there is nothing to exercise, so the run counts as a pass.
            if (!Sse3.IsSupported)
            {
                return Pass;
            }

            int outcome = Pass;

            using (TestTable<float> table = new TestTable<float>(new float[4] { 1, -5, 100, 0 }, new float[4]))
            {
                var source = Sse.LoadVector128((float*)(table.inArrayPtr));
                var duplicated = Sse3.MoveLowAndDuplicate(source);
                Unsafe.Write(table.outArrayPtr, duplicated);

                // Compare raw bit patterns lane by lane, stopping at the first mismatch.
                bool mismatch = false;
                for (int lane = 0; lane < 4 && !mismatch; lane++)
                {
                    // Clearing bit 0 maps output lanes 0,1,2,3 to input lanes 0,0,2,2.
                    int expectedBits = BitConverter.SingleToInt32Bits(table.inArray[lane & ~1]);
                    int actualBits = BitConverter.SingleToInt32Bits(table.outArray[lane]);
                    mismatch = expectedBits != actualBits;
                }

                if (mismatch)
                {
                    Console.WriteLine("Sse3 MoveLowAndDuplicate failed on float:");
                    foreach (var value in table.outArray)
                    {
                        Console.Write(value + ", ");
                    }
                    Console.WriteLine();
                    outcome = Fail;
                }
            }

            return outcome;
        }
        /// <summary>
        /// Computes the squared 2D distance between <paramref name="left"/> and
        /// <paramref name="right"/>. Only lanes 0 (X) and 1 (Y) contribute.
        /// </summary>
        /// <param name="left">The first operand.</param>
        /// <param name="right">The second operand, subtracted from <paramref name="left"/>.</param>
        /// <returns>
        /// On the hardware-accelerated paths, a vector whose four lanes all hold
        /// (dx * dx) + (dy * dy). When no SSE support is present the result comes from
        /// <c>DistanceSquared2D_Software</c> (presumably the same layout; confirm against that method).
        /// </returns>
        public static Vector4F DistanceSquared2D(Vector4FParam1_3 left, Vector4FParam1_3 right)
        {
            // SSE4.1 has a native dot product instruction, dpps
            if (Sse41.IsSupported)
            {
                Vector4F diff = Sse.Subtract(left, right);

                // High nibble 0011: multiply only lanes 0 and 1.
                // Low nibble 1111: broadcast the sum into every lane of the result.
                const byte control = 0b_0011_1111;
                return Sse41.DotProduct(diff, diff, control);
            }
            // We can use SSE to vectorize the multiplication
            // There are different fastest methods to sum the resultant vector
            // on SSE3 vs SSE1
            else if (Sse3.IsSupported)
            {
                Vector4F diff = Sse.Subtract(left, right);

                Vector4F mul = Sse.Multiply(diff, diff);

                // (X, Y, Z, W) -> (X+Y, Z+W, X+Y, Z+W)
                Vector4F sums = Sse3.HorizontalAdd(mul, mul);

                // MoveLowAndDuplicate maps (A, B, C, D) to (A, A, C, C), so every lane becomes X+Y.
                // The Z+W lanes are discarded here, which is why Z and W need no masking beforehand.
                return Sse3.MoveLowAndDuplicate(sums);
            }
            else if (Sse.IsSupported)
            {
                Vector4F diff = Sse.Subtract(left, right);

                Vector4F mul = Sse.Multiply(diff, diff);

                // Broadcast Y into every lane so it can be added to X in lane 0
                Vector4F temp = Sse.Shuffle(mul, mul, Helpers.Shuffle(1, 1, 1, 1));

                // Lane 0 becomes X+Y; the upper lanes of mul pass through unchanged
                mul = Sse.AddScalar(mul, temp);

                // Broadcast lane 0 (X+Y) into every lane
                mul = Sse.Shuffle(mul, mul, Helpers.Shuffle(0, 0, 0, 0));

                return mul;
            }

            return DistanceSquared2D_Software(left, right);
        }
Example #3
0
        /// <summary>
        /// Divides every lane of <paramref name="vector"/> by its 2D length,
        /// sqrt((X * X) + (Y * Y)), computed from lanes 0 and 1 only.
        /// </summary>
        /// <param name="vector">The vector to normalize.</param>
        /// <returns>
        /// The input with all four lanes divided by the 2D length. The hardware paths apply no
        /// zero-length guard, so a vector whose X and Y are both zero is divided by zero.
        /// When no SSE support is present the result comes from <c>Normalize2D_Software</c>.
        /// </returns>
        public static VectorF Normalize2D(VectorFParam1_3 vector)
        {
            #region Manual Inline
            // SSE4.1 has a native dot product instruction, dpps
            if (Sse41.IsSupported)
            {
                // High nibble 0011: multiply only lanes 0 and 1.
                // Low nibble 1111: broadcast the sum into every lane of the result.
                const byte control = 0b_0011_1111;
                VectorF dp = Sse41.DotProduct(vector, vector, control);

                return Sse.Divide(vector, Sse.Sqrt(dp));
            }
            // We can use SSE to vectorize the multiplication
            // There are different fastest methods to sum the resultant vector
            // on SSE3 vs SSE1
            else if (Sse3.IsSupported)
            {
                VectorF mul = Sse.Multiply(vector, vector);

                // (X, Y, Z, W) -> (X+Y, Z+W, X+Y, Z+W)
                VectorF sums = Sse3.HorizontalAdd(mul, mul);

                // MoveLowAndDuplicate maps (A, B, C, D) to (A, A, C, C), so every lane becomes X+Y.
                // The Z+W lanes are discarded here, which is why Z and W need no masking beforehand.
                VectorF dp = Sse3.MoveLowAndDuplicate(sums);
                return Sse.Divide(vector, Sse.Sqrt(dp));
            }
            else if (Sse.IsSupported)
            {
                VectorF mul = Sse.Multiply(vector, vector);

                // Broadcast Y into every lane so it can be added to X in lane 0
                VectorF temp = Sse.Shuffle(mul, mul, Shuffle(1, 1, 1, 1));

                // Lane 0 becomes X+Y; the upper lanes of mul pass through unchanged
                mul = Sse.AddScalar(mul, temp);

                // Broadcast lane 0 (X+Y) into every lane
                mul = Sse.Shuffle(mul, mul, Shuffle(0, 0, 0, 0));

                return Sse.Divide(vector, Sse.Sqrt(mul));
            }
            #endregion

            return Normalize2D_Software(vector);
        }
Example #4
0
        /// <summary>
        /// Computes the 2D dot product of <paramref name="left"/> and <paramref name="right"/>.
        /// Only lanes 0 (X) and 1 (Y) contribute.
        /// </summary>
        /// <param name="left">The first operand.</param>
        /// <param name="right">The second operand.</param>
        /// <returns>
        /// On the SSE4.1 and SSE3 paths, a vector whose four lanes all hold
        /// (left.X * right.X) + (left.Y * right.Y). The SSE path is expected to produce the same
        /// layout, assuming <c>ShuffleValues.YYYY</c>/<c>XXXX</c> broadcast lane 1/lane 0 as their
        /// names suggest. When no SSE support is present the result comes from
        /// <c>DotProduct2D_Software</c>.
        /// </returns>
        public static Vector128<float> DotProduct2D(Vector128<float> left, Vector128<float> right)
        {
            // SSE4.1 has a native dot product instruction, dpps
            if (Sse41.IsSupported)
            {
                // High nibble 0011: multiply only lanes 0 and 1.
                // Low nibble 1111: broadcast the sum into every lane of the result.
                const byte control = 0b_0011_1111;
                return Sse41.DotProduct(left, right, control);
            }
            // We can use SSE to vectorize the multiplication
            // There are different fastest methods to sum the resultant vector
            // on SSE3 vs SSE1
            else if (Sse3.IsSupported)
            {
                Vector128<float> mul = Sse.Multiply(left, right);

                // (X, Y, Z, W) -> (X+Y, Z+W, X+Y, Z+W)
                Vector128<float> sums = Sse3.HorizontalAdd(mul, mul);

                // MoveLowAndDuplicate maps (A, B, C, D) to (A, A, C, C), so every lane becomes X+Y.
                // The Z+W lanes are discarded here, which is why Z and W need no masking beforehand.
                return Sse3.MoveLowAndDuplicate(sums);
            }
            else if (Sse.IsSupported)
            {
                Vector128<float> mul = Sse.Multiply(left, right);

                // Presumably broadcasts Y into every lane (per the constant's name)
                Vector128<float> temp = Sse.Shuffle(mul, mul, ShuffleValues.YYYY);

                // Lane 0 becomes mul[0] + temp[0]; the upper lanes of mul pass through unchanged
                mul = Sse.AddScalar(mul, temp);

                // Presumably broadcasts lane 0 into every lane (per the constant's name)
                mul = Sse.Shuffle(mul, mul, ShuffleValues.XXXX);

                return mul;
            }

            return DotProduct2D_Software(left, right);
        }
Example #5
0
 /// <summary>
 /// Forwards to <c>Sse3.MoveLowAndDuplicate</c>, which maps lanes (A, B, C, D) to (A, A, C, C).
 /// </summary>
 /// <param name="source">The vector whose even-indexed lanes are duplicated.</param>
 /// <returns>The result of <c>Sse3.MoveLowAndDuplicate(source)</c>.</returns>
 public static Vector128<float> _mm_moveldup_ps(Vector128<float> source)
     => Sse3.MoveLowAndDuplicate(source);
Example #6
0
 /// <summary>
 /// Forwards to <c>Sse3.MoveLowAndDuplicate</c>, which maps lanes (A, B, C, D) to (A, A, C, C).
 /// </summary>
 /// <param name="a">The vector whose even-indexed lanes are duplicated.</param>
 /// <returns>The result of <c>Sse3.MoveLowAndDuplicate(a)</c>.</returns>
 public static __m128 _mm_moveldup_ps(__m128 a)
 {
     return Sse3.MoveLowAndDuplicate(a);
 }