Esempio n. 1
0
        /// <summary>
        /// Checks <c>Sse.AddScalar</c> on a single pair of four-element float vectors.
        /// </summary>
        /// <param name="args">Command-line arguments; this method does not read them.</param>
        /// <returns>
        /// <c>Pass</c> when SSE is not supported or the output matches the expectation; <c>Fail</c> otherwise.
        /// </returns>
        static unsafe int Main(string[] args)
        {
            // Without SSE there is nothing to exercise.
            if (!Sse.IsSupported)
            {
                return Pass;
            }

            int outcome = Pass;

            float[] firstInput  = new float[4] { 1, -5, 100, 0 };
            float[] secondInput = new float[4] { 22, -1, -50, 3 };

            using (TestTable <float> floatTable = new TestTable <float>(firstInput, secondInput, new float[4]))
            {
                var lhs = Unsafe.Read <Vector128 <float> >(floatTable.inArray1Ptr);
                var rhs = Unsafe.Read <Vector128 <float> >(floatTable.inArray2Ptr);
                var sum = Sse.AddScalar(lhs, rhs);
                Unsafe.Write(floatTable.outArrayPtr, sum);

                // Expected: element 0 is the sum of both inputs, elements 1..3 are copied from the first input.
                bool matches = floatTable.CheckResult((a, b, r) =>
                    (r[0] == (a[0] + b[0])) && (r[1] == a[1]) && (r[2] == a[2]) && (r[3] == a[3]));

                if (!matches)
                {
                    Console.WriteLine("SSE AddScalar failed on float:");
                    foreach (var value in floatTable.outArray)
                    {
                        Console.Write(value + ", ");
                    }
                    Console.WriteLine();
                    outcome = Fail;
                }
            }

            return outcome;
        }
        /// <summary>
        /// Computes the squared distance between two 4D vectors.
        /// </summary>
        /// <param name="left">The first vector.</param>
        /// <param name="right">The second vector.</param>
        /// <returns>
        /// On the hardware paths, a vector whose four elements all hold the sum of the squared
        /// element-wise differences. Without SSE the result comes from <c>DistanceSquared4D_Software</c>.
        /// </returns>
        public static Vector4F DistanceSquared4D(Vector4FParam1_3 left, Vector4FParam1_3 right)
        {
            if (Sse41.IsSupported)
            {
                Vector4F diff = Sse.Subtract(left, right);
                // Upper nibble: include all 4 products in the sum. Lower nibble: write the sum to all 4 elements.
                const byte control = 0b_1111_1111;
                return(Sse41.DotProduct(diff, diff, control));
            }
            else if (Sse3.IsSupported)
            {
                Vector4F diff = Sse.Subtract(left, right);
                Vector4F mul  = Sse.Multiply(diff, diff);
                // Two horizontal adds leave the full four-term sum in every element.
                mul = Sse3.HorizontalAdd(mul, mul);
                return(Sse3.HorizontalAdd(mul, mul));
            }
            else if (Sse.IsSupported)
            {
                Vector4F diff = Sse.Subtract(left, right);
                Vector4F copy = diff;
                Vector4F mul  = Sse.Multiply(diff, copy);
                // Lane comments assume Helpers.Shuffle uses the _MM_SHUFFLE argument order - TODO confirm.
                // Move the X and Y products into the upper two elements ...
                copy = Sse.Shuffle(copy, mul, Helpers.Shuffle(1, 0, 0, 0));
                // ... so the upper elements become X+Z and Y+W.
                copy = Sse.Add(copy, mul);
                // Place Y+W into element 2.
                mul  = Sse.Shuffle(mul, copy, Helpers.Shuffle(0, 3, 0, 0));
                // This has to be a full-width add: the value broadcast below is element 2, and a scalar add
                // (which only updates element 0) would leave it holding just two of the four products.
                mul  = Sse.Add(mul, copy);

                // Broadcast element 2, which now holds the complete sum.
                return(Sse.Shuffle(mul, mul, Helpers.Shuffle(2, 2, 2, 2)));
            }

            return(DistanceSquared4D_Software(left, right));
        }
Esempio n. 3
0
            /// <summary>
            /// Applies <c>Sse.AddScalar</c> to this instance's <c>_fld1</c> and <c>_fld2</c>, writes the result
            /// into the output buffer of <paramref name="testClass"/>, and asks it to validate the outcome.
            /// </summary>
            /// <param name="testClass">Test object that owns the output buffer and the validation routine.</param>
            public void RunStructFldScenario(SimpleBinaryOpTest__AddScalarSingle testClass)
            {
                // Operands are read straight from the fields; no intermediate locals are introduced.
                var sum = Sse.AddScalar(_fld1, _fld2);

                Unsafe.Write(testClass._dataTable.outArrayPtr, sum);
                testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
            }
Esempio n. 4
0
        /// <summary>
        /// Normalizes a 4D vector by dividing every element by the vector's length.
        /// </summary>
        /// <param name="vector">The vector to normalize.</param>
        /// <returns>
        /// On the hardware paths, <paramref name="vector"/> divided element-wise by the square root of its
        /// dot product with itself. Without SSE the result comes from <c>Normalize4D_Software</c>.
        /// </returns>
        public static VectorF Normalize4D(VectorFParam1_3 vector)
        {
            if (Sse41.IsSupported)
            {
                // Upper nibble: include all 4 products in the sum. Lower nibble: write the sum to all 4 elements.
                const byte control = 0b_1111_1111;
                // The dot product is the squared length, so take the square root before dividing,
                // exactly as the SSE3 and SSE paths below do.
                return(Sse.Divide(vector, Sse.Sqrt(Sse41.DotProduct(vector, vector, control))));
            }
            else if (Sse3.IsSupported)
            {
                VectorF mul = Sse.Multiply(vector, vector);
                // Two horizontal adds leave the full four-term sum in every element.
                mul = Sse3.HorizontalAdd(mul, mul);
                return(Sse.Divide(vector, Sse.Sqrt(Sse3.HorizontalAdd(mul, mul))));
            }
            else if (Sse.IsSupported)
            {
                VectorF copy = vector;
                VectorF mul  = Sse.Multiply(vector, copy);
                // Lane comments assume Shuffle uses the _MM_SHUFFLE argument order - TODO confirm.
                // Move the X and Y products into the upper two elements ...
                copy = Sse.Shuffle(copy, mul, Shuffle(1, 0, 0, 0));
                // ... so the upper elements become X+Z and Y+W.
                copy = Sse.Add(copy, mul);
                // Place Y+W into element 2.
                mul  = Sse.Shuffle(mul, copy, Shuffle(0, 3, 0, 0));
                // This has to be a full-width add: the value broadcast below is element 2, and a scalar add
                // (which only updates element 0) would leave it holding just two of the four products.
                mul  = Sse.Add(mul, copy);

                // Broadcast element 2 (the squared length), take its square root and divide.
                return(Sse.Divide(vector, Sse.Sqrt(Sse.Shuffle(mul, mul, Shuffle(2, 2, 2, 2)))));
            }

            return(Normalize4D_Software(vector));
        }
Esempio n. 5
0
        /// <summary>
        /// Computes the dot product of two 4D vectors.
        /// </summary>
        /// <param name="left">The first vector.</param>
        /// <param name="right">The second vector.</param>
        /// <returns>
        /// On the hardware paths, a vector whose four elements all hold the sum of the four element-wise
        /// products. Without SSE the result comes from <c>DotProduct4D_Software</c>.
        /// </returns>
        public static VectorF DotProduct4D(VectorFParam1_3 left, VectorFParam1_3 right)
        {
            if (Sse41.IsSupported)
            {
                // Upper nibble: include all 4 products in the sum. Lower nibble: write the sum to all 4 elements.
                const byte control = 0b_1111_1111;
                return(Sse41.DotProduct(left, right, control));
            }
            else if (Sse3.IsSupported)
            {
                VectorF mul = Sse.Multiply(left, right);
                // Two horizontal adds leave the full four-term sum in every element.
                mul = Sse3.HorizontalAdd(mul, mul);
                return(Sse3.HorizontalAdd(mul, mul));
            }
            else if (Sse.IsSupported)
            {
                VectorF copy = right;
                VectorF mul  = Sse.Multiply(left, copy);
                // Lane comments assume Shuffle uses the _MM_SHUFFLE argument order - TODO confirm.
                // Move the X and Y products into the upper two elements ...
                copy = Sse.Shuffle(copy, mul, Shuffle(1, 0, 0, 0));
                // ... so the upper elements become X+Z and Y+W.
                copy = Sse.Add(copy, mul);
                // Place Y+W into element 2.
                mul  = Sse.Shuffle(mul, copy, Shuffle(0, 3, 0, 0));
                // This has to be a full-width add: the value broadcast below is element 2, and a scalar add
                // (which only updates element 0) would leave it holding just two of the four products.
                mul  = Sse.Add(mul, copy);

                // Broadcast element 2, which now holds the complete sum.
                return(Sse.Shuffle(mul, mul, Shuffle(2, 2, 2, 2)));
            }

            return(DotProduct4D_Software(left, right));
        }
Esempio n. 6
0
        /// <summary>
        /// Applies <c>Sse.AddScalar</c> to the instance fields <c>_fld1</c> and <c>_fld2</c>, stores the result
        /// in the output buffer, and passes the inputs and the output array to <c>ValidateResult</c>.
        /// </summary>
        public void RunFldScenario()
        {
            // Operands are read straight from the fields; no intermediate locals are introduced.
            var sum = Sse.AddScalar(_fld1, _fld2);

            Unsafe.Write(_dataTable.outArrayPtr, sum);
            ValidateResult(_fld1, _fld2, _dataTable.outArray);
        }
Esempio n. 7
0
        /// <summary>
        /// Creates a local <c>TestStruct</c>, applies <c>Sse.AddScalar</c> to its two vector fields, stores the
        /// result in the output buffer, and passes inputs and output pointer to <c>ValidateResult</c>.
        /// </summary>
        public void RunStructLclFldScenario()
        {
            var local = TestStruct.Create();
            var sum   = Sse.AddScalar(local._fld1, local._fld2);

            Unsafe.Write(_dataTable.outArrayPtr, sum);
            ValidateResult(local._fld1, local._fld2, _dataTable.outArrayPtr);
        }
Esempio n. 8
0
        /// <summary>
        /// Creates a local <c>SimpleBinaryOpTest__AddScalarSingle</c>, applies <c>Sse.AddScalar</c> to its two
        /// vector fields, stores the result in this instance's output buffer, and passes inputs and the
        /// output array to <c>ValidateResult</c>.
        /// </summary>
        public void RunLclFldScenario()
        {
            var local = new SimpleBinaryOpTest__AddScalarSingle();
            var sum   = Sse.AddScalar(local._fld1, local._fld2);

            Unsafe.Write(_dataTable.outArrayPtr, sum);
            ValidateResult(local._fld1, local._fld2, _dataTable.outArray);
        }
Esempio n. 9
0
        /// <summary>
        /// Loads both inputs into locals with <c>Sse.LoadAlignedVector128</c>, applies <c>Sse.AddScalar</c> to
        /// the locals, stores the result in the output buffer, and passes everything to <c>ValidateResult</c>.
        /// </summary>
        public void RunLclVarScenario_LoadAligned()
        {
            // The operands are deliberately kept in locals before the call.
            var firstOperand  = Sse.LoadAlignedVector128((Single *)(_dataTable.inArray1Ptr));
            var secondOperand = Sse.LoadAlignedVector128((Single *)(_dataTable.inArray2Ptr));
            var sum           = Sse.AddScalar(firstOperand, secondOperand);

            Unsafe.Write(_dataTable.outArrayPtr, sum);
            ValidateResult(firstOperand, secondOperand, _dataTable.outArrayPtr);
        }
Esempio n. 10
0
        /// <summary>
        /// Reads both inputs into locals with <c>Unsafe.Read</c>, applies <c>Sse.AddScalar</c> to the locals,
        /// stores the result in the output buffer, and passes inputs and the output array to
        /// <c>ValidateResult</c>.
        /// </summary>
        public void RunLclVarScenario()
        {
            // The operands are deliberately kept in locals before the call.
            var firstOperand  = Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr);
            var secondOperand = Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr);
            var sum           = Sse.AddScalar(firstOperand, secondOperand);

            Unsafe.Write(_dataTable.outArrayPtr, sum);
            ValidateResult(firstOperand, secondOperand, _dataTable.outArray);
        }
Esempio n. 11
0
        /// <summary>
        /// Announces the scenario, applies <c>Sse.AddScalar</c> to the instance fields <c>_fld1</c> and
        /// <c>_fld2</c>, stores the result in the output buffer, and passes inputs and output pointer to
        /// <c>ValidateResult</c>.
        /// </summary>
        public void RunClassFldScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));

            // Operands are read straight from the fields; no intermediate locals are introduced.
            var sum = Sse.AddScalar(_fld1, _fld2);

            Unsafe.Write(_dataTable.outArrayPtr, sum);
            ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
        }
Esempio n. 12
0
        /// <summary>
        /// Announces the scenario, creates a local <c>TestStruct</c>, applies <c>Sse.AddScalar</c> to its two
        /// vector fields, stores the result in the output buffer, and passes inputs and output pointer to
        /// <c>ValidateResult</c>.
        /// </summary>
        public void RunStructLclFldScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));

            var local = TestStruct.Create();
            var sum   = Sse.AddScalar(local._fld1, local._fld2);

            Unsafe.Write(_dataTable.outArrayPtr, sum);
            ValidateResult(local._fld1, local._fld2, _dataTable.outArrayPtr);
        }
Esempio n. 13
0
        /// <summary>
        /// Applies <c>Sse.AddScalar</c> to two vectors loaded inline with <c>Sse.LoadAlignedVector128</c>,
        /// stores the result in the output buffer, and passes the three buffer pointers to
        /// <c>ValidateResult</c>.
        /// </summary>
        public void RunBasicScenario_LoadAligned()
        {
            // Both loads stay inline as call arguments; no operand locals are introduced.
            var sum = Sse.AddScalar(Sse.LoadAlignedVector128((Single *)(_dataTable.inArray1Ptr)),
                                    Sse.LoadAlignedVector128((Single *)(_dataTable.inArray2Ptr)));

            Unsafe.Write(_dataTable.outArrayPtr, sum);
            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
        }
Esempio n. 14
0
        /// <summary>
        /// Announces the scenario, creates a local <c>SimpleBinaryOpTest__AddScalarSingle</c>, applies
        /// <c>Sse.AddScalar</c> to its two vector fields, stores the result in this instance's output buffer,
        /// and passes inputs and output pointer to <c>ValidateResult</c>.
        /// </summary>
        public void RunClassLclFldScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));

            var local = new SimpleBinaryOpTest__AddScalarSingle();
            var sum   = Sse.AddScalar(local._fld1, local._fld2);

            Unsafe.Write(_dataTable.outArrayPtr, sum);
            ValidateResult(local._fld1, local._fld2, _dataTable.outArrayPtr);
        }
Esempio n. 15
0
        /// <summary>
        /// Applies <c>Sse.AddScalar</c> to <c>_clsVar1</c> and <c>_clsVar2</c>, stores the result in the output
        /// buffer, and passes inputs and the output array to <c>ValidateResult</c>.
        /// </summary>
        public void RunClsVarScenario()
        {
            // Operands are passed directly; no intermediate locals are introduced.
            var sum = Sse.AddScalar(_clsVar1, _clsVar2);

            Unsafe.Write(_dataTable.outArrayPtr, sum);
            ValidateResult(_clsVar1, _clsVar2, _dataTable.outArray);
        }
Esempio n. 16
0
        /// <summary>
        /// Applies <c>Sse.AddScalar</c> to two vectors read inline with <c>Unsafe.Read</c>, stores the result
        /// in the output buffer, and passes the input and output arrays to <c>ValidateResult</c>.
        /// </summary>
        public void RunBasicScenario()
        {
            // Both reads stay inline as call arguments; no operand locals are introduced.
            var sum = Sse.AddScalar(Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr),
                                    Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr));

            Unsafe.Write(_dataTable.outArrayPtr, sum);
            ValidateResult(_dataTable.inArray1, _dataTable.inArray2, _dataTable.outArray);
        }
Esempio n. 17
0
        /// <summary>
        /// Announces the scenario, loads both inputs into locals with <c>Sse.LoadAlignedVector128</c>, applies
        /// <c>Sse.AddScalar</c> to the locals, stores the result in the output buffer, and passes everything
        /// to <c>ValidateResult</c>.
        /// </summary>
        public void RunLclVarScenario_LoadAligned()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));

            // The operands are deliberately kept in locals before the call.
            var firstOperand  = Sse.LoadAlignedVector128((Single *)(_dataTable.inArray1Ptr));
            var secondOperand = Sse.LoadAlignedVector128((Single *)(_dataTable.inArray2Ptr));
            var sum           = Sse.AddScalar(firstOperand, secondOperand);

            Unsafe.Write(_dataTable.outArrayPtr, sum);
            ValidateResult(firstOperand, secondOperand, _dataTable.outArrayPtr);
        }
Esempio n. 18
0
        /// <summary>
        /// Announces the scenario, reads both inputs into locals with <c>Unsafe.Read</c>, applies
        /// <c>Sse.AddScalar</c> to the locals, stores the result in the output buffer, and passes everything
        /// to <c>ValidateResult</c>.
        /// </summary>
        public void RunLclVarScenario_UnsafeRead()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));

            // The operands are deliberately kept in locals before the call.
            var firstOperand  = Unsafe.Read <Vector128 <Single> >(_dataTable.inArray1Ptr);
            var secondOperand = Unsafe.Read <Vector128 <Single> >(_dataTable.inArray2Ptr);
            var sum           = Sse.AddScalar(firstOperand, secondOperand);

            Unsafe.Write(_dataTable.outArrayPtr, sum);
            ValidateResult(firstOperand, secondOperand, _dataTable.outArrayPtr);
        }
        /// <summary>
        /// Announces the scenario, loads both inputs into locals with <c>Sse.LoadVector128</c>, applies
        /// <c>Sse.AddScalar</c> to the locals, stores the result in the output buffer, and passes everything
        /// to <c>ValidateResult</c>.
        /// </summary>
        public void RunLclVarScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));

            // The operands are deliberately kept in locals before the call.
            var firstOperand  = Sse.LoadVector128((Single *)(_dataTable.inArray1Ptr));
            var secondOperand = Sse.LoadVector128((Single *)(_dataTable.inArray2Ptr));
            var sum           = Sse.AddScalar(firstOperand, secondOperand);

            Unsafe.Write(_dataTable.outArrayPtr, sum);
            ValidateResult(firstOperand, secondOperand, _dataTable.outArrayPtr);
        }
Esempio n. 20
0
        /// <summary>
        /// Announces the scenario, creates a local <c>TestStruct</c>, loads its two vector fields through
        /// their addresses with <c>Sse.LoadVector128</c>, applies <c>Sse.AddScalar</c>, stores the result in
        /// the output buffer, and passes inputs and output pointer to <c>ValidateResult</c>.
        /// </summary>
        public void RunStructLclFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario_Load));

            var local = TestStruct.Create();

            // Both loads stay inline and go through the addresses of the local struct's fields.
            var sum = Sse.AddScalar(Sse.LoadVector128((Single *)(&local._fld1)),
                                    Sse.LoadVector128((Single *)(&local._fld2)));

            Unsafe.Write(_dataTable.outArrayPtr, sum);
            ValidateResult(local._fld1, local._fld2, _dataTable.outArrayPtr);
        }
Esempio n. 21
0
            /// <summary>
            /// Pins this instance's <c>_fld1</c> and <c>_fld2</c>, loads them with <c>Sse.LoadVector128</c>,
            /// applies <c>Sse.AddScalar</c>, writes the result into the output buffer of
            /// <paramref name="testClass"/>, and asks it to validate the outcome.
            /// </summary>
            /// <param name="testClass">Test object that owns the output buffer and the validation routine.</param>
            public void RunStructFldScenario_Load(SimpleBinaryOpTest__AddScalarSingle testClass)
            {
                fixed(Vector128 <Single> *firstPtr = &_fld1)
                fixed(Vector128 <Single> *secondPtr = &_fld2)
                {
                    // Both loads stay inline as call arguments and read through the pinned pointers.
                    var sum = Sse.AddScalar(Sse.LoadVector128((Single *)(firstPtr)),
                                            Sse.LoadVector128((Single *)(secondPtr)));

                    Unsafe.Write(testClass._dataTable.outArrayPtr, sum);
                    testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
                }
            }
Esempio n. 22
0
        /// <summary>
        /// Announces the scenario, pins the instance fields <c>_fld1</c> and <c>_fld2</c>, loads them with
        /// <c>Sse.LoadVector128</c>, applies <c>Sse.AddScalar</c>, stores the result in the output buffer,
        /// and passes inputs and output pointer to <c>ValidateResult</c>.
        /// </summary>
        public void RunClassFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario_Load));

            fixed(Vector128 <Single> *firstPtr = &_fld1)
            fixed(Vector128 <Single> *secondPtr = &_fld2)
            {
                // Both loads stay inline as call arguments and read through the pinned pointers.
                var sum = Sse.AddScalar(Sse.LoadVector128((Single *)(firstPtr)),
                                        Sse.LoadVector128((Single *)(secondPtr)));

                Unsafe.Write(_dataTable.outArrayPtr, sum);
                ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
            }
        }
Esempio n. 23
0
        /// <summary>
        /// Computes the squared distance between two vectors using only their first two elements.
        /// </summary>
        /// <param name="left">The first vector.</param>
        /// <param name="right">The second vector.</param>
        /// <returns>
        /// On the hardware paths, a vector with the squared X/Y distance broadcast to every element.
        /// Without SSE the result comes from <c>DistanceSquared2D_Software</c>.
        /// </returns>
        public static Vector4F DistanceSquared2D(Vector4FParam1_3 left, Vector4FParam1_3 right)
        {
            // Best case: SSE4.1 provides a dot product instruction (dpps).
            if (Sse41.IsSupported)
            {
                // Upper nibble 0011: sum only the first two products. Lower nibble 1111: write to all elements.
                const byte xyToAll = 0b_0011_1111;

                Vector4F delta = Sse.Subtract(left, right);
                return Sse41.DotProduct(delta, delta, xyToAll);
            }

            // Otherwise multiply with SSE and pick the cheapest available way to sum the products.
            if (Sse3.IsSupported)
            {
                Vector4F delta   = Sse.Subtract(left, right);
                Vector4F squares = Sse.Multiply(delta, delta);

                // Mask the products (the constant's name suggests Z and W are cleared - defined elsewhere).
                Vector4F masked = Sse.And(squares, MaskWAndZToZero);

                // Pairwise sums: elements 0 and 2 both end up holding X+Y.
                Vector4F paired = Sse3.HorizontalAdd(masked, masked);

                // (e0, e1, e2, e3) -> (e0, e0, e2, e2), i.e. X+Y in every element.
                return Sse3.MoveLowAndDuplicate(paired);
            }

            if (Sse.IsSupported)
            {
                Vector4F delta   = Sse.Subtract(left, right);
                Vector4F squares = Sse.Multiply(delta, delta);

                // Broadcast the Y product, add it to the X product in element 0, then broadcast element 0.
                Vector4F ySplat = Sse.Shuffle(squares, squares, Helpers.Shuffle(1, 1, 1, 1));
                Vector4F summed = Sse.AddScalar(squares, ySplat);

                return Sse.Shuffle(summed, summed, Helpers.Shuffle(0, 0, 0, 0));
            }

            return DistanceSquared2D_Software(left, right);
        }
Esempio n. 24
0
        /// <summary>
        /// Divides every element of a vector by the length of its first two elements.
        /// </summary>
        /// <param name="vector">The vector to normalize.</param>
        /// <returns>
        /// On the hardware paths, <paramref name="vector"/> divided element-wise by the square root of the
        /// X/Y dot product. Without SSE the result comes from <c>Normalize2D_Software</c>.
        /// </returns>
        public static VectorF Normalize2D(VectorFParam1_3 vector)
        {
            #region Manual Inline
            // Best case: SSE4.1 provides a dot product instruction (dpps).
            if (Sse41.IsSupported)
            {
                // Upper nibble 0011: sum only the first two products. Lower nibble 1111: write to all elements.
                const byte xyToAll = 0b_0011_1111;

                VectorF lengthSquared = Sse41.DotProduct(vector, vector, xyToAll);
                return Sse.Divide(vector, Sse.Sqrt(lengthSquared));
            }

            // Otherwise multiply with SSE and pick the cheapest available way to sum the products.
            if (Sse3.IsSupported)
            {
                VectorF squares = Sse.Multiply(vector, vector);

                // Mask the products (the constant's name suggests Z and W are cleared - defined elsewhere).
                VectorF masked = Sse.And(squares, MaskWAndZToZero);

                // Pairwise sums: elements 0 and 2 both end up holding X+Y.
                VectorF paired = Sse3.HorizontalAdd(masked, masked);

                // (e0, e1, e2, e3) -> (e0, e0, e2, e2), i.e. X+Y in every element.
                VectorF lengthSquared = Sse3.MoveLowAndDuplicate(paired);
                return Sse.Divide(vector, Sse.Sqrt(lengthSquared));
            }

            if (Sse.IsSupported)
            {
                VectorF squares = Sse.Multiply(vector, vector);

                // Broadcast the Y product, add it to the X product in element 0, then broadcast element 0.
                VectorF ySplat        = Sse.Shuffle(squares, squares, Shuffle(1, 1, 1, 1));
                VectorF summed        = Sse.AddScalar(squares, ySplat);
                VectorF lengthSquared = Sse.Shuffle(summed, summed, Shuffle(0, 0, 0, 0));

                return Sse.Divide(vector, Sse.Sqrt(lengthSquared));
            }
            #endregion

            return Normalize2D_Software(vector);
        }
Esempio n. 25
0
        /// <summary>
        /// Computes the dot product of two vectors using only their first three elements.
        /// </summary>
        /// <param name="left">The first vector.</param>
        /// <param name="right">The second vector.</param>
        /// <returns>
        /// On the hardware paths, a vector with the X/Y/Z dot product broadcast to every element.
        /// Without SSE the result comes from <c>DotProduct3D_Software</c>.
        /// </returns>
        public static VectorF DotProduct3D(VectorFParam1_3 left, VectorFParam1_3 right)
        {
            // Best case: SSE4.1 provides a dot product instruction (dpps).
            if (Sse41.IsSupported)
            {
                // Upper nibble 0111: sum only the first three products. Lower nibble 1111: write to all elements.
                const byte xyzToAll = 0b_0111_1111;
                return Sse41.DotProduct(left, right, xyzToAll);
            }

            // Otherwise multiply first and pick the cheapest available way to sum the products.
            if (Sse3.IsSupported)
            {
                VectorF products = Multiply(left, right);

                // Mask the products (the constant's name suggests W is cleared - defined elsewhere).
                VectorF masked = And(products, MaskWToZero);

                // Two horizontal adds spread the sum of all elements across the whole vector.
                VectorF paired = HorizontalAdd(masked, masked);
                return HorizontalAdd(paired, paired);
            }

            if (Sse.IsSupported)
            {
                // Element-wise products of the two inputs.
                VectorF products = Multiply(left, right);

                // Rearrange the products so the two remaining terms can be folded into element 0
                // with two scalar adds.
                VectorF shuffled = Sse.Shuffle(products, products, Shuffle(2, 1, 2, 1));
                VectorF summed   = Sse.AddScalar(products, shuffled);

                shuffled = Sse.Shuffle(shuffled, shuffled, Shuffle(1, 1, 1, 1));
                summed   = Sse.AddScalar(summed, shuffled);

                // Broadcast element 0, which holds the accumulated sum.
                return Sse.Shuffle(summed, summed, Shuffle(0, 0, 0, 0));
            }

            return DotProduct3D_Software(left, right);
        }
Esempio n. 26
0
        /// <summary>
        /// Computes the dot product of two vectors using only their first two elements.
        /// </summary>
        /// <param name="left">The first vector.</param>
        /// <param name="right">The second vector.</param>
        /// <returns>
        /// On the hardware paths, a vector with the X/Y dot product broadcast to every element.
        /// Without SSE the result comes from <c>DotProduct2D_Software</c>.
        /// </returns>
        public static Vector128 <float> DotProduct2D(Vector128 <float> left, Vector128 <float> right)
        {
            // Best case: SSE4.1 provides a dot product instruction (dpps).
            if (Sse41.IsSupported)
            {
                // Upper nibble 0011: sum only the first two products. Lower nibble 1111: write to all elements.
                const byte xyToAll = 0b_0011_1111;
                return Sse41.DotProduct(left, right, xyToAll);
            }

            // Otherwise multiply with SSE and pick the cheapest available way to sum the products.
            if (Sse3.IsSupported)
            {
                Vector128 <float> products = Sse.Multiply(left, right);

                // Mask the products (the constant's name suggests W is cleared - defined elsewhere).
                Vector128 <float> masked = Sse.And(products, SingleConstants.MaskW);

                // Pairwise sums: elements 0 and 2 both end up holding X+Y.
                Vector128 <float> paired = Sse3.HorizontalAdd(masked, masked);

                // (e0, e1, e2, e3) -> (e0, e0, e2, e2), i.e. X+Y in every element.
                return Sse3.MoveLowAndDuplicate(paired);
            }

            if (Sse.IsSupported)
            {
                Vector128 <float> products = Sse.Multiply(left, right);

                // Shuffle with YYYY, add into element 0, then shuffle with XXXX to spread element 0.
                Vector128 <float> ySplat = Sse.Shuffle(products, products, ShuffleValues.YYYY);
                Vector128 <float> summed = Sse.AddScalar(products, ySplat);

                return Sse.Shuffle(summed, summed, ShuffleValues.XXXX);
            }

            return DotProduct2D_Software(left, right);
        }
Esempio n. 27
0
        /// <summary>
        /// Convolves one line of float input with a per-output table of weights, writing three floats per
        /// output step. Uses AVX (and FMA when available) for pairs of vector groups and SSE for the rest.
        /// </summary>
        /// <param name="istart">Start of the input line; read as <c>float</c>.</param>
        /// <param name="tstart">Start of the output range; written as <c>float</c>.</param>
        /// <param name="cb">Size in bytes of the output range; the loop stops when the write pointer reaches <c>tstart + cb</c>.</param>
        /// <param name="mapxstart">Start of the map: for each output step, one <c>int</c> input index followed by <c>smapx * channels</c> float weights.</param>
        /// <param name="smapx">Number of weights per channel for each output step.</param>
        /// <param name="smapy">Output stride factor; the write pointer advances by <c>smapy * 4</c> floats per step.</param>
        unsafe void IConvolver.ConvolveSourceLine(byte *istart, byte *tstart, int cb, byte *mapxstart, int smapx, int smapy)
        {
            float *tp = (float *)tstart, tpe = (float *)(tstart + cb);
            float *pmapx   = (float *)mapxstart;
            int    kstride = smapx * channels;
            int    tstride = smapy * 4;
            // Number of full Vector128-sized groups of taps; any remainder of smapx is not processed here.
            int    vcnt    = smapx / Vector128 <float> .Count;

            while (tp < tpe)
            {
                // The first slot of each map entry is the input start index, stored as an int.
                int ix   = *(int *)pmapx++;
                int lcnt = vcnt;

                float *ip = (float *)istart + ix * channels;
                // Weights for this output step start here; pmapx is moved on to the next map entry.
                float *mp = pmapx;
                pmapx += kstride;

                // Three accumulators, one per consecutive vector of interleaved data.
                // NOTE(review): loading three vectors per step and writing three outputs suggests
                // channels == 3; `channels` is defined outside this method - confirm.
                Vector128 <float> av0, av1, av2;

                if (Avx.IsSupported && lcnt >= 2)
                {
                    Vector256 <float> ax0 = Vector256 <float> .Zero, ax1 = ax0, ax2 = ax0;

                    // Each iteration consumes two Vector128-sized groups at once.
                    for (; lcnt >= 2; lcnt -= 2)
                    {
                        var iv0 = Avx.LoadVector256(ip);
                        var iv1 = Avx.LoadVector256(ip + Vector256 <float> .Count);
                        var iv2 = Avx.LoadVector256(ip + Vector256 <float> .Count * 2);
                        // NOTE(review): Vector256<int>.Count equals Vector256<float>.Count (both 8), so the
                        // stride matches the one used for mp below; <float> was probably intended.
                        ip += Vector256 <int> .Count * channels;

                        if (Fma.IsSupported)
                        {
                            ax0 = Fma.MultiplyAdd(Avx.LoadVector256(mp), iv0, ax0);
                            ax1 = Fma.MultiplyAdd(Avx.LoadVector256(mp + Vector256 <float> .Count), iv1, ax1);
                            ax2 = Fma.MultiplyAdd(Avx.LoadVector256(mp + Vector256 <float> .Count * 2), iv2, ax2);
                        }
                        else
                        {
                            ax0 = Avx.Add(ax0, Avx.Multiply(iv0, Avx.LoadVector256(mp)));
                            ax1 = Avx.Add(ax1, Avx.Multiply(iv1, Avx.LoadVector256(mp + Vector256 <float> .Count)));
                            ax2 = Avx.Add(ax2, Avx.Multiply(iv2, Avx.LoadVector256(mp + Vector256 <float> .Count * 2)));
                        }
                        mp += Vector256 <float> .Count * channels;
                    }

                    // Fold the six 128-bit halves into the three 128-bit accumulators that the
                    // SSE loop below continues to use.
                    av0 = Sse.Add(ax0.GetLower(), ax1.GetUpper());
                    av1 = Sse.Add(ax0.GetUpper(), ax2.GetLower());
                    av2 = Sse.Add(ax1.GetLower(), ax2.GetUpper());
                }
                else
                {
                    av0 = av1 = av2 = Vector128 <float> .Zero;
                }

                // Remaining groups (all of them when the AVX path was not taken), one group per iteration.
                for (; lcnt != 0; lcnt--)
                {
                    var iv0 = Sse.LoadVector128(ip);
                    var iv1 = Sse.LoadVector128(ip + Vector128 <float> .Count);
                    var iv2 = Sse.LoadVector128(ip + Vector128 <float> .Count * 2);
                    ip += Vector128 <float> .Count * channels;

                    if (Fma.IsSupported)
                    {
                        av0 = Fma.MultiplyAdd(Sse.LoadVector128(mp), iv0, av0);
                        av1 = Fma.MultiplyAdd(Sse.LoadVector128(mp + Vector128 <float> .Count), iv1, av1);
                        av2 = Fma.MultiplyAdd(Sse.LoadVector128(mp + Vector128 <float> .Count * 2), iv2, av2);
                    }
                    else
                    {
                        av0 = Sse.Add(av0, Sse.Multiply(iv0, Sse.LoadVector128(mp)));
                        av1 = Sse.Add(av1, Sse.Multiply(iv1, Sse.LoadVector128(mp + Vector128 <float> .Count)));
                        av2 = Sse.Add(av2, Sse.Multiply(iv2, Sse.LoadVector128(mp + Vector128 <float> .Count * 2)));
                    }
                    mp += Vector128 <float> .Count * channels;
                }

                // Gather the partial sums that are not in element 0 of their accumulator:
                //   avs0[0] = av0[3] + av1[2] + av2[1]
                //   avs0[1] = av0[1] + av1[3] + av2[2]
                //   avs0[2] = av0[2] + av1[1] + av2[3]
                var avs0 = Sse.Add(Sse.Add(
                                       Sse.Shuffle(av0, av0, 0b_00_10_01_11),
                                       Sse.Shuffle(av1, av1, 0b_00_01_11_10)),
                                   Sse.Shuffle(av2, av2, 0b_00_11_10_01)
                                   );
                // Bring avs0[1] into element 0 (both forms yield elements 1, 1, 3, 3).
                var avs1 = Sse3.IsSupported ?
                           Sse3.MoveHighAndDuplicate(avs0) :
                           Sse.Shuffle(avs0, avs0, 0b_11_11_01_01);
                // Bring avs0[2] into element 0.
                var avs2 = Sse.UnpackHigh(avs0, avs0);

                // Each output is element 0 of one accumulator plus its three gathered partial sums.
                tp[0] = Sse.AddScalar(av0, avs0).ToScalar();
                tp[1] = Sse.AddScalar(av1, avs1).ToScalar();
                tp[2] = Sse.AddScalar(av2, avs2).ToScalar();
                tp   += tstride;
            }
        }
Esempio n. 28
0
 /// <summary>
 /// C-intrinsic-style name for <c>Sse.AddScalar</c>: forwards both operands unchanged.
 /// </summary>
 /// <param name="a">The first operand.</param>
 /// <param name="b">The second operand.</param>
 /// <returns>The value returned by <c>Sse.AddScalar(a, b)</c>.</returns>
 public static __m128 _mm_add_ss(__m128 a, __m128 b)
 {
     return Sse.AddScalar(a, b);
 }
Esempio n. 29
0
 /// <summary>
 /// C-intrinsic-style name for <c>Sse.AddScalar</c>: forwards both operands unchanged.
 /// </summary>
 /// <param name="left">The first operand.</param>
 /// <param name="right">The second operand.</param>
 /// <returns>The value returned by <c>Sse.AddScalar(left, right)</c>.</returns>
 public static Vector128 <float> _mm_add_ss(Vector128 <float> left, Vector128 <float> right)
     => Sse.AddScalar(left, right);