예제 #1
0
        /// <summary>
        /// Entry point: runs <c>Sse.CompareGreaterThan</c> on one pair of four-element float inputs
        /// and checks every output lane against the scalar comparison.
        /// </summary>
        /// <param name="args">Command-line arguments; not read.</param>
        /// <returns><c>Pass</c> when the check succeeds or SSE is not supported; otherwise <c>Fail</c>.</returns>
        static unsafe int Main(string[] args)
        {
            int outcome = Pass;

            // Nothing to exercise without SSE; the run still counts as a pass.
            if (!Sse.IsSupported)
            {
                return outcome;
            }

            float[] leftInput  = new float[4] { 1, -5, 100, 0 };
            float[] rightInput = new float[4] { 22, -1, -50, 0 };

            using (TestTable<float> table = new TestTable<float>(leftInput, rightInput, new float[4]))
            {
                Vector128<float> lhs = Unsafe.Read<Vector128<float>>(table.inArray1Ptr);
                Vector128<float> rhs = Unsafe.Read<Vector128<float>>(table.inArray2Ptr);
                Vector128<float> mask = Sse.CompareGreaterThan(lhs, rhs);
                Unsafe.Write(table.outArrayPtr, mask);

                // A "true" lane has all bits set (-1 when reinterpreted as Int32); a "false" lane is 0.
                bool lanesMatch = table.CheckResult(
                    (left, right, actual) => (left > right ? -1 : 0) == BitConverter.SingleToInt32Bits(actual));

                if (!lanesMatch)
                {
                    Console.WriteLine("SSE CompareGreaterThan failed on float:");
                    foreach (var value in table.outArray)
                    {
                        Console.Write(value + ", ");
                    }
                    Console.WriteLine();
                    outcome = Fail;
                }
            }

            return outcome;
        }
예제 #2
0
            /// <summary>
            /// Compares the <c>_fld1</c> and <c>_fld2</c> fields with <c>Sse.CompareGreaterThan</c>,
            /// writes the result to the test class's output buffer and asks the test class to validate it.
            /// </summary>
            /// <param name="testClass">Test instance that supplies the output buffer and the validation routine.</param>
            public void RunStructFldScenario(SimpleBinaryOpTest__CompareGreaterThanSingle testClass)
            {
                var comparison  = Sse.CompareGreaterThan(_fld1, _fld2);
                var destination = testClass._dataTable.outArrayPtr;

                Unsafe.Write(destination, comparison);
                testClass.ValidateResult(_fld1, _fld2, destination);
            }
예제 #3
0
        /// <summary>
        /// Compares the <c>_fld1</c> and <c>_fld2</c> fields with <c>Sse.CompareGreaterThan</c>,
        /// writes the result to the data table's output buffer and validates it.
        /// </summary>
        public void RunClassFldScenario()
        {
            var comparison  = Sse.CompareGreaterThan(_fld1, _fld2);
            var destination = _dataTable.outArrayPtr;

            Unsafe.Write(destination, comparison);
            ValidateResult(_fld1, _fld2, destination);
        }
예제 #4
0
        /// <summary>
        /// Creates a local <c>TestStruct</c>, compares its <c>_fld1</c> and <c>_fld2</c> fields with
        /// <c>Sse.CompareGreaterThan</c>, stores the result in the output buffer and validates it.
        /// </summary>
        public void RunStructLclFldScenario()
        {
            var local       = TestStruct.Create();
            var comparison  = Sse.CompareGreaterThan(local._fld1, local._fld2);
            var destination = _dataTable.outArrayPtr;

            Unsafe.Write(destination, comparison);
            ValidateResult(local._fld1, local._fld2, destination);
        }
예제 #5
0
        /// <summary>
        /// Creates a fresh test-class instance in a local, compares its <c>_fld1</c> and <c>_fld2</c>
        /// fields with <c>Sse.CompareGreaterThan</c>, stores the result in this instance's output
        /// buffer and validates it.
        /// </summary>
        public void RunClassLclFldScenario()
        {
            var local       = new SimpleBinaryOpTest__CompareGreaterThanSingle();
            var comparison  = Sse.CompareGreaterThan(local._fld1, local._fld2);
            var destination = _dataTable.outArrayPtr;

            Unsafe.Write(destination, comparison);
            ValidateResult(local._fld1, local._fld2, destination);
        }
예제 #6
0
        /// <summary>
        /// Compares two vectors lane by lane for "greater than or equal".
        /// </summary>
        /// <param name="left">Left-hand operand.</param>
        /// <param name="right">Right-hand operand.</param>
        /// <returns>
        /// The result of <c>Sse.CompareGreaterThanOrEqual</c> when SSE is supported; otherwise the
        /// result of <c>GreaterThanOrEqual_Software</c>.
        /// </returns>
        public static Vector4F GreaterThanOrEqual(Vector4FParam1_3 left, Vector4FParam1_3 right)
        {
            if (Sse.IsSupported)
            {
                // This path previously called Sse.CompareGreaterThan, which reports equal lanes as
                // "false". That contradicts the method name and, presumably, the software fallback
                // (TODO confirm against GreaterThanOrEqual_Software).
                return Sse.CompareGreaterThanOrEqual(left, right);
            }

            return GreaterThanOrEqual_Software(left, right);
        }
예제 #7
0
        /// <summary>
        /// Compares two vectors lane by lane for "greater than".
        /// </summary>
        /// <param name="left">Left-hand operand.</param>
        /// <param name="right">Right-hand operand.</param>
        /// <returns>
        /// The result of <c>Sse.CompareGreaterThan</c> when SSE is supported; otherwise the result of
        /// <c>CompareGreaterThan_Software</c>.
        /// </returns>
        public static Vector128<float> CompareGreaterThan(Vector4FParam1_3 left, Vector4FParam1_3 right)
        {
            // Software path first; the hardware path is the fall-through.
            if (!Sse.IsSupported)
            {
                return CompareGreaterThan_Software(left, right);
            }

            return Sse.CompareGreaterThan(left, right);
        }
예제 #8
0
        /// <summary>
        /// Announces the scenario, then compares the <c>_fld1</c> and <c>_fld2</c> fields with
        /// <c>Sse.CompareGreaterThan</c>, writes the result to the output buffer and validates it.
        /// </summary>
        public void RunClassFldScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));

            var comparison  = Sse.CompareGreaterThan(_fld1, _fld2);
            var destination = _dataTable.outArrayPtr;

            Unsafe.Write(destination, comparison);
            ValidateResult(_fld1, _fld2, destination);
        }
예제 #9
0
        /// <summary>
        /// Loads both operands into locals with <c>Sse.LoadAlignedVector128</c>, compares them with
        /// <c>Sse.CompareGreaterThan</c>, writes the result to the output buffer and validates it.
        /// </summary>
        public void RunLclVarScenario_LoadAligned()
        {
            var lhs = Sse.LoadAlignedVector128((float*)_dataTable.inArray1Ptr);
            var rhs = Sse.LoadAlignedVector128((float*)_dataTable.inArray2Ptr);

            var comparison  = Sse.CompareGreaterThan(lhs, rhs);
            var destination = _dataTable.outArrayPtr;

            Unsafe.Write(destination, comparison);
            ValidateResult(lhs, rhs, destination);
        }
예제 #10
0
        /// <summary>
        /// Reads both operands into locals with <c>Unsafe.Read</c>, compares them with
        /// <c>Sse.CompareGreaterThan</c>, writes the result to the output buffer and validates it.
        /// </summary>
        public void RunLclVarScenario_UnsafeRead()
        {
            var lhs = Unsafe.Read<Vector128<float>>(_dataTable.inArray1Ptr);
            var rhs = Unsafe.Read<Vector128<float>>(_dataTable.inArray2Ptr);

            var comparison  = Sse.CompareGreaterThan(lhs, rhs);
            var destination = _dataTable.outArrayPtr;

            Unsafe.Write(destination, comparison);
            ValidateResult(lhs, rhs, destination);
        }
예제 #11
0
        /// <summary>
        /// Passes two <c>Unsafe.Read</c> results straight into <c>Sse.CompareGreaterThan</c> (no
        /// operand locals), writes the result to the output buffer and validates it against the
        /// input buffers.
        /// </summary>
        public void RunBasicScenario_UnsafeRead()
        {
            var comparison = Sse.CompareGreaterThan(
                Unsafe.Read<Vector128<float>>(_dataTable.inArray1Ptr),
                Unsafe.Read<Vector128<float>>(_dataTable.inArray2Ptr));
            var destination = _dataTable.outArrayPtr;

            Unsafe.Write(destination, comparison);
            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, destination);
        }
예제 #12
0
        /// <summary>
        /// Announces the scenario, creates a local <c>TestStruct</c>, compares its <c>_fld1</c> and
        /// <c>_fld2</c> fields with <c>Sse.CompareGreaterThan</c>, writes the result to the output
        /// buffer and validates it.
        /// </summary>
        public void RunStructLclFldScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));

            var local       = TestStruct.Create();
            var comparison  = Sse.CompareGreaterThan(local._fld1, local._fld2);
            var destination = _dataTable.outArrayPtr;

            Unsafe.Write(destination, comparison);
            ValidateResult(local._fld1, local._fld2, destination);
        }
예제 #13
0
        /// <summary>
        /// Compares <c>_clsVar1</c> and <c>_clsVar2</c> with <c>Sse.CompareGreaterThan</c>, writes the
        /// result to the output buffer and validates it.
        /// </summary>
        public void RunClsVarScenario()
        {
            var comparison  = Sse.CompareGreaterThan(_clsVar1, _clsVar2);
            var destination = _dataTable.outArrayPtr;

            Unsafe.Write(destination, comparison);
            ValidateResult(_clsVar1, _clsVar2, destination);
        }
예제 #14
0
        /// <summary>
        /// Passes two <c>Sse.LoadAlignedVector128</c> results straight into
        /// <c>Sse.CompareGreaterThan</c> (no operand locals), writes the result to the output buffer
        /// and validates it against the input buffers.
        /// </summary>
        public void RunBasicScenario_LoadAligned()
        {
            var comparison = Sse.CompareGreaterThan(
                Sse.LoadAlignedVector128((float*)_dataTable.inArray1Ptr),
                Sse.LoadAlignedVector128((float*)_dataTable.inArray2Ptr));
            var destination = _dataTable.outArrayPtr;

            Unsafe.Write(destination, comparison);
            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, destination);
        }
예제 #15
0
        /// <summary>
        /// Announces the scenario, reads both operands into locals with <c>Unsafe.Read</c>, compares
        /// them with <c>Sse.CompareGreaterThan</c>, writes the result to the output buffer and
        /// validates it.
        /// </summary>
        public void RunLclVarScenario_UnsafeRead()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));

            var first  = Unsafe.Read<Vector128<float>>(_dataTable.inArray1Ptr);
            var second = Unsafe.Read<Vector128<float>>(_dataTable.inArray2Ptr);

            var comparison  = Sse.CompareGreaterThan(first, second);
            var destination = _dataTable.outArrayPtr;

            Unsafe.Write(destination, comparison);
            ValidateResult(first, second, destination);
        }
예제 #16
0
        /// <summary>
        /// Announces the scenario, loads both operands into locals with
        /// <c>Sse.LoadAlignedVector128</c>, compares them with <c>Sse.CompareGreaterThan</c>, writes
        /// the result to the output buffer and validates it.
        /// </summary>
        public void RunLclVarScenario_LoadAligned()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));

            var first  = Sse.LoadAlignedVector128((float*)_dataTable.inArray1Ptr);
            var second = Sse.LoadAlignedVector128((float*)_dataTable.inArray2Ptr);

            var comparison  = Sse.CompareGreaterThan(first, second);
            var destination = _dataTable.outArrayPtr;

            Unsafe.Write(destination, comparison);
            ValidateResult(first, second, destination);
        }
        /// <summary>
        /// Announces the scenario, loads both operands into locals with <c>Sse.LoadVector128</c>,
        /// compares them with <c>Sse.CompareGreaterThan</c>, writes the result to the output buffer
        /// and validates it.
        /// </summary>
        public void RunLclVarScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));

            var lhs = Sse.LoadVector128((float*)_dataTable.inArray1Ptr);
            var rhs = Sse.LoadVector128((float*)_dataTable.inArray2Ptr);

            var comparison  = Sse.CompareGreaterThan(lhs, rhs);
            var destination = _dataTable.outArrayPtr;

            Unsafe.Write(destination, comparison);
            ValidateResult(lhs, rhs, destination);
        }
예제 #18
0
        /// <summary>
        /// Overlap test on two four-float vectors whose low half (elements 0 and 1) is treated as a
        /// minimum and whose high half (elements 2 and 3) is treated as a maximum.
        /// </summary>
        /// <param name="a">First packed min/max vector.</param>
        /// <param name="b">Second packed min/max vector.</param>
        /// <returns>
        /// <c>true</c> when no lane has a's minimum greater than b's maximum and no lane has a's
        /// maximum less than b's minimum; otherwise <c>false</c>.
        /// </returns>
        internal static bool IsIntersectingSse(Vector128<float> a, Vector128<float> b)
        {
            // Duplicate each half across the full vector: (e0, e1, e0, e1) and (e2, e3, e2, e3).
            Vector128<float> lowA  = Sse.MoveLowToHigh(a, a);
            Vector128<float> highA = Sse.MoveHighToLow(a, a);
            Vector128<float> lowB  = Sse.MoveLowToHigh(b, b);
            Vector128<float> highB = Sse.MoveHighToLow(b, b);

            Vector128<float> startsAfter = Sse.CompareGreaterThan(lowA, highB);
            Vector128<float> endsBefore  = Sse.CompareLessThan(highA, lowB);

            // Any set sign bit means the two are separated on that lane.
            int separatedLanes = Sse.MoveMask(Sse.Or(endsBefore, startsAfter));
            return separatedLanes == 0;
        }
예제 #19
0
        /// <summary>
        /// Announces the scenario, creates a local <c>TestStruct</c>, loads its <c>_fld1</c> and
        /// <c>_fld2</c> fields through pointers with <c>Sse.LoadVector128</c> directly into
        /// <c>Sse.CompareGreaterThan</c>, writes the result to the output buffer and validates it.
        /// </summary>
        public void RunStructLclFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario_Load));

            var local = TestStruct.Create();

            var comparison = Sse.CompareGreaterThan(
                Sse.LoadVector128((float*)&local._fld1),
                Sse.LoadVector128((float*)&local._fld2));
            var destination = _dataTable.outArrayPtr;

            Unsafe.Write(destination, comparison);
            ValidateResult(local._fld1, local._fld2, destination);
        }
예제 #20
0
            /// <summary>
            /// Pins the <c>_fld1</c> and <c>_fld2</c> fields, loads them through pointers with
            /// <c>Sse.LoadVector128</c> directly into <c>Sse.CompareGreaterThan</c>, writes the result
            /// to the test class's output buffer and asks the test class to validate it.
            /// </summary>
            /// <param name="testClass">Test instance that supplies the output buffer and the validation routine.</param>
            public void RunStructFldScenario_Load(SimpleBinaryOpTest__CompareGreaterThanSingle testClass)
            {
                fixed (Vector128<float>* leftField = &_fld1)
                fixed (Vector128<float>* rightField = &_fld2)
                {
                    var comparison = Sse.CompareGreaterThan(
                        Sse.LoadVector128((float*)leftField),
                        Sse.LoadVector128((float*)rightField));
                    var destination = testClass._dataTable.outArrayPtr;

                    Unsafe.Write(destination, comparison);
                    testClass.ValidateResult(_fld1, _fld2, destination);
                }
            }
예제 #21
0
        /// <summary>
        /// Announces the scenario, pins the <c>_fld1</c> and <c>_fld2</c> fields, loads them through
        /// pointers with <c>Sse.LoadVector128</c> directly into <c>Sse.CompareGreaterThan</c>, writes
        /// the result to the output buffer and validates it.
        /// </summary>
        public void RunClassFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario_Load));

            fixed (Vector128<float>* leftField = &_fld1)
            fixed (Vector128<float>* rightField = &_fld2)
            {
                var comparison = Sse.CompareGreaterThan(
                    Sse.LoadVector128((float*)leftField),
                    Sse.LoadVector128((float*)rightField));
                var destination = _dataTable.outArrayPtr;

                Unsafe.Write(destination, comparison);
                ValidateResult(_fld1, _fld2, destination);
            }
        }
예제 #22
0
    /// <summary>
    /// Checks <c>Sse.CompareGreaterThan</c> on a fixed input pair that includes a NaN lane, an
    /// equal lane, a smaller lane and a greater lane; only element 2 (2.0 &gt; 1.0) should be set.
    /// </summary>
    /// <returns>
    /// <c>true</c> when SSE is unsupported or the sign-bit mask equals <c>0b0100</c>; otherwise
    /// <c>false</c> after printing the mismatch to the console.
    /// </returns>
    static bool TestSseCompareGreaterThan()
    {
        if (!Sse.IsSupported)
        {
            return true;
        }

        const int expectedMask = 0b0100;

        Vector128<float> lhs = Vector128.Create(float.NaN, 1.0f, 2.0f, 3.0f);
        Vector128<float> rhs = Vector128.Create(0.0f, 2.0f, 1.0f, 3.0f);
        Vector128<float> comparison = Sse.CompareGreaterThan(lhs, rhs);

        int actualMask = Sse.MoveMask(comparison);

        if (actualMask == expectedMask)
        {
            return true;
        }

        Console.WriteLine($"{nameof(Sse)}.{nameof(Sse.CompareGreaterThan)}({lhs}, {rhs}) returned {Convert.ToString(actualMask, 2)}; expected {Convert.ToString(expectedMask, 2)}");
        return false;
    }
예제 #23
0
        /// <summary>
        /// Walk-through of vector creation and a selection of SSE/AVX/FMA intrinsics.
        /// Every result is stored in a local (or discarded) and nothing is returned or printed, so
        /// the values are only observable in a debugger. The AVX section is skipped entirely when
        /// <c>Avx.IsSupported</c> is false.
        /// </summary>
        public Intro()
        {
            var middleVector = Vector128.Create(1.0f);                      // middleVector = <1,1,1,1>

            middleVector = Vector128.CreateScalar(-1.0f);                   // middleVector = <-1,0,0,0>
            var floatBytes = Vector64.AsByte(Vector64.Create(1.0f, -1.0f)); // floatBytes = <0, 0, 128, 63, 0, 0, 128, 191> (little-endian bytes of 1.0f and -1.0f)

            if (Avx.IsSupported)
            {
                var left  = Vector256.Create(-2.5f);                     // <-2.5, -2.5, -2.5, -2.5, -2.5, -2.5, -2.5, -2.5>
                var right = Vector256.Create(5.0f);                      // <5, 5, 5, 5, 5, 5, 5, 5>
                Vector256 <float> result = Avx.AddSubtract(left, right); // result = <-7.5, 2.5, -7.5, 2.5, -7.5, 2.5, -7.5, 2.5> (even lanes subtract, odd lanes add)
                left   = Vector256.Create(-1.0f, -2.0f, -3.0f, -4.0f, -50.0f, -60.0f, -70.0f, -80.0f);
                right  = Vector256.Create(0.0f, 2.0f, 3.0f, 4.0f, 50.0f, 60.0f, 70.0f, 80.0f);
                result = Avx.UnpackHigh(left, right);              // result = <-3, 3, -4, 4, -70, 70, -80, 80>
                result = Avx.UnpackLow(left, right);               // result = <-1, 0, -2, 2, -50, 50, -60, 60>
                result = Avx.DotProduct(left, right, 0b1111_0001); // result = <-29, 0, 0, 0, -17400, 0, 0, 0> (per 128-bit lane: sum of all four products, stored in element 0)
                bool testResult = Avx.TestC(left, right);          // testResult = true  (only sign bits are tested; every lane of left is negative)
                testResult = Avx.TestC(right, left);               // testResult = false
                Vector256 <float> result1 = Avx.Divide(left, right); // element 0 is -1 / 0 = -Infinity
                var plusOne = Vector256.Create(1.0f);
                result = Avx.Compare(right, result1, FloatComparisonMode.OrderedGreaterThanNonSignaling);
                result = Avx.Compare(right, result1, FloatComparisonMode.UnorderedNotLessThanNonSignaling);
                left   = Vector256.Create(0.0f, 3.0f, -3.0f, 4.0f, -50.0f, 60.0f, -70.0f, 80.0f);
                right  = Vector256.Create(0.0f, 2.0f, 3.0f, 2.0f, 50.0f, -60.0f, 70.0f, -80.0f);
                Vector256 <float> nanInFirstPosition = Avx.Divide(left, right); // element 0 is 0 / 0 = NaN
                left = Vector256.Create(1.1f, 3.3333333f, -3.0f, 4.22f, -50.0f, 60.0f, -70.0f, 80.0f);
                Vector256 <float> InfInFirstPosition = Avx.Divide(left, right); // element 0 is 1.1 / 0 = +Infinity

                left  = Vector256.Create(-1.1f, 3.0f, 1.0f / 3.0f, MathF.PI, -50.0f, 60.0f, -70.0f, 80.0f);
                right = Vector256.Create(0.0f, 2.0f, 3.1f, 2.0f, 50.0f, -60.0f, 70.0f, -80.0f);
                Vector256 <float> compareResult = Avx.Compare(left, right, FloatComparisonMode.OrderedGreaterThanNonSignaling); // compareResult = <0, NaN, 0, NaN, 0, NaN, 0, NaN> (a "true" lane is all bits set, which reads as NaN)
                Vector256 <float> mixed         = Avx.BlendVariable(left, right, compareResult);                                //  mixed = <-1.1, 2, 0.333..., 2, -50, -60, -70, -80> (right is taken where the mask lane is set)

                //left = Vector256.Create(-1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f);
                //right = Vector256.Create(1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 1.0f, -1.0f, 1.0f);
                Vector256 <float> other = right = Vector256.Create(0.0f, 2.0f, 3.0f, 2.0f, 50.0f, -60.0f, 70.0f, -80.0f);
                bool bRes    = Avx.TestZ(plusOne, compareResult);
                bool bRes2   = Avx.TestC(plusOne, compareResult);
                // NOTE(review): !TestZ(x, x) is true as soon as ANY lane of x has its sign bit set,
                // so "allTrue" looks misnamed (it behaves like "anyTrue") - confirm the intent.
                bool allTrue = !Avx.TestZ(compareResult, compareResult);
                compareResult = Avx.Compare(nanInFirstPosition, right, FloatComparisonMode.OrderedEqualNonSignaling); // compareResult = <0, 0, 0, NaN, 0, 0, 0, 0> (only element 3, 4 / 2 == 2, compares equal)
                compareResult = Avx.Compare(nanInFirstPosition, right, FloatComparisonMode.UnorderedEqualNonSignaling);
                compareResult = Avx.Compare(InfInFirstPosition, right, FloatComparisonMode.UnorderedNotLessThanOrEqualNonSignaling);
                compareResult = Avx.Compare(InfInFirstPosition, right, FloatComparisonMode.OrderedGreaterThanNonSignaling);
                var left128  = Vector128.Create(1.0f, 2.0f, 3.0f, 4.0f);
                var right128 = Vector128.Create(2.0f, 3.0f, 4.0f, 5.0f);
                Vector128 <float> compResult128 = Sse.CompareGreaterThan(left128, right128); // compResult128 = <0, 0, 0, 0>

                int res = Avx.MoveMask(compareResult);
                if (Fma.IsSupported)
                {
                    Vector256 <float> resultFma = Fma.MultiplyAdd(left, right, other); // = left * right + other for each element
                    resultFma = Fma.MultiplyAddNegated(left, right, other);            // = -(left * right) + other for each element
                    resultFma = Fma.MultiplySubtract(left, right, other);              // = left * right - other for each element
                    Fma.MultiplyAddSubtract(left, right, other);                       // even elements (0, 2, ...) subtract other, odd elements add other; the result is discarded here
                }
                // NOTE(review): the original comments on the seven calls below listed values computed
                // for earlier operand values (left = <-2.5 x8>, right = <5 x8>, or the -1..-80 /
                // 0..80 pair). left and right have been reassigned since then, so those numbers did
                // not describe what these calls return; the comments now only name the operation.
                result = Avx.DotProduct(left, right, 0b1010_0001);                     // per 128-bit lane: sum of the products of elements 1 and 3, stored in element 0
                result = Avx.Floor(left);                                              // element-wise floor of left
                result = Avx.Add(left, right);                                         // element-wise left + right
                result = Avx.Ceiling(left);                                            // element-wise ceiling of left
                result = Avx.Multiply(left, right);                                    // element-wise left * right
                result = Avx.HorizontalAdd(left, right);                               // sums of adjacent pairs, taken from left and right within each 128-bit lane
                result = Avx.HorizontalSubtract(left, right);                          // differences of adjacent pairs, taken from left and right within each 128-bit lane
                double[] someDoubles      = new double[] { 1.0, 3.0, -2.5, 7.5, 10.8, 0.33333 };
                double[] someOtherDoubles = new double[] { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 };
                double[] someResult       = new double[someDoubles.Length];
                float[]  someFloats       = new float[] { 1, 2, 3, 4, 10, 20, 30, 40, 0 };
                float[]  someOtherFloats  = new float[] { 1, 1, 1, 1, 1, 1, 1, 1 };
                unsafe
                {
                    // Load four doubles starting at index 1 and store them at the start of someResult.
                    fixed(double *ptr = &someDoubles[1])
                    {
                        fixed(double *ptr2 = &someResult[0])
                        {
                            Vector256 <double> res2 = Avx.LoadVector256(ptr); // res2 = <3, -2.5, 7.5, 10.8>

                            Avx.Store(ptr2, res2);
                        }
                    }

                    // Dot product of the first eight floats of each array; control 0b0001_0001 uses
                    // only element 0 of each 128-bit lane and stores the product in element 0.
                    fixed(float *ptr = &someFloats[0])
                    {
                        fixed(float *ptr2 = &someOtherFloats[0])
                        {
                            Vector256 <float> res2 = Avx.DotProduct(Avx.LoadVector256(ptr), Avx.LoadVector256(ptr2), 0b0001_0001);
                            //Avx.Store(ptr2, res2);
                        }
                    }
                }
            }
        }
예제 #24
0
    /// <summary>
    /// Resizes this bitmap into <paramref name="rtnImage"/> using cubic weights, in two parallel passes:
    /// a horizontal pass over every source row that writes four floats per destination column into a
    /// temporary buffer, then a vertical pass over every destination row that blends four rows of that
    /// buffer, clamps to [0, 255] and writes one packed 32-bit pixel per destination column.
    /// </summary>
    /// <param name="rtnImage">Destination bitmap; its width and height define the output size.</param>
    /// <exception cref="Exception">
    /// Thrown when either source/destination size ratio is greater than 1, i.e. when the destination
    /// is smaller than the source on an axis.
    /// </exception>
    /// <remarks>
    /// NOTE(review): Sse2/Ssse3/Sse41/Avx2 intrinsics are used without any IsSupported check in the
    /// visible code - confirm that callers guarantee AVX2 hardware.
    /// </remarks>
    public void ResizeBicubic(FastBitmap rtnImage)
    {
        float scaleX = (float)this.width / rtnImage.width;
        float scaleY = (float)this.height / rtnImage.height;

        if (scaleX > 1 || scaleY > 1)
        {
            // The message is Japanese for "only enlargement is supported".
            throw new Exception("拡大のみ対応");
        }

        // Intermediate buffer: one row per SOURCE row, rtnImage.width pixels of four floats each.
        float[] tmpa = new float[rtnImage.width * 4 * this.height];
        fixed(float *tmpp = tmpa)
        {
            float *tmp     = tmpp;
            // Byte-shuffle masks: each one zero-extends the four bytes of one gathered pixel into
            // four 32-bit lanes (index 255 produces a zero byte).
            var    _00mask = Vector128.Create(0, 255, 255, 255, 1, 255, 255, 255, 2, 255, 255, 255, 3, 255, 255, 255);
            var    _01mask = Vector128.Create(4, 255, 255, 255, 5, 255, 255, 255, 6, 255, 255, 255, 7, 255, 255, 255);
            var    _10mask = Vector128.Create(8, 255, 255, 255, 9, 255, 255, 255, 10, 255, 255, 255, 11, 255, 255, 255);
            var    _11mask = Vector128.Create(12, 255, 255, 255, 13, 255, 255, 255, 14, 255, 255, 255, 15, 255, 255, 255);
            // Packs the low byte of each 32-bit lane into the first four bytes.
            var    _vmask  = Vector128.Create(0, 4, 8, 12, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255);

            // Offsets of the four taps relative to the integer sample position.
            var _1012  = Vector128.Create(-1, 0, 1, 2);
            var _0123i = Vector128.Create(0, 1, 2, 3);

            var _0000   = Vector128.Create(0, 0, 0, 0);
            var _0000f  = Vector128.Create(0f, 0, 0, 0);
            var _255f   = Vector128.Create(255f, 255, 255, 255);
            var _1111   = Vector128.Create(1, 1, 1, 1);
            var _1111f  = Vector128.Create(1f, 1, 1, 1);
            var _4444f  = Vector128.Create(4f, 4, 4, 4);
            var _4444   = Vector128.Create(4, 4, 4, 4);
            var _5555f  = Vector128.Create(5f, 5, 5, 5);
            var _2222f  = Vector128.Create(2f, 2, 2, 2);
            var _8888f  = Vector128.Create(8f, 8, 8, 8);
            // Mask that clears the sign bit, i.e. absolute value for floats.
            var _7f     = Vector128.Create(0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff).AsSingle();
            var _ff     = Vector128.Create(-1, -1, -1, -1);
            // Row length of the intermediate buffer, in floats.
            var _stride = Vector128.Create(rtnImage.width * 4, rtnImage.width * 4, rtnImage.width * 4, rtnImage.width * 4);

            // Pass 1 (horizontal): for every source row, interpolate each destination column.
            Parallel.For(0, this.height, (y) =>
            {
                // NOTE(review): py is computed but not used in this pass.
                float py      = (y * scaleY);
                float *tmpPos = tmp + y * rtnImage.width * 4;
                for (int x = 0; x < rtnImage.width; x++)
                {
                    float px = (x * scaleX);
                    int sx   = (int)px;

                    // Broadcast px, sx and the source width to all four lanes.
                    var _px = Vector128.CreateScalar(px);
                    _px     = Sse.Shuffle(_px, _px, 0);

                    var _sx = Vector128.CreateScalar(sx);
                    _sx     = Sse2.Shuffle(_sx, 0);

                    var _width = Vector128.CreateScalar(this.width);
                    _width     = Sse2.Shuffle(_width, 0);

                    // Source columns of the four taps: sx-1, sx, sx+1, sx+2.
                    var _x2 = Sse2.Add(_sx, _1012);

                    // d = |px - tap|, then d^2 and d^3.
                    var _d  = Sse.And(Sse.Subtract(_px, Sse2.ConvertToVector128Single(_x2)), _7f);
                    var _d2 = Sse.Multiply(_d, _d);
                    var _d3 = Sse.Multiply(_d2, _d);

                    // w1 = 1 - 2d^2 + d^3 is used where d <= 1; w2 = 4 - 8d + 5d^2 - d^3 where d > 1.
                    var w1   = Sse.Add(_1111f, Sse.Subtract(_d3, Sse.Multiply(_2222f, _d2)));
                    var w2   = Sse.Subtract(Sse.Subtract(Sse.Add(_4444f, Sse.Multiply(_5555f, _d2)), Sse.Multiply(_d, _8888f)), _d3);
                    var wb   = Sse2.CompareGreaterThan(_d, _1111f);
                    var _w   = Sse41.BlendVariable(w1, w2, wb);
                    // Out-of-range mask: tap < 0, or NOT (tap < width). Those taps fall back to sx.
                    var _xpb = Sse2.Or(Sse2.CompareLessThan(_x2, _0000), Sse41.MultiplyLow(Sse2.AndNot(Sse2.CompareLessThan(_x2, _width), _1111).AsInt32(), _ff));
                    var _xpp = Sse2.And(_sx, _xpb);
                    var _xp  = Sse41.BlendVariable(_x2, _xpp, _xpb);

                    // Gather the four 32-bit source pixels of row y at the tap columns.
                    var p = Avx2.GatherVector128((uint *)(this._ptr + this._stride * y), _xp, 4).AsByte();


                    // Expand each gathered pixel into four float channels.
                    var _p0 = Sse2.ConvertToVector128Single(Ssse3.Shuffle(p, _00mask).AsInt32());
                    var _p1 = Sse2.ConvertToVector128Single(Ssse3.Shuffle(p, _01mask).AsInt32());
                    var _p2 = Sse2.ConvertToVector128Single(Ssse3.Shuffle(p, _10mask).AsInt32());
                    var _p3 = Sse2.ConvertToVector128Single(Ssse3.Shuffle(p, _11mask).AsInt32());

                    // Broadcast each tap weight across a full vector.
                    var _w0 = Sse.Shuffle(_w, _w, 0);
                    var _w1 = Sse.Shuffle(_w, _w, 0b01010101);
                    var _w2 = Sse.Shuffle(_w, _w, 0b10101010);
                    var _w3 = Sse.Shuffle(_w, _w, 0b11111111);

                    // Weighted sum of the four taps, per channel.
                    var rgbaf = Sse.Add(Sse.Add(Sse.Multiply(_p0, _w0), Sse.Multiply(_p1, _w1)), Sse.Add(Sse.Multiply(_p2, _w2), Sse.Multiply(_p3, _w3)));

                    Sse2.Store(tmpPos + x * 4, rgbaf);
                }
            });

            // Pass 2 (vertical): for every destination row, blend four rows of the intermediate buffer.
            Parallel.For(0, rtnImage.height, (y) =>
            {
                float py = (y * scaleY);
                int sy   = (int)py;

                // Scratch space for one 16-byte store; only the first uint is copied out below.
                uint *store = stackalloc uint[4];

                var _py = Vector128.CreateScalar(py);
                _py     = Sse.Shuffle(_py, _py, 0);

                var _sy = Vector128.CreateScalar(sy);
                _sy     = Sse2.Shuffle(_sy, 0);

                var _height = Vector128.CreateScalar(this.height);
                _height     = Sse2.Shuffle(_height, 0);

                // Source rows of the four taps: sy-1, sy, sy+1, sy+2.
                var _y2 = Sse2.Add(_sy, _1012);

                // Same distance and weight computation as in the horizontal pass.
                var _d  = Sse.And(Sse.Subtract(_py, Sse2.ConvertToVector128Single(_y2)), _7f);
                var _d2 = Sse.Multiply(_d, _d);
                var _d3 = Sse.Multiply(_d2, _d);

                var w1 = Sse.Add(_1111f, Sse.Subtract(_d3, Sse.Multiply(_2222f, _d2)));
                var w2 = Sse.Subtract(Sse.Subtract(Sse.Add(_4444f, Sse.Multiply(_5555f, _d2)), Sse.Multiply(_d, _8888f)), _d3);
                var wb = Sse2.CompareGreaterThan(_d, _1111f);
                var _w = Sse41.BlendVariable(w1, w2, wb);


                // Out-of-range rows fall back to sy; then convert row numbers to float offsets.
                var _ypb = Sse2.Or(Sse2.CompareLessThan(_y2, _0000), Sse41.MultiplyLow(Sse2.AndNot(Sse2.CompareLessThan(_y2, _height), _1111).AsInt32(), _ff));
                var _ypp = Sse2.And(_sy, _ypb);
                var _yp  = Sse41.BlendVariable(_y2, _ypp, _ypb);
                var _yps = Sse41.MultiplyLow(_yp, _stride);

                // Gather indices of the four channels of the first pixel in each of the four tap rows.
                var _yp0  = Sse2.Add(Sse2.Shuffle(_yps, 0), _0123i);
                var _yp1  = Sse2.Add(Sse2.Shuffle(_yps, 0b01010101), _0123i);
                var _yp2  = Sse2.Add(Sse2.Shuffle(_yps, 0b10101010), _0123i);
                var _yp3  = Sse2.Add(Sse2.Shuffle(_yps, 0b11111111), _0123i);
                uint *rtn = (uint *)(rtnImage._ptr + rtnImage._stride * y);

                for (int x = 0; x < rtnImage.width; x++)
                {
                    var _p0 = Avx2.GatherVector128((float *)(tmp), _yp0, 4);
                    var _p1 = Avx2.GatherVector128((float *)(tmp), _yp1, 4);
                    var _p2 = Avx2.GatherVector128((float *)(tmp), _yp2, 4);
                    var _p3 = Avx2.GatherVector128((float *)(tmp), _yp3, 4);

                    var _w0 = Sse.Shuffle(_w, _w, 0);
                    var _w1 = Sse.Shuffle(_w, _w, 0b01010101);
                    var _w2 = Sse.Shuffle(_w, _w, 0b10101010);
                    var _w3 = Sse.Shuffle(_w, _w, 0b11111111);

                    var rgbaf = Sse.Add(Sse.Add(Sse.Multiply(_p0, _w0), Sse.Multiply(_p1, _w1)), Sse.Add(Sse.Multiply(_p2, _w2), Sse.Multiply(_p3, _w3)));

                    // Clamp every channel to [0, 255] before converting to integers.
                    var _b0 = Sse.CompareLessThan(rgbaf, _0000f);
                    rgbaf   = Sse41.BlendVariable(rgbaf, _0000f, _b0);
                    var _b1 = Sse.CompareGreaterThan(rgbaf, _255f);
                    rgbaf   = Sse41.BlendVariable(rgbaf, _255f, _b1);

                    // Convert to Int32 lanes and pack the low byte of each lane into one 32-bit pixel.
                    var rgbab = Sse2.ConvertToVector128Int32(rgbaf).AsByte();
                    var rgba  = Ssse3.Shuffle(rgbab, _vmask).AsUInt32();

                    Sse2.Store(store, rgba);

                    // Advance all gather indices by one pixel (four floats).
                    _yp0 = Sse2.Add(_yp0, _4444);
                    _yp1 = Sse2.Add(_yp1, _4444);
                    _yp2 = Sse2.Add(_yp2, _4444);
                    _yp3 = Sse2.Add(_yp3, _4444);
                    *rtn = *store;
                    rtn++;
                }
            });
 /// <summary>
 /// Lane-wise "greater than" comparison, returned as the <c>Sse.CompareGreaterThan</c> result
 /// reinterpreted as 32-bit integers.
 /// </summary>
 /// <param name="lhs">Left-hand operand.</param>
 /// <param name="rhs">Right-hand operand.</param>
 /// <returns>The comparison mask viewed through <c>AsInt32()</c>.</returns>
 public static m32 GreaterThan(f32 lhs, f32 rhs)
 {
     var laneMask = Sse.CompareGreaterThan(lhs, rhs);
     return laneMask.AsInt32();
 }
예제 #26
0
        /// <summary>
        /// Sharpens one line of float samples: for each sample it takes the difference between the
        /// <paramref name="ystart"/> and <paramref name="bstart"/> buffers, optionally drops
        /// differences whose magnitude does not exceed <paramref name="thresh"/>, scales the rest by
        /// <paramref name="amt"/> and adds the result to the input sample. Uses an AVX loop when
        /// available and enough samples remain, otherwise an SSE loop, and finishes with a scalar loop.
        /// </summary>
        /// <param name="cstart">Input line, read as floats starting at offset <c>ox * channels</c>.</param>
        /// <param name="ystart">First difference operand, read as floats starting at offset <c>ox</c>.</param>
        /// <param name="bstart">Second difference operand, read as floats from its start.</param>
        /// <param name="ostart">Output line, written as floats from its start.</param>
        /// <param name="ox">Starting offset applied to <paramref name="cstart"/> and <paramref name="ystart"/>.</param>
        /// <param name="ow">Number of units to process; the input range is <c>ow * channels</c> floats.</param>
        /// <param name="amt">Multiplier applied to the difference.</param>
        /// <param name="thresh">Threshold on the absolute difference; values &lt;= 0 disable thresholding.</param>
        /// <param name="gamma">
        /// When true, the difference is added to the square root of the (zero-clamped) input and the
        /// zero-clamped sum is squared again.
        /// </param>
        /// <remarks>
        /// NOTE(review): yp and bp advance by one float for every input float, while the input range
        /// is scaled by <c>channels</c> - confirm the expected layout of the y/b buffers.
        /// NOTE(review): the SSE branch is not guarded by <c>Sse.IsSupported</c> in this method.
        /// </remarks>
        unsafe void IConvolver.SharpenLine(byte *cstart, byte *ystart, byte *bstart, byte *ostart, int ox, int ow, float amt, float thresh, bool gamma)
        {
            float *ip = (float *)cstart + (uint)ox * channels, yp = (float *)ystart + (uint)ox, bp = (float *)bstart, op = (float *)ostart;
            // One past the last input float to process.
            float *ipe = ip + (uint)ow * channels;

            bool threshold = thresh > 0f;

            // AVX path: taken only when at least one full vector of input remains.
            if (Avx.IsSupported && ip <= ipe - VectorAvx.Count)
            {
                var vthresh = Vector256.Create(threshold ? thresh : -1f);
                // Sign-bit-clearing mask used to take the absolute value.
                var vmsk    = Vector256.Create(0x7fffffff).AsSingle();
                var vamt    = Vector256.Create(amt);
                var vmin    = VectorAvx.Zero;

                // Pull the end pointer back by one vector so "ip <= ipe" means a full vector remains.
                ipe -= VectorAvx.Count;
                do
                {
                    var vd = Avx.Subtract(Avx.LoadVector256(yp), Avx.LoadVector256(bp));
                    yp += VectorAvx.Count;
                    bp += VectorAvx.Count;

                    if (threshold)
                    {
                        // Zero every lane whose |difference| is not greater than the threshold.
                        var sm = HWIntrinsics.AvxCompareGreaterThan(Avx.And(vd, vmsk), vthresh);
                        vd = Avx.And(vd, sm);
                    }
                    vd = Avx.Multiply(vd, vamt);

                    var v0 = Avx.LoadVector256(ip);
                    ip += VectorAvx.Count;

                    if (gamma)
                    {
                        // x * rsqrt(x) approximates sqrt(x); clamp at zero before and after the add,
                        // then square.
                        v0 = Avx.Max(v0, vmin);
                        v0 = Avx.Multiply(v0, Avx.ReciprocalSqrt(v0));
                        v0 = Avx.Add(v0, vd);
                        v0 = Avx.Max(v0, vmin);
                        v0 = Avx.Multiply(v0, v0);
                    }
                    else
                    {
                        v0 = Avx.Add(v0, vd);
                    }

                    Avx.Store(op, v0);
                    op += VectorAvx.Count;
                } while (ip <= ipe);
                // Restore the true end pointer for the scalar tail.
                ipe += VectorAvx.Count;
            }
            // SSE path: same algorithm as above on 128-bit vectors.
            else if (ip <= ipe - VectorSse.Count)
            {
                var vthresh = Vector128.Create(threshold ? thresh : -1f);
                var vmsk    = Vector128.Create(0x7fffffff).AsSingle();
                var vamt    = Vector128.Create(amt);
                var vmin    = VectorSse.Zero;

                ipe -= VectorSse.Count;
                do
                {
                    var vd = Sse.Subtract(Sse.LoadVector128(yp), Sse.LoadVector128(bp));
                    yp += VectorSse.Count;
                    bp += VectorSse.Count;

                    if (threshold)
                    {
                        var sm = Sse.CompareGreaterThan(Sse.And(vd, vmsk), vthresh);
                        vd = Sse.And(vd, sm);
                    }
                    vd = Sse.Multiply(vd, vamt);

                    var v0 = Sse.LoadVector128(ip);
                    ip += VectorSse.Count;

                    if (gamma)
                    {
                        v0 = Sse.Max(v0, vmin);
                        v0 = Sse.Multiply(v0, Sse.ReciprocalSqrt(v0));
                        v0 = Sse.Add(v0, vd);
                        v0 = Sse.Max(v0, vmin);
                        v0 = Sse.Multiply(v0, v0);
                    }
                    else
                    {
                        v0 = Sse.Add(v0, vd);
                    }

                    Sse.Store(op, v0);
                    op += VectorSse.Count;
                } while (ip <= ipe);
                ipe += VectorSse.Count;
            }

            float fmin = VectorSse.Zero.ToScalar();

            // Scalar tail: handles whatever the vector loops left over (or everything when neither ran).
            while (ip < ipe)
            {
                float dif = *yp++ - *bp++;
                float c0  = *ip++;

                // Below-threshold samples are copied through unchanged.
                if (!threshold || Math.Abs(dif) > thresh)
                {
                    dif *= amt;

                    if (gamma)
                    {
                        c0  = MathUtil.MaxF(c0, fmin).Sqrt();
                        c0  = MathUtil.MaxF(c0 + dif, fmin);
                        c0 *= c0;
                    }
                    else
                    {
                        c0 += dif;
                    }
                }

                *op++ = c0;
            }
        }
예제 #27
0
 /// <summary>
 /// Thin wrapper that forwards to <c>Sse.CompareGreaterThan</c> under the C intrinsic name.
 /// </summary>
 /// <param name="left">Left-hand operand.</param>
 /// <param name="right">Right-hand operand.</param>
 /// <returns>The value returned by <c>Sse.CompareGreaterThan(left, right)</c>.</returns>
 public static Vector128<float> _mm_cmpgt_ps(Vector128<float> left, Vector128<float> right)
     => Sse.CompareGreaterThan(left, right);
예제 #28
0
 /// <summary>
 /// Lane-wise "greater than" for two float vectors, forwarded to <c>Sse.CompareGreaterThan</c>.
 /// </summary>
 /// <param name="left">Left-hand operand.</param>
 /// <param name="right">Right-hand operand.</param>
 /// <returns>The value returned by <c>Sse.CompareGreaterThan(left, right)</c>.</returns>
 public static Vector128<float> op_GreaterThan(Vector128<float> left, Vector128<float> right)
 {
     return Sse.CompareGreaterThan(left, right);
 }