Exemplo n.º 1
0
            /// <summary>
            /// Compares this instance's <c>_fld1</c> and <c>_fld2</c> with <c>Sse.CompareLessThan</c>,
            /// writes the outcome to the output buffer of <paramref name="testClass"/>, and has
            /// <paramref name="testClass"/> validate it against the same two fields.
            /// </summary>
            /// <param name="testClass">Test instance supplying the output buffer and the validation routine.</param>
            public void RunStructFldScenario(SimpleBinaryOpTest__CompareLessThanSingle testClass)
            {
                var lessThanMask = Sse.CompareLessThan(_fld1, _fld2);

                // Store first, then validate from the same buffer that was just written.
                Unsafe.Write(testClass._dataTable.outArrayPtr, lessThanMask);
                testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
            }
Exemplo n.º 2
0
        /// <summary>
        /// Entry point: when SSE is supported, runs <c>Sse.CompareLessThan</c> over two fixed
        /// four-element float inputs and checks every output lane is all-ones (-1 as int bits)
        /// where the left element is less than the right element, and zero otherwise.
        /// </summary>
        /// <param name="args">Command-line arguments (not read).</param>
        /// <returns><c>Pass</c> when the check succeeds or SSE is unsupported; <c>Fail</c> otherwise.</returns>
        static unsafe int Main(string[] args)
        {
            // Nothing to exercise without SSE; report success.
            if (!Sse.IsSupported)
            {
                return Pass;
            }

            int testResult = Pass;

            float[] leftInput = new float[4] { 1, -5, 100, 0 };
            float[] rightInput = new float[4] { 22, -1, -50, 0 };

            using (TestTable<float> floatTable = new TestTable<float>(leftInput, rightInput, new float[4]))
            {
                Vector128<float> lhs = Unsafe.Read<Vector128<float>>(floatTable.inArray1Ptr);
                Vector128<float> rhs = Unsafe.Read<Vector128<float>>(floatTable.inArray2Ptr);
                Vector128<float> mask = Sse.CompareLessThan(lhs, rhs);
                Unsafe.Write(floatTable.outArrayPtr, mask);

                bool lanesMatch = floatTable.CheckResult(
                    (x, y, z) => BitConverter.SingleToInt32Bits(z) == (x < y ? -1 : 0));

                if (!lanesMatch)
                {
                    Console.WriteLine("SSE CompareLessThan failed on float:");
                    foreach (var item in floatTable.outArray)
                    {
                        Console.Write($"{item}, ");
                    }
                    Console.WriteLine();
                    testResult = Fail;
                }
            }

            return testResult;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Compares the instance fields <c>_fld1</c> and <c>_fld2</c> with <c>Sse.CompareLessThan</c>,
        /// writes the outcome to the data table's output buffer, and validates it.
        /// </summary>
        public void RunFldScenario()
        {
            var lessThanMask = Sse.CompareLessThan(_fld1, _fld2);

            Unsafe.Write(_dataTable.outArrayPtr, lessThanMask);
            ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Creates a fresh test-class instance, compares its <c>_fld1</c> and <c>_fld2</c> with
        /// <c>Sse.CompareLessThan</c>, writes the outcome to this instance's output buffer,
        /// and validates it against the fresh instance's fields.
        /// </summary>
        public void RunLclFldScenario()
        {
            var localInstance = new SimpleBinaryOpTest__CompareLessThanSingle();
            var lessThanMask = Sse.CompareLessThan(localInstance._fld1, localInstance._fld2);

            Unsafe.Write(_dataTable.outArrayPtr, lessThanMask);
            ValidateResult(localInstance._fld1, localInstance._fld2, _dataTable.outArrayPtr);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Obtains a value from <c>TestStruct.Create()</c>, compares its <c>_fld1</c> and <c>_fld2</c>
        /// with <c>Sse.CompareLessThan</c>, writes the outcome to the output buffer, and validates it.
        /// </summary>
        public void RunStructLclFldScenario()
        {
            var localStruct = TestStruct.Create();
            var lessThanMask = Sse.CompareLessThan(localStruct._fld1, localStruct._fld2);

            Unsafe.Write(_dataTable.outArrayPtr, lessThanMask);
            ValidateResult(localStruct._fld1, localStruct._fld2, _dataTable.outArrayPtr);
        }
        /// <summary>
        /// Announces the scenario to the test framework, then compares the instance fields
        /// <c>_fld1</c> and <c>_fld2</c> with <c>Sse.CompareLessThan</c>, writes the outcome to the
        /// output buffer, and validates it.
        /// </summary>
        public void RunClassFldScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));

            var lessThanMask = Sse.CompareLessThan(_fld1, _fld2);

            Unsafe.Write(_dataTable.outArrayPtr, lessThanMask);
            ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
        }
Exemplo n.º 7
0
        /// <summary>
        /// Reads both operands from the data table's input buffers via <c>Unsafe.Read</c> into locals,
        /// compares them with <c>Sse.CompareLessThan</c>, writes the outcome to the output buffer,
        /// and validates it against the two locals.
        /// </summary>
        public void RunLclVarScenario_UnsafeRead()
        {
            Vector128<Single> lhs = Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr);
            Vector128<Single> rhs = Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr);
            var lessThanMask = Sse.CompareLessThan(lhs, rhs);

            Unsafe.Write(_dataTable.outArrayPtr, lessThanMask);
            ValidateResult(lhs, rhs, _dataTable.outArrayPtr);
        }
Exemplo n.º 8
0
        /// <summary>
        /// Loads both operands from the data table's input buffers via <c>Sse.LoadAlignedVector128</c>
        /// into locals, compares them with <c>Sse.CompareLessThan</c>, writes the outcome to the
        /// output buffer, and validates it against the two locals.
        /// </summary>
        public void RunLclVarScenario_LoadAligned()
        {
            var lhs = Sse.LoadAlignedVector128((Single*)_dataTable.inArray1Ptr);
            var rhs = Sse.LoadAlignedVector128((Single*)_dataTable.inArray2Ptr);
            var lessThanMask = Sse.CompareLessThan(lhs, rhs);

            Unsafe.Write(_dataTable.outArrayPtr, lessThanMask);
            ValidateResult(lhs, rhs, _dataTable.outArrayPtr);
        }
Exemplo n.º 9
0
        /// <summary>
        /// Lane-wise less-than comparison of <paramref name="left"/> and <paramref name="right"/>.
        /// Dispatches to <c>Sse.CompareLessThan</c> when SSE is supported and to
        /// <c>CompareLessThan_Software</c> otherwise.
        /// </summary>
        /// <param name="left">Left-hand operand.</param>
        /// <param name="right">Right-hand operand.</param>
        /// <returns>The comparison result produced by whichever implementation was selected.</returns>
        public static Vector128<float> CompareLessThan(Vector4FParam1_3 left, Vector4FParam1_3 right)
        {
            if (!Sse.IsSupported)
            {
                return CompareLessThan_Software(left, right);
            }

            return Sse.CompareLessThan(left, right);
        }
Exemplo n.º 10
0
        /// <summary>
        /// Feeds two <c>Sse.LoadAlignedVector128</c> loads of the data table's input buffers directly
        /// into <c>Sse.CompareLessThan</c>, writes the outcome to the output buffer, and validates
        /// it against the input buffers.
        /// </summary>
        public void RunBasicScenario_LoadAligned()
        {
            var lessThanMask = Sse.CompareLessThan(
                Sse.LoadAlignedVector128((Single*)_dataTable.inArray1Ptr),
                Sse.LoadAlignedVector128((Single*)_dataTable.inArray2Ptr));

            Unsafe.Write(_dataTable.outArrayPtr, lessThanMask);
            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
        }
Exemplo n.º 11
0
        /// <summary>
        /// Compares <c>_clsVar1</c> and <c>_clsVar2</c> with <c>Sse.CompareLessThan</c>, writes the
        /// outcome to the output buffer, and validates it against the same two variables.
        /// </summary>
        public void RunClsVarScenario()
        {
            var lessThanMask = Sse.CompareLessThan(_clsVar1, _clsVar2);

            Unsafe.Write(_dataTable.outArrayPtr, lessThanMask);
            ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
        }
        /// <summary>
        /// Announces the scenario to the test framework, creates a fresh test-class instance,
        /// compares its <c>_fld1</c> and <c>_fld2</c> with <c>Sse.CompareLessThan</c>, writes the
        /// outcome to this instance's output buffer, and validates it.
        /// </summary>
        public void RunClassLclFldScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));

            var localInstance = new SimpleBinaryOpTest__CompareLessThanSingle();
            var lessThanMask = Sse.CompareLessThan(localInstance._fld1, localInstance._fld2);

            Unsafe.Write(_dataTable.outArrayPtr, lessThanMask);
            ValidateResult(localInstance._fld1, localInstance._fld2, _dataTable.outArrayPtr);
        }
Exemplo n.º 13
0
        /// <summary>
        /// Feeds two <c>Unsafe.Read</c> reads of the data table's input buffers directly into
        /// <c>Sse.CompareLessThan</c>, writes the outcome to the output buffer, and validates it
        /// against the input buffers.
        /// </summary>
        public void RunBasicScenario_UnsafeRead()
        {
            var lessThanMask = Sse.CompareLessThan(
                Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr),
                Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr));

            Unsafe.Write(_dataTable.outArrayPtr, lessThanMask);
            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
        }
        /// <summary>
        /// Announces the scenario to the test framework, obtains a value from
        /// <c>TestStruct.Create()</c>, compares its <c>_fld1</c> and <c>_fld2</c> with
        /// <c>Sse.CompareLessThan</c>, writes the outcome to the output buffer, and validates it.
        /// </summary>
        public void RunStructLclFldScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));

            var localStruct = TestStruct.Create();
            var lessThanMask = Sse.CompareLessThan(localStruct._fld1, localStruct._fld2);

            Unsafe.Write(_dataTable.outArrayPtr, lessThanMask);
            ValidateResult(localStruct._fld1, localStruct._fld2, _dataTable.outArrayPtr);
        }
        /// <summary>
        /// Announces the scenario to the test framework, reads both operands from the data table's
        /// input buffers via <c>Unsafe.Read</c> into locals, compares them with
        /// <c>Sse.CompareLessThan</c>, writes the outcome to the output buffer, and validates it.
        /// </summary>
        public void RunLclVarScenario_UnsafeRead()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));

            Vector128<Single> lhs = Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr);
            Vector128<Single> rhs = Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr);
            var lessThanMask = Sse.CompareLessThan(lhs, rhs);

            Unsafe.Write(_dataTable.outArrayPtr, lessThanMask);
            ValidateResult(lhs, rhs, _dataTable.outArrayPtr);
        }
        /// <summary>
        /// Announces the scenario to the test framework, loads both operands from the data table's
        /// input buffers via <c>Sse.LoadVector128</c> into locals, compares them with
        /// <c>Sse.CompareLessThan</c>, writes the outcome to the output buffer, and validates it.
        /// </summary>
        public void RunLclVarScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));

            var lhs = Sse.LoadVector128((Single*)_dataTable.inArray1Ptr);
            var rhs = Sse.LoadVector128((Single*)_dataTable.inArray2Ptr);
            var lessThanMask = Sse.CompareLessThan(lhs, rhs);

            Unsafe.Write(_dataTable.outArrayPtr, lessThanMask);
            ValidateResult(lhs, rhs, _dataTable.outArrayPtr);
        }
        /// <summary>
        /// Announces the scenario to the test framework, loads both operands from the data table's
        /// input buffers via <c>Sse.LoadAlignedVector128</c> into locals, compares them with
        /// <c>Sse.CompareLessThan</c>, writes the outcome to the output buffer, and validates it.
        /// </summary>
        public void RunLclVarScenario_LoadAligned()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));

            var lhs = Sse.LoadAlignedVector128((Single*)_dataTable.inArray1Ptr);
            var rhs = Sse.LoadAlignedVector128((Single*)_dataTable.inArray2Ptr);
            var lessThanMask = Sse.CompareLessThan(lhs, rhs);

            Unsafe.Write(_dataTable.outArrayPtr, lessThanMask);
            ValidateResult(lhs, rhs, _dataTable.outArrayPtr);
        }
Exemplo n.º 18
0
        /// <summary>
        /// Tests two four-lane vectors for overlap by comparing the low half (lanes 0-1) of each
        /// against the high half (lanes 2-3) of the other.
        /// </summary>
        /// <remarks>
        /// NOTE(review): presumably each vector encodes a 2D box as (min.x, min.y, max.x, max.y);
        /// confirm against callers.
        /// </remarks>
        /// <param name="a">First vector.</param>
        /// <param name="b">Second vector.</param>
        /// <returns>
        /// <c>true</c> when, in every lane, the low half of neither vector exceeds the high half
        /// of the other; <c>false</c> otherwise.
        /// </returns>
        internal static bool IsIntersectingSse(Vector128<float> a, Vector128<float> b)
        {
            // Duplicate each half across the full register: low -> (x0, x1, x0, x1), high -> (x2, x3, x2, x3).
            Vector128<float> aLow = Sse.MoveLowToHigh(a, a);
            Vector128<float> aHigh = Sse.MoveHighToLow(a, a);
            Vector128<float> bLow = Sse.MoveLowToHigh(b, b);
            Vector128<float> bHigh = Sse.MoveHighToLow(b, b);

            Vector128<float> aStartsAfterB = Sse.CompareGreaterThan(aLow, bHigh);
            Vector128<float> aEndsBeforeB = Sse.CompareLessThan(aHigh, bLow);
            Vector128<float> separated = Sse.Or(aEndsBeforeB, aStartsAfterB);

            // Any set sign bit means at least one lane reported a separation.
            return Sse.MoveMask(separated) == 0;
        }
        /// <summary>
        /// Announces the scenario to the test framework, obtains a value from
        /// <c>TestStruct.Create()</c>, loads its <c>_fld1</c> and <c>_fld2</c> by address via
        /// <c>Sse.LoadVector128</c>, compares them with <c>Sse.CompareLessThan</c>, writes the
        /// outcome to the output buffer, and validates it.
        /// </summary>
        public void RunStructLclFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario_Load));

            var localStruct = TestStruct.Create();
            var lessThanMask = Sse.CompareLessThan(
                Sse.LoadVector128((Single*)(&localStruct._fld1)),
                Sse.LoadVector128((Single*)(&localStruct._fld2)));

            Unsafe.Write(_dataTable.outArrayPtr, lessThanMask);
            ValidateResult(localStruct._fld1, localStruct._fld2, _dataTable.outArrayPtr);
        }
 /// <summary>
 /// Rounds each lane of <paramref name="a"/> up to the nearest integral value.
 /// </summary>
 /// <remarks>
 /// Uses <c>Sse41.RoundToPositiveInfinity</c> when SSE4.1 is available. Otherwise each lane is
 /// truncated through an int32 round-trip and incremented by one where the truncated value is
 /// below the input. Lanes whose magnitude is at least 2^23 (already integral in single
 /// precision), and NaN lanes, are returned unchanged: the int32 conversion cannot represent
 /// NaN, infinities or magnitudes of 2^31 and above, and would otherwise corrupt them.
 /// </remarks>
 /// <param name="a">Input vector.</param>
 /// <returns>The lane-wise ceiling of <paramref name="a"/>.</returns>
 public static f32 Ceil_f32(f32 a)
 {
     if (Sse41.IsSupported)
     {
         return(Sse41.RoundToPositiveInfinity(a));
     }
     else
     {
         // |a| < 2^23 selects the lanes that can carry a fraction and fit in int32.
         // NaN compares false, so NaN lanes are routed to the pass-through path.
         var absMask  = Vector128.Create(0x7FFFFFFF).AsSingle();
         var hasFract = Sse.CompareLessThan(Sse.And(a, absMask), Vector128.Create(8388608f));

         f32 fval    = Sse2.ConvertToVector128Single(Sse2.ConvertToVector128Int32WithTruncation(a));
         f32 cmp     = Sse.CompareLessThan(fval, a);
         var rounded = Sse.Add(fval, Sse.And(cmp, Vector128.Create(1f)));

         // Blend: rounded value where the conversion was valid, the original lane elsewhere.
         return(Sse.Or(Sse.And(hasFract, rounded), Sse.AndNot(hasFract, a)));
     }
 }
 /// <summary>
 /// Rounds each lane of <paramref name="a"/> down to the nearest integral value.
 /// </summary>
 /// <remarks>
 /// Uses <c>Sse41.RoundToNegativeInfinity</c> when SSE4.1 is available. Otherwise each lane is
 /// truncated through an int32 round-trip and decremented by one where the input is below the
 /// truncated value. Lanes whose magnitude is at least 2^23 (already integral in single
 /// precision), and NaN lanes, are returned unchanged: the int32 conversion cannot represent
 /// NaN, infinities or magnitudes of 2^31 and above, and would otherwise corrupt them.
 /// </remarks>
 /// <param name="a">Input vector.</param>
 /// <returns>The lane-wise floor of <paramref name="a"/>.</returns>
 public static f32 Floor_f32(f32 a)
 {
     if (Sse41.IsSupported)
     {
         return(Sse41.RoundToNegativeInfinity(a));
     }
     else
     {
         // |a| < 2^23 selects the lanes that can carry a fraction and fit in int32.
         // NaN compares false, so NaN lanes are routed to the pass-through path.
         var absMask  = Vector128.Create(0x7FFFFFFF).AsSingle();
         var hasFract = Sse.CompareLessThan(Sse.And(a, absMask), Vector128.Create(8388608f));

         f32 fval    = Sse2.ConvertToVector128Single(Sse2.ConvertToVector128Int32WithTruncation(a));
         f32 cmp     = Sse.CompareLessThan(a, fval);
         var rounded = Sse.Subtract(fval, Sse.And(cmp, Vector128.Create(1f)));

         // Blend: rounded value where the conversion was valid, the original lane elsewhere.
         return(Sse.Or(Sse.And(hasFract, rounded), Sse.AndNot(hasFract, a)));
     }
 }
            /// <summary>
            /// Pins this instance's <c>_fld1</c> and <c>_fld2</c>, loads them through their addresses
            /// via <c>Sse.LoadVector128</c>, compares them with <c>Sse.CompareLessThan</c>, writes the
            /// outcome to the output buffer of <paramref name="testClass"/>, and has
            /// <paramref name="testClass"/> validate it.
            /// </summary>
            /// <param name="testClass">Test instance supplying the output buffer and the validation routine.</param>
            public void RunStructFldScenario_Load(SimpleBinaryOpTest__CompareLessThanSingle testClass)
            {
                fixed (Vector128<Single>* lhsPtr = &_fld1)
                fixed (Vector128<Single>* rhsPtr = &_fld2)
                {
                    var lessThanMask = Sse.CompareLessThan(
                        Sse.LoadVector128((Single*)lhsPtr),
                        Sse.LoadVector128((Single*)rhsPtr));

                    Unsafe.Write(testClass._dataTable.outArrayPtr, lessThanMask);
                    testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
                }
            }
        /// <summary>
        /// Announces the scenario to the test framework, pins the instance fields <c>_fld1</c> and
        /// <c>_fld2</c>, loads them through their addresses via <c>Sse.LoadVector128</c>, compares
        /// them with <c>Sse.CompareLessThan</c>, writes the outcome to the output buffer, and
        /// validates it.
        /// </summary>
        public void RunClassFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario_Load));

            fixed (Vector128<Single>* lhsPtr = &_fld1)
            fixed (Vector128<Single>* rhsPtr = &_fld2)
            {
                var lessThanMask = Sse.CompareLessThan(
                    Sse.LoadVector128((Single*)lhsPtr),
                    Sse.LoadVector128((Single*)rhsPtr));

                Unsafe.Write(_dataTable.outArrayPtr, lessThanMask);
                ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
            }
        }
 /// <summary>
 /// Lane-wise <c>lhs &lt; rhs</c> via <c>Sse.CompareLessThan</c>, with the resulting mask
 /// reinterpreted as 32-bit integers.
 /// </summary>
 /// <param name="lhs">Left-hand operand.</param>
 /// <param name="rhs">Right-hand operand.</param>
 /// <returns>The comparison mask reinterpreted through <c>AsInt32()</c>.</returns>
 public static m32 LessThan(f32 lhs, f32 rhs)
 {
     return Sse.CompareLessThan(lhs, rhs).AsInt32();
 }
Exemplo n.º 25
0
 /// <summary>
 /// Lane-wise <c>left &lt; right</c>, forwarded to <c>Sse.CompareLessThan</c>.
 /// </summary>
 /// <param name="left">Left-hand operand.</param>
 /// <param name="right">Right-hand operand.</param>
 /// <returns>The mask vector returned by <c>Sse.CompareLessThan</c>.</returns>
 public static Vector128<float> op_LessThan(Vector128<float> left, Vector128<float> right)
 {
     return Sse.CompareLessThan(left, right);
 }
Exemplo n.º 26
0
 /// <summary>
 /// Lane-wise <c>a &lt; b</c>, forwarded to <c>Sse.CompareLessThan</c>.
 /// </summary>
 /// <param name="a">Left-hand operand.</param>
 /// <param name="b">Right-hand operand.</param>
 /// <returns>The mask vector returned by <c>Sse.CompareLessThan</c>.</returns>
 public static __m128 _mm_cmplt_ps(__m128 a, __m128 b)
 {
     return Sse.CompareLessThan(a, b);
 }
Exemplo n.º 27
0
    public void ResizeBicubic(FastBitmap rtnImage)
    {
        float scaleX = (float)this.width / rtnImage.width;
        float scaleY = (float)this.height / rtnImage.height;

        if (scaleX > 1 || scaleY > 1)
        {
            throw new Exception("拡大のみ対応");
        }

        float[] tmpa = new float[rtnImage.width * 4 * this.height];
        fixed(float *tmpp = tmpa)
        {
            float *tmp     = tmpp;
            var    _00mask = Vector128.Create(0, 255, 255, 255, 1, 255, 255, 255, 2, 255, 255, 255, 3, 255, 255, 255);
            var    _01mask = Vector128.Create(4, 255, 255, 255, 5, 255, 255, 255, 6, 255, 255, 255, 7, 255, 255, 255);
            var    _10mask = Vector128.Create(8, 255, 255, 255, 9, 255, 255, 255, 10, 255, 255, 255, 11, 255, 255, 255);
            var    _11mask = Vector128.Create(12, 255, 255, 255, 13, 255, 255, 255, 14, 255, 255, 255, 15, 255, 255, 255);
            var    _vmask  = Vector128.Create(0, 4, 8, 12, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255);

            var _1012  = Vector128.Create(-1, 0, 1, 2);
            var _0123i = Vector128.Create(0, 1, 2, 3);

            var _0000   = Vector128.Create(0, 0, 0, 0);
            var _0000f  = Vector128.Create(0f, 0, 0, 0);
            var _255f   = Vector128.Create(255f, 255, 255, 255);
            var _1111   = Vector128.Create(1, 1, 1, 1);
            var _1111f  = Vector128.Create(1f, 1, 1, 1);
            var _4444f  = Vector128.Create(4f, 4, 4, 4);
            var _4444   = Vector128.Create(4, 4, 4, 4);
            var _5555f  = Vector128.Create(5f, 5, 5, 5);
            var _2222f  = Vector128.Create(2f, 2, 2, 2);
            var _8888f  = Vector128.Create(8f, 8, 8, 8);
            var _7f     = Vector128.Create(0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff).AsSingle();
            var _ff     = Vector128.Create(-1, -1, -1, -1);
            var _stride = Vector128.Create(rtnImage.width * 4, rtnImage.width * 4, rtnImage.width * 4, rtnImage.width * 4);

            Parallel.For(0, this.height, (y) =>
            {
                float py      = (y * scaleY);
                float *tmpPos = tmp + y * rtnImage.width * 4;
                for (int x = 0; x < rtnImage.width; x++)
                {
                    float px = (x * scaleX);
                    int sx   = (int)px;

                    var _px = Vector128.CreateScalar(px);
                    _px     = Sse.Shuffle(_px, _px, 0);

                    var _sx = Vector128.CreateScalar(sx);
                    _sx     = Sse2.Shuffle(_sx, 0);

                    var _width = Vector128.CreateScalar(this.width);
                    _width     = Sse2.Shuffle(_width, 0);

                    var _x2 = Sse2.Add(_sx, _1012);

                    var _d  = Sse.And(Sse.Subtract(_px, Sse2.ConvertToVector128Single(_x2)), _7f);
                    var _d2 = Sse.Multiply(_d, _d);
                    var _d3 = Sse.Multiply(_d2, _d);

                    var w1   = Sse.Add(_1111f, Sse.Subtract(_d3, Sse.Multiply(_2222f, _d2)));
                    var w2   = Sse.Subtract(Sse.Subtract(Sse.Add(_4444f, Sse.Multiply(_5555f, _d2)), Sse.Multiply(_d, _8888f)), _d3);
                    var wb   = Sse2.CompareGreaterThan(_d, _1111f);
                    var _w   = Sse41.BlendVariable(w1, w2, wb);
                    var _xpb = Sse2.Or(Sse2.CompareLessThan(_x2, _0000), Sse41.MultiplyLow(Sse2.AndNot(Sse2.CompareLessThan(_x2, _width), _1111).AsInt32(), _ff));
                    var _xpp = Sse2.And(_sx, _xpb);
                    var _xp  = Sse41.BlendVariable(_x2, _xpp, _xpb);

                    var p = Avx2.GatherVector128((uint *)(this._ptr + this._stride * y), _xp, 4).AsByte();


                    var _p0 = Sse2.ConvertToVector128Single(Ssse3.Shuffle(p, _00mask).AsInt32());
                    var _p1 = Sse2.ConvertToVector128Single(Ssse3.Shuffle(p, _01mask).AsInt32());
                    var _p2 = Sse2.ConvertToVector128Single(Ssse3.Shuffle(p, _10mask).AsInt32());
                    var _p3 = Sse2.ConvertToVector128Single(Ssse3.Shuffle(p, _11mask).AsInt32());

                    var _w0 = Sse.Shuffle(_w, _w, 0);
                    var _w1 = Sse.Shuffle(_w, _w, 0b01010101);
                    var _w2 = Sse.Shuffle(_w, _w, 0b10101010);
                    var _w3 = Sse.Shuffle(_w, _w, 0b11111111);

                    var rgbaf = Sse.Add(Sse.Add(Sse.Multiply(_p0, _w0), Sse.Multiply(_p1, _w1)), Sse.Add(Sse.Multiply(_p2, _w2), Sse.Multiply(_p3, _w3)));

                    Sse2.Store(tmpPos + x * 4, rgbaf);
                }
            });

            Parallel.For(0, rtnImage.height, (y) =>
            {
                float py = (y * scaleY);
                int sy   = (int)py;

                uint *store = stackalloc uint[4];

                var _py = Vector128.CreateScalar(py);
                _py     = Sse.Shuffle(_py, _py, 0);

                var _sy = Vector128.CreateScalar(sy);
                _sy     = Sse2.Shuffle(_sy, 0);

                var _height = Vector128.CreateScalar(this.height);
                _height     = Sse2.Shuffle(_height, 0);

                var _y2 = Sse2.Add(_sy, _1012);

                var _d  = Sse.And(Sse.Subtract(_py, Sse2.ConvertToVector128Single(_y2)), _7f);
                var _d2 = Sse.Multiply(_d, _d);
                var _d3 = Sse.Multiply(_d2, _d);

                var w1 = Sse.Add(_1111f, Sse.Subtract(_d3, Sse.Multiply(_2222f, _d2)));
                var w2 = Sse.Subtract(Sse.Subtract(Sse.Add(_4444f, Sse.Multiply(_5555f, _d2)), Sse.Multiply(_d, _8888f)), _d3);
                var wb = Sse2.CompareGreaterThan(_d, _1111f);
                var _w = Sse41.BlendVariable(w1, w2, wb);


                var _ypb = Sse2.Or(Sse2.CompareLessThan(_y2, _0000), Sse41.MultiplyLow(Sse2.AndNot(Sse2.CompareLessThan(_y2, _height), _1111).AsInt32(), _ff));
                var _ypp = Sse2.And(_sy, _ypb);
                var _yp  = Sse41.BlendVariable(_y2, _ypp, _ypb);
                var _yps = Sse41.MultiplyLow(_yp, _stride);

                var _yp0  = Sse2.Add(Sse2.Shuffle(_yps, 0), _0123i);
                var _yp1  = Sse2.Add(Sse2.Shuffle(_yps, 0b01010101), _0123i);
                var _yp2  = Sse2.Add(Sse2.Shuffle(_yps, 0b10101010), _0123i);
                var _yp3  = Sse2.Add(Sse2.Shuffle(_yps, 0b11111111), _0123i);
                uint *rtn = (uint *)(rtnImage._ptr + rtnImage._stride * y);

                for (int x = 0; x < rtnImage.width; x++)
                {
                    var _p0 = Avx2.GatherVector128((float *)(tmp), _yp0, 4);
                    var _p1 = Avx2.GatherVector128((float *)(tmp), _yp1, 4);
                    var _p2 = Avx2.GatherVector128((float *)(tmp), _yp2, 4);
                    var _p3 = Avx2.GatherVector128((float *)(tmp), _yp3, 4);

                    var _w0 = Sse.Shuffle(_w, _w, 0);
                    var _w1 = Sse.Shuffle(_w, _w, 0b01010101);
                    var _w2 = Sse.Shuffle(_w, _w, 0b10101010);
                    var _w3 = Sse.Shuffle(_w, _w, 0b11111111);

                    var rgbaf = Sse.Add(Sse.Add(Sse.Multiply(_p0, _w0), Sse.Multiply(_p1, _w1)), Sse.Add(Sse.Multiply(_p2, _w2), Sse.Multiply(_p3, _w3)));

                    var _b0 = Sse.CompareLessThan(rgbaf, _0000f);
                    rgbaf   = Sse41.BlendVariable(rgbaf, _0000f, _b0);
                    var _b1 = Sse.CompareGreaterThan(rgbaf, _255f);
                    rgbaf   = Sse41.BlendVariable(rgbaf, _255f, _b1);

                    var rgbab = Sse2.ConvertToVector128Int32(rgbaf).AsByte();
                    var rgba  = Ssse3.Shuffle(rgbab, _vmask).AsUInt32();

                    Sse2.Store(store, rgba);

                    _yp0 = Sse2.Add(_yp0, _4444);
                    _yp1 = Sse2.Add(_yp1, _4444);
                    _yp2 = Sse2.Add(_yp2, _4444);
                    _yp3 = Sse2.Add(_yp3, _4444);
                    *rtn = *store;
                    rtn++;
                }
            });
Exemplo n.º 28
0
 /// <summary>
 /// Lane-wise <c>left &lt; right</c>, forwarded to <c>Sse.CompareLessThan</c>.
 /// </summary>
 /// <param name="left">Left-hand operand.</param>
 /// <param name="right">Right-hand operand.</param>
 /// <returns>The mask vector returned by <c>Sse.CompareLessThan</c>.</returns>
 public static Vector128<float> _mm_cmplt_ps(Vector128<float> left, Vector128<float> right)
     => Sse.CompareLessThan(left, right);