예제 #1
0
        /// <summary>
        /// Parses a number from the 16 bytes at <paramref name="p"/> using SSE intrinsics
        /// and range-checks the lanes that took part in the parse.
        /// </summary>
        /// <param name="p">Pointer to the input; 16 bytes are loaded from it unconditionally.</param>
        /// <param name="cnt">
        /// Index into the per-count lookup tables (mul0Array, mul1Array, cmp0Array,
        /// cmp1Array). Presumably the number of digits to parse - TODO confirm.
        /// </param>
        /// <param name="n">Receives the horizontal sum of the weighted lanes.</param>
        /// <returns>
        /// The horizontal sum of the compare masks: 0 when no selected lane compared
        /// greater than _9, otherwise minus the number of lanes that did (each true
        /// compare lane is all-ones, i.e. -1).
        /// </returns>
        public int TryParseSIMDUseCount(byte *p, int cnt, out int n)
        {
            var raw    = Sse2.LoadVector128(p);
            var digits = Sse.StaticCast<byte, sbyte>(raw);

            // subtmp is presumably the ASCII '0' bias - verify against its definition.
            digits = Sse2.Subtract(digits, subtmp);

            // mask0/mask1 presumably spread two groups of bytes into 32-bit lanes - TODO confirm.
            var data0 = Ssse3.Shuffle(digits, mask0);
            var data1 = Ssse3.Shuffle(digits, mask1);

            // Reinterpret each shuffled group once; both the weighting and the
            // validation below work on the same 32-bit lanes.
            var lanes0 = Sse.StaticCast<sbyte, int>(data0);
            var lanes1 = Sse.StaticCast<sbyte, int>(data1);

            // Weight every lane by its table entry and add the two groups.
            var weighted = Sse2.Add(
                Sse41.MultiplyLow(lanes0, mul0Array[cnt]),
                Sse41.MultiplyLow(lanes1, mul1Array[cnt]));

            // Two horizontal adds leave the total of all four lanes in every lane.
            weighted = Ssse3.HorizontalAdd(weighted, weighted);
            weighted = Ssse3.HorizontalAdd(weighted, weighted);

            n = Sse41.Extract(weighted, 3);

            // Validate BOTH groups. The second compare previously re-checked the first
            // group (data0) with cmp1Array, so the second group was never range-checked.
            var over0 = Sse2.CompareGreaterThan(Sse41.MultiplyLow(lanes0, cmp0Array[cnt]), _9);
            var over1 = Sse2.CompareGreaterThan(Sse41.MultiplyLow(lanes1, cmp1Array[cnt]), _9);

            var invalid = Sse2.Add(over0, over1);

            invalid = Ssse3.HorizontalAdd(invalid, invalid);
            invalid = Ssse3.HorizontalAdd(invalid, invalid);

            return Sse41.Extract(invalid, 3);
        }
예제 #2
0
        /// <summary>
        /// Multiplies each of the four source vectors element-wise with
        /// <paramref name="vfilter"/> and reduces every product vector to a single
        /// 32-bit sum.
        /// </summary>
        /// <param name="vsrc0">Source vector whose sum ends up in element 0.</param>
        /// <param name="vsrc1">Source vector whose sum ends up in element 1.</param>
        /// <param name="vsrc2">Source vector whose sum ends up in element 2.</param>
        /// <param name="vsrc3">Source vector whose sum ends up in element 3.</param>
        /// <param name="vfilter">Filter coefficients applied to every source vector.</param>
        /// <param name="zero">Unused; kept so existing call sites keep compiling.</param>
        /// <returns>A vector holding, from element 0 to 3: sum0, sum1, sum2, sum3.</returns>
        private static Vector128<int> MultiplyAddAdjacent(
            Vector128<short> vsrc0,
            Vector128<short> vsrc1,
            Vector128<short> vsrc2,
            Vector128<short> vsrc3,
            Vector128<short> vfilter,
            Vector128<int> zero)
        {
            // Each result holds four partial sums of adjacent 16-bit products.
            Vector128<int> part0 = Sse2.MultiplyAddAdjacent(vsrc0, vfilter);
            Vector128<int> part1 = Sse2.MultiplyAddAdjacent(vsrc1, vfilter);
            Vector128<int> part2 = Sse2.MultiplyAddAdjacent(vsrc2, vfilter);
            Vector128<int> part3 = Sse2.MultiplyAddAdjacent(vsrc3, vfilter);

            // Elements 0..3: { p0[0]+p0[1], p0[2]+p0[3], p1[0]+p1[1], p1[2]+p1[3] }
            Vector128<int> pair01 = Ssse3.HorizontalAdd(part0, part1);

            // Elements 0..3: { p2[0]+p2[1], p2[2]+p2[3], p3[0]+p3[1], p3[2]+p3[3] }
            Vector128<int> pair23 = Ssse3.HorizontalAdd(part2, part3);

            // Elements 0..3: { sum0, sum1, sum2, sum3 }. Pairing the inputs replaces the
            // eight horizontal adds against zero, two unpacks and one move that the
            // straightforward reduction needs, and yields the same values because
            // 32-bit wrap-around addition is associative.
            return Ssse3.HorizontalAdd(pair01, pair23);
        }
예제 #3
0
        /// <summary>
        /// Adds up all elements of <paramref name="source"/>, four at a time with SSE2,
        /// then folds in the leftover tail elements one by one.
        /// </summary>
        /// <param name="source">Values to sum.</param>
        /// <returns>The sum of the elements (32-bit wrap-around arithmetic).</returns>
        private unsafe int Sum_VectorizedSse2(ReadOnlySpan<int> source)
        {
            int total = 0;

            fixed (int* data = source)
            {
                int length    = source.Length;
                int vectorEnd = (length / 4) * 4;   // largest multiple of 4 not above length
                int index     = 0;

                Vector128<int> acc = Vector128<int>.Zero;

                for (; index < vectorEnd; index += 4)
                {
                    acc = Sse2.Add(acc, Sse2.LoadVector128(data + index));
                }

                // Reduce the four lanes so that element 0 holds the full total.
                if (!Ssse3.IsSupported)
                {
                    acc = Sse2.Add(acc, Sse2.Shuffle(acc, 0x4E));   // swap 64-bit halves
                    acc = Sse2.Add(acc, Sse2.Shuffle(acc, 0xB1));   // swap neighbouring lanes
                }
                else
                {
                    acc = Ssse3.HorizontalAdd(acc, acc);
                    acc = Ssse3.HorizontalAdd(acc, acc);
                }

                total = acc.ToScalar();

                // Scalar tail: at most three elements remain.
                for (; index < length; index++)
                {
                    total += data[index];
                }
            }

            return total;
        }
예제 #4
0
        /// <summary>
        /// Sums the first <paramref name="len"/> entries of <c>source.Amount</c>, two
        /// doubles per step with SSE2, and adds a trailing odd element separately.
        /// </summary>
        /// <param name="source">Object whose <c>Amount</c> buffer is pinned and read.</param>
        /// <param name="len">Number of leading elements to include in the sum.</param>
        /// <returns>The accumulated sum.</returns>
        public unsafe double SumAmount(DoDPOLines_v4 source, int len)
        {
            double total;

            fixed (double* amounts = source.Amount)
            {
                int pairedEnd = (len / 2) * 2;   // largest even count not above len
                int index     = 0;

                Vector128<double> acc = Vector128<double>.Zero;

                for (; index < pairedEnd; index += 2)
                {
                    acc = Sse2.Add(acc, Sse2.LoadVector128(amounts + index));
                }

                // Fold the two lanes together; element 0 then carries the vector sum.
                acc   = Ssse3.HorizontalAdd(acc, acc);
                total = acc.ToScalar();

                for (; index < len; index++)
                {
                    total += amounts[index];
                }
            }

            return total;
        }
예제 #5
0
        /// <summary>
        /// Sums <c>_source</c> four elements at a time, reinterpreting each group of four
        /// ints as a <c>Vector128</c> via <c>Unsafe.As</c>, then adds the remaining tail.
        /// </summary>
        /// <returns>The sum of all elements of <c>_source</c>.</returns>
        public unsafe int CsSumVectorizedSse2UnsafeAs()
        {
            Vector128<int> acc = Vector128<int>.Zero;

            int index     = 0;
            int vectorEnd = _source.Length - (_source.Length % 4);

            for (; index < vectorEnd; index += 4)
            {
                acc = Sse2.Add(acc, Unsafe.As<int, Vector128<int>>(ref _source[index]));
            }

            // Collapse the four lanes into element 0.
            if (!Ssse3.IsSupported)
            {
                acc = Sse2.Add(acc, Sse2.Shuffle(acc, 0x4E));
                acc = Sse2.Add(acc, Sse2.Shuffle(acc, 0xB1));
            }
            else
            {
                acc = Ssse3.HorizontalAdd(acc, acc);
                acc = Ssse3.HorizontalAdd(acc, acc);
            }

            int total = acc.ToScalar();

            // Scalar tail for the elements that did not fill a whole vector.
            for (; index < _source.Length; index++)
            {
                total += _source[index];
            }

            return total;
        }
예제 #6
0
        // Field scenario: passes the fields _fld1 and _fld2 directly to
        // Ssse3.HorizontalAdd, stores the vector in the output buffer and hands the
        // operands plus that buffer to ValidateResult.
        public void RunClassFldScenario()
        {
            var hadd = Ssse3.HorizontalAdd(_fld1, _fld2);

            Unsafe.Write(_dataTable.outArrayPtr, hadd);

            ValidateResult(
                _fld1,
                _fld2,
                _dataTable.outArrayPtr);
        }
예제 #7
0
            // Field scenario on this type: the fields _fld1 and _fld2 are the operands,
            // while the output buffer and the validation come from testClass.
            public void RunStructFldScenario(HorizontalBinaryOpTest__HorizontalAddInt16 testClass)
            {
                var hadd = Ssse3.HorizontalAdd(_fld1, _fld2);

                Unsafe.Write(testClass._dataTable.outArrayPtr, hadd);

                testClass.ValidateResult(
                    _fld1,
                    _fld2,
                    testClass._dataTable.outArrayPtr);
            }
예제 #8
0
        /// <summary>
        /// Repeatedly applies <c>Ssse3.HorizontalAdd</c> until cancellation is requested.
        /// </summary>
        /// <param name="cancellationToken">Token polled once per outer iteration.</param>
        /// <returns>
        /// The number of completed outer iterations (each runs LENGTH horizontal adds),
        /// or 0 when SSSE3 is not supported.
        /// </returns>
        public override ulong Run(CancellationToken cancellationToken)
        {
            if (!Ssse3.IsSupported)
            {
                return 0uL;
            }

            Span<int> source      = new Span<int>(new[] { randomInt, randomInt, randomInt, randomInt });
            Span<int> destination = new Span<int>(Enumerable.Repeat(int.MaxValue / 2, 4).ToArray());
            ulong     completed   = 0uL;

            unsafe
            {
                fixed (int* pDestination = destination)
                fixed (int* pSource = source)
                {
                    Vector128<int> sourceVector = Sse2.LoadVector128(pSource);
                    Vector128<int> accumulator  = Sse2.LoadVector128(pDestination);

                    while (!cancellationToken.IsCancellationRequested)
                    {
                        int step = 0;
                        while (step < LENGTH)
                        {
                            accumulator = Ssse3.HorizontalAdd(accumulator, sourceVector);
                            step++;
                        }

                        // Write the vector back to memory after every batch.
                        Sse2.Store(pDestination, accumulator);

                        completed++;
                    }
                }
            }

            return completed;
        }
예제 #9
0
        // Local-instance field scenario: creates a fresh
        // HorizontalBinaryOpTest__HorizontalAddInt16 and uses its _fld1/_fld2 fields as
        // the operands of Ssse3.HorizontalAdd.
        public void RunClassLclFldScenario()
        {
            var local = new HorizontalBinaryOpTest__HorizontalAddInt16();
            var hadd  = Ssse3.HorizontalAdd(local._fld1, local._fld2);

            Unsafe.Write(_dataTable.outArrayPtr, hadd);

            ValidateResult(
                local._fld1,
                local._fld2,
                _dataTable.outArrayPtr);
        }
예제 #10
0
        // Local-struct field scenario: obtains a TestStruct from TestStruct.Create() and
        // uses its _fld1/_fld2 fields as the operands of Ssse3.HorizontalAdd.
        public void RunStructLclFldScenario()
        {
            var local = TestStruct.Create();
            var hadd  = Ssse3.HorizontalAdd(local._fld1, local._fld2);

            Unsafe.Write(_dataTable.outArrayPtr, hadd);

            ValidateResult(
                local._fld1,
                local._fld2,
                _dataTable.outArrayPtr);
        }
예제 #11
0
        // Local-variable scenario: both operands are first loaded into locals with
        // Sse2.LoadAlignedVector128 and then passed to Ssse3.HorizontalAdd.
        public void RunLclVarScenario_LoadAligned()
        {
            var op1 = Sse2.LoadAlignedVector128((Int16 *)(_dataTable.inArray1Ptr));
            var op2 = Sse2.LoadAlignedVector128((Int16 *)(_dataTable.inArray2Ptr));

            var hadd = Ssse3.HorizontalAdd(op1, op2);

            Unsafe.Write(_dataTable.outArrayPtr, hadd);
            ValidateResult(op1, op2, _dataTable.outArrayPtr);
        }
예제 #12
0
        // Field scenario: announces itself to the test framework, then passes the
        // fields _fld1 and _fld2 directly to Ssse3.HorizontalAdd and forwards the
        // operands and the output buffer to ValidateResult.
        public void RunClassFldScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));

            var hadd = Ssse3.HorizontalAdd(_fld1, _fld2);

            Unsafe.Write(_dataTable.outArrayPtr, hadd);

            ValidateResult(
                _fld1,
                _fld2,
                _dataTable.outArrayPtr);
        }
예제 #13
0
        // Local-variable scenario: both operands are read into locals with Unsafe.Read
        // before being passed to Ssse3.HorizontalAdd.
        public void RunLclVarScenario_UnsafeRead()
        {
            var op1 = Unsafe.Read<Vector128<Int16>>(_dataTable.inArray1Ptr);
            var op2 = Unsafe.Read<Vector128<Int16>>(_dataTable.inArray2Ptr);

            var hadd = Ssse3.HorizontalAdd(op1, op2);

            Unsafe.Write(_dataTable.outArrayPtr, hadd);
            ValidateResult(op1, op2, _dataTable.outArrayPtr);
        }
예제 #14
0
        // Basic scenario: the two aligned loads are written inline as the arguments of
        // Ssse3.HorizontalAdd; validation receives the raw input and output pointers.
        public void RunBasicScenario_LoadAligned()
        {
            var hadd = Ssse3.HorizontalAdd(Sse2.LoadAlignedVector128((Int16 *)(_dataTable.inArray1Ptr)),
                                           Sse2.LoadAlignedVector128((Int16 *)(_dataTable.inArray2Ptr)));

            Unsafe.Write(_dataTable.outArrayPtr, hadd);

            ValidateResult(
                _dataTable.inArray1Ptr,
                _dataTable.inArray2Ptr,
                _dataTable.outArrayPtr);
        }
예제 #15
0
        // Basic scenario: the two Unsafe.Read calls are written inline as the arguments
        // of Ssse3.HorizontalAdd; validation receives the raw input and output pointers.
        public void RunBasicScenario_UnsafeRead()
        {
            var hadd = Ssse3.HorizontalAdd(Unsafe.Read<Vector128<Int16>>(_dataTable.inArray1Ptr),
                                           Unsafe.Read<Vector128<Int16>>(_dataTable.inArray2Ptr));

            Unsafe.Write(_dataTable.outArrayPtr, hadd);

            ValidateResult(
                _dataTable.inArray1Ptr,
                _dataTable.inArray2Ptr,
                _dataTable.outArrayPtr);
        }
        // Local-instance field scenario: announces itself to the test framework, creates
        // a fresh HorizontalBinaryOpTest__HorizontalAddInt32 and uses its _fld1/_fld2
        // fields as the operands of Ssse3.HorizontalAdd.
        public void RunClassLclFldScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));

            var local = new HorizontalBinaryOpTest__HorizontalAddInt32();
            var hadd  = Ssse3.HorizontalAdd(local._fld1, local._fld2);

            Unsafe.Write(_dataTable.outArrayPtr, hadd);

            ValidateResult(
                local._fld1,
                local._fld2,
                _dataTable.outArrayPtr);
        }
예제 #17
0
        // Class-variable scenario: _clsVar1 and _clsVar2 are passed directly to
        // Ssse3.HorizontalAdd; the result goes to the output buffer and is validated.
        public void RunClsVarScenario()
        {
            var hadd = Ssse3.HorizontalAdd(_clsVar1, _clsVar2);

            Unsafe.Write(_dataTable.outArrayPtr, hadd);

            ValidateResult(
                _clsVar1,
                _clsVar2,
                _dataTable.outArrayPtr);
        }
예제 #18
0
        // Local-struct field scenario: announces itself to the test framework, obtains a
        // TestStruct from TestStruct.Create() and uses its _fld1/_fld2 fields as the
        // operands of Ssse3.HorizontalAdd.
        public void RunStructLclFldScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));

            var local = TestStruct.Create();
            var hadd  = Ssse3.HorizontalAdd(local._fld1, local._fld2);

            Unsafe.Write(_dataTable.outArrayPtr, hadd);

            ValidateResult(
                local._fld1,
                local._fld2,
                _dataTable.outArrayPtr);
        }
예제 #19
0
        // Local-variable scenario: announces itself to the test framework, reads both
        // operands into locals with Unsafe.Read and passes them to Ssse3.HorizontalAdd.
        public void RunLclVarScenario_UnsafeRead()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));

            var left  = Unsafe.Read<Vector128<Int16>>(_dataTable.inArray1Ptr);
            var right = Unsafe.Read<Vector128<Int16>>(_dataTable.inArray2Ptr);

            var hadd = Ssse3.HorizontalAdd(left, right);

            Unsafe.Write(_dataTable.outArrayPtr, hadd);
            ValidateResult(left, right, _dataTable.outArrayPtr);
        }
예제 #20
0
        // Local-variable scenario: announces itself to the test framework, loads both
        // operands into locals with Sse2.LoadAlignedVector128 and passes them to
        // Ssse3.HorizontalAdd.
        public void RunLclVarScenario_LoadAligned()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));

            var left  = Sse2.LoadAlignedVector128((Int16 *)(_dataTable.inArray1Ptr));
            var right = Sse2.LoadAlignedVector128((Int16 *)(_dataTable.inArray2Ptr));

            var hadd = Ssse3.HorizontalAdd(left, right);

            Unsafe.Write(_dataTable.outArrayPtr, hadd);
            ValidateResult(left, right, _dataTable.outArrayPtr);
        }
예제 #21
0
        /// <summary>
        /// Horizontally adds <paramref name="left"/> and <paramref name="right"/>, using
        /// the SSSE3 instruction when available and <c>HorizontalAdd_Software</c> otherwise.
        /// </summary>
        /// <param name="left">First operand.</param>
        /// <param name="right">Second operand.</param>
        /// <returns>The result of the hardware or the software horizontal add.</returns>
        public static Vector4Int32 HorizontalAdd(Vector4Int32Param1_3 left, Vector4Int32Param1_3 right)
        {
            // TODO can Sse be used over the software fallback?
            if (!Ssse3.IsSupported)
            {
                return HorizontalAdd_Software(left, right);
            }

            return Ssse3.HorizontalAdd(left, right);
        }
예제 #22
0
        // Local-variable scenario: announces itself to the test framework, loads both
        // operands into locals with Sse2.LoadVector128 and passes them to
        // Ssse3.HorizontalAdd.
        public void RunLclVarScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));

            var op1 = Sse2.LoadVector128((Int16 *)(_dataTable.inArray1Ptr));
            var op2 = Sse2.LoadVector128((Int16 *)(_dataTable.inArray2Ptr));

            var hadd = Ssse3.HorizontalAdd(op1, op2);

            Unsafe.Write(_dataTable.outArrayPtr, hadd);
            ValidateResult(op1, op2, _dataTable.outArrayPtr);
        }
        // Basic scenario: announces itself to the test framework; the two Unsafe.Read
        // calls are written inline as the arguments of Ssse3.HorizontalAdd and
        // validation receives the raw input and output pointers.
        public void RunBasicScenario_UnsafeRead()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));

            var hadd = Ssse3.HorizontalAdd(Unsafe.Read<Vector128<Int32>>(_dataTable.inArray1Ptr),
                                           Unsafe.Read<Vector128<Int32>>(_dataTable.inArray2Ptr));

            Unsafe.Write(_dataTable.outArrayPtr, hadd);

            ValidateResult(
                _dataTable.inArray1Ptr,
                _dataTable.inArray2Ptr,
                _dataTable.outArrayPtr);
        }
        // Basic scenario: announces itself to the test framework; the two aligned loads
        // are written inline as the arguments of Ssse3.HorizontalAdd and validation
        // receives the raw input and output pointers.
        public void RunBasicScenario_LoadAligned()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));

            var hadd = Ssse3.HorizontalAdd(Sse2.LoadAlignedVector128((Int32 *)(_dataTable.inArray1Ptr)),
                                           Sse2.LoadAlignedVector128((Int32 *)(_dataTable.inArray2Ptr)));

            Unsafe.Write(_dataTable.outArrayPtr, hadd);

            ValidateResult(
                _dataTable.inArray1Ptr,
                _dataTable.inArray2Ptr,
                _dataTable.outArrayPtr);
        }
예제 #25
0
        // Local-struct field scenario with explicit loads: announces itself to the test
        // framework, then loads the operands through the addresses of the local
        // struct's _fld1/_fld2 fields with Sse2.LoadVector128.
        public void RunStructLclFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario_Load));

            var local = TestStruct.Create();
            var hadd  = Ssse3.HorizontalAdd(Sse2.LoadVector128((Int16 *)(&local._fld1)),
                                            Sse2.LoadVector128((Int16 *)(&local._fld2)));

            Unsafe.Write(_dataTable.outArrayPtr, hadd);

            ValidateResult(
                local._fld1,
                local._fld2,
                _dataTable.outArrayPtr);
        }
예제 #26
0
            // Field scenario with explicit loads: pins the fields _fld1 and _fld2, loads
            // them through the pinned pointers with Sse2.LoadVector128, and uses
            // testClass for the output buffer and the validation.
            public void RunStructFldScenario_Load(HorizontalBinaryOpTest__HorizontalAddInt16 testClass)
            {
                fixed (Vector128<Int16>* pLeft = &_fld1)
                fixed (Vector128<Int16>* pRight = &_fld2)
                {
                    var hadd = Ssse3.HorizontalAdd(Sse2.LoadVector128((Int16 *)(pLeft)),
                                                   Sse2.LoadVector128((Int16 *)(pRight)));

                    Unsafe.Write(testClass._dataTable.outArrayPtr, hadd);

                    testClass.ValidateResult(
                        _fld1,
                        _fld2,
                        testClass._dataTable.outArrayPtr);
                }
            }
예제 #27
0
        // Field scenario with explicit loads: announces itself to the test framework,
        // pins the fields _fld1 and _fld2 and loads them through the pinned pointers
        // with Sse2.LoadVector128 before calling Ssse3.HorizontalAdd.
        public void RunClassFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario_Load));

            fixed (Vector128<Int16>* pLeft = &_fld1)
            fixed (Vector128<Int16>* pRight = &_fld2)
            {
                var hadd = Ssse3.HorizontalAdd(Sse2.LoadVector128((Int16 *)(pLeft)),
                                               Sse2.LoadVector128((Int16 *)(pRight)));

                Unsafe.Write(_dataTable.outArrayPtr, hadd);

                ValidateResult(
                    _fld1,
                    _fld2,
                    _dataTable.outArrayPtr);
            }
        }
예제 #28
0
        // Class-variable scenario with explicit loads: announces itself to the test
        // framework, pins _clsVar1 and _clsVar2 and loads them through the pinned
        // pointers with Sse2.LoadVector128 before calling Ssse3.HorizontalAdd.
        public void RunClsVarScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario_Load));

            fixed (Vector128<Int32>* pLeft = &_clsVar1)
            fixed (Vector128<Int32>* pRight = &_clsVar2)
            {
                var hadd = Ssse3.HorizontalAdd(Sse2.LoadVector128((Int32 *)(pLeft)),
                                               Sse2.LoadVector128((Int32 *)(pRight)));

                Unsafe.Write(_dataTable.outArrayPtr, hadd);

                ValidateResult(
                    _clsVar1,
                    _clsVar2,
                    _dataTable.outArrayPtr);
            }
        }
        /// <summary>
        /// Sums <c>_items</c> four elements at a time with SSE2, adds the remaining tail
        /// elements, and passes the total to <c>CheckSum</c>. Does nothing when SSE2 is
        /// not supported.
        /// </summary>
        public unsafe void SumVectorizedHardwareSse2()
        {
            if (!Sse2.IsSupported)
            {
                return;
            }

            int total;

            fixed (int* data = _items)
            {
                Vector128<int> acc = Vector128<int>.Zero;

                int index     = 0;
                int vectorEnd = _items.Length - (_items.Length % 4);

                // Whole vectors first.
                for (; index < vectorEnd; index += 4)
                {
                    acc = Sse2.Add(acc, Sse2.LoadVector128(data + index));
                }

                // Collapse the four lanes into element 0.
                if (!Ssse3.IsSupported)
                {
                    acc = Sse2.Add(acc, Sse2.Shuffle(acc, 0x4E));
                    acc = Sse2.Add(acc, Sse2.Shuffle(acc, 0xB1));
                }
                else
                {
                    acc = Ssse3.HorizontalAdd(acc, acc);
                    acc = Ssse3.HorizontalAdd(acc, acc);
                }

                total = acc.ToScalar();

                // Then whatever did not fill a whole vector.
                for (; index < _items.Length; index++)
                {
                    total += data[index];
                }
            }

            CheckSum(total);
        }
예제 #30
0
        // Local-instance field scenario with explicit loads: announces itself to the
        // test framework, creates a fresh HorizontalBinaryOpTest__HorizontalAddInt32,
        // pins its _fld1/_fld2 fields and loads them with Sse2.LoadVector128.
        public void RunClassLclFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario_Load));

            var local = new HorizontalBinaryOpTest__HorizontalAddInt32();

            fixed (Vector128<Int32>* pLeft = &local._fld1)
            fixed (Vector128<Int32>* pRight = &local._fld2)
            {
                var hadd = Ssse3.HorizontalAdd(Sse2.LoadVector128((Int32 *)(pLeft)),
                                               Sse2.LoadVector128((Int32 *)(pRight)));

                Unsafe.Write(_dataTable.outArrayPtr, hadd);

                ValidateResult(
                    local._fld1,
                    local._fld2,
                    _dataTable.outArrayPtr);
            }
        }