/// <summary>
/// Converts the bytes at <paramref name="p"/> to an integer with SSE arithmetic and
/// reports a range check on the converted lanes.
/// </summary>
/// <param name="p">Start of the input. A full 16 bytes are loaded from this address regardless of <paramref name="cnt"/>.</param>
/// <param name="cnt">Index into the per-length weight/selector tables (<c>mul0Array</c>, <c>mul1Array</c>, <c>cmp0Array</c>, <c>cmp1Array</c>).</param>
/// <param name="n">Receives the horizontal sum of the weighted lanes.</param>
/// <returns>
/// The horizontal sum of the "lane &gt; <c>_9</c>" comparison masks. A lane that compares
/// greater is all ones (-1), so the value is 0 when no lane exceeds <c>_9</c>.
/// </returns>
/// <remarks>
/// NOTE(review): presumably <c>subtmp</c> holds the ASCII code of '0' and the weight tables hold
/// powers of ten — confirm against the field initialisers, which are outside this block.
/// </remarks>
public int TryParseSIMDUseCount(byte *p, int cnt, out int n)
        {
            var raw    = Sse2.LoadVector128(p);
            var digits = Sse2.Subtract(Sse.StaticCast <byte, sbyte>(raw), subtmp);

            // Spread the two byte groups selected by mask0 / mask1 into 32-bit lanes.
            var data0 = Sse.StaticCast <sbyte, int>(Ssse3.Shuffle(digits, mask0));
            var data1 = Sse.StaticCast <sbyte, int>(Ssse3.Shuffle(digits, mask1));

            // Weighted sum of both groups; two horizontal adds leave the total in every lane.
            var x = Sse2.Add(Sse41.MultiplyLow(data0, mul0Array[cnt]),
                             Sse41.MultiplyLow(data1, mul1Array[cnt]));

            x = Ssse3.HorizontalAdd(x, x);
            x = Ssse3.HorizontalAdd(x, x);

            n = Sse41.Extract(x, 3);

            // Range check. The second comparison must look at data1: the original multiplied data0
            // by cmp1Array, so the second group was never checked (each table is paired with the
            // group of the same index everywhere else in this method).
            var com0 = Sse2.CompareGreaterThan(Sse41.MultiplyLow(data0, cmp0Array[cnt]), _9);
            var com1 = Sse2.CompareGreaterThan(Sse41.MultiplyLow(data1, cmp1Array[cnt]), _9);

            var xx = Sse2.Add(com0, com1);

            xx = Ssse3.HorizontalAdd(xx, xx);
            xx = Ssse3.HorizontalAdd(xx, xx);

            return(Sse41.Extract(xx, 3));
        }
Beispiel #2
0
        /// <summary>
        /// Computes <c>a * b + c</c> lane-wise for a 256-bit vector whose element type is
        /// <c>int</c>, <c>uint</c>, <c>float</c> or <c>double</c>.
        /// </summary>
        /// <param name="a">First multiplicand.</param>
        /// <param name="b">Second multiplicand.</param>
        /// <param name="c">Addend.</param>
        /// <returns>The lane-wise result, reinterpreted back to <c>Vector256&lt;T&gt;</c>.</returns>
        /// <exception cref="NotSupportedException">The element type is none of the four handled types.</exception>
        internal static Vector256 <T> MultiplyAddVector256(Vector256 <T> a, Vector256 <T> b, Vector256 <T> c)
        {
            if (typeof(T) == typeof(int))
            {
                // Avx2.MultiplyLow already produces the low 32 bits of all eight products, so the
                // upper half does not need to be recomputed with Sse41 and stitched back in.
                var product = Avx2.MultiplyLow(a.As <T, int>(), b.As <T, int>());
                return(Avx2.Add(product, c.As <T, int>()).As <int, T>());
            }
            if (typeof(T) == typeof(uint))
            {
                var product = Avx2.MultiplyLow(a.As <T, uint>(), b.As <T, uint>());
                return(Avx2.Add(product, c.As <T, uint>()).As <uint, T>());
            }
            if (typeof(T) == typeof(float))
            {
                return(Fma.MultiplyAdd(a.As <T, float>(), b.As <T, float>(), c.As <T, float>()).As <float, T>());
            }
            if (typeof(T) == typeof(double))
            {
                return(Fma.MultiplyAdd(a.As <T, double>(), b.As <T, double>(), c.As <T, double>()).As <double, T>());
            }

            throw new NotSupportedException();
        }
Beispiel #3
0
        /// <summary>
        /// Multiplies the two instance fields with <c>Sse41.MultiplyLow</c>, stores the product in
        /// the data table's output buffer and validates it against the same fields.
        /// </summary>
        public void RunFldScenario()
        {
            var product = Sse41.MultiplyLow(_fld1, _fld2);

            Unsafe.Write(_dataTable.outArrayPtr, product);

            ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
        }
            /// <summary>
            /// Multiplies this struct's two fields with <c>Sse41.MultiplyLow</c>, then stores and
            /// validates the product through the supplied test class.
            /// </summary>
            /// <param name="testClass">Owner of the output buffer and of the validation routine.</param>
            public void RunStructFldScenario(SimpleBinaryOpTest__MultiplyLowUInt32 testClass)
            {
                var product = Sse41.MultiplyLow(_fld1, _fld2);

                Unsafe.Write(testClass._dataTable.outArrayPtr, product);

                testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
            }
Beispiel #5
0
        /// <summary>
        /// Creates a fresh test instance in a local, multiplies its two fields with
        /// <c>Sse41.MultiplyLow</c>, and stores and validates the product via this instance's data table.
        /// </summary>
        public void RunLclFldScenario()
        {
            var instance = new SimpleBinaryOpTest__MultiplyLowInt32();
            var product  = Sse41.MultiplyLow(instance._fld1, instance._fld2);

            Unsafe.Write(_dataTable.outArrayPtr, product);

            ValidateResult(instance._fld1, instance._fld2, _dataTable.outArrayPtr);
        }
Beispiel #6
0
        /// <summary>
        /// Obtains a <c>TestStruct</c> from <c>TestStruct.Create()</c>, multiplies its two fields with
        /// <c>Sse41.MultiplyLow</c>, and stores and validates the product.
        /// </summary>
        public void RunStructLclFldScenario()
        {
            var localStruct = TestStruct.Create();
            var product     = Sse41.MultiplyLow(localStruct._fld1, localStruct._fld2);

            Unsafe.Write(_dataTable.outArrayPtr, product);

            ValidateResult(localStruct._fld1, localStruct._fld2, _dataTable.outArrayPtr);
        }
Beispiel #7
0
        /// <summary>
        /// Reads both operands from the input buffers into locals with <c>Unsafe.Read</c>, multiplies
        /// them with <c>Sse41.MultiplyLow</c>, and stores and validates the product.
        /// </summary>
        public void RunLclVarScenario_UnsafeRead()
        {
            var lhs     = Unsafe.Read<Vector128<Int32>>(_dataTable.inArray1Ptr);
            var rhs     = Unsafe.Read<Vector128<Int32>>(_dataTable.inArray2Ptr);
            var product = Sse41.MultiplyLow(lhs, rhs);

            Unsafe.Write(_dataTable.outArrayPtr, product);

            ValidateResult(lhs, rhs, _dataTable.outArrayPtr);
        }
Beispiel #8
0
        /// <summary>
        /// Loads both operands into locals with <c>Sse2.LoadAlignedVector128</c>, multiplies them with
        /// <c>Sse41.MultiplyLow</c>, and stores and validates the product.
        /// </summary>
        public void RunLclVarScenario_LoadAligned()
        {
            var lhs     = Sse2.LoadAlignedVector128((Int32*)(_dataTable.inArray1Ptr));
            var rhs     = Sse2.LoadAlignedVector128((Int32*)(_dataTable.inArray2Ptr));
            var product = Sse41.MultiplyLow(lhs, rhs);

            Unsafe.Write(_dataTable.outArrayPtr, product);

            ValidateResult(lhs, rhs, _dataTable.outArrayPtr);
        }
        /// <summary>
        /// Announces the scenario, multiplies the two instance fields with <c>Sse41.MultiplyLow</c>,
        /// and stores and validates the product.
        /// </summary>
        public void RunClassFldScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));

            var product = Sse41.MultiplyLow(_fld1, _fld2);

            Unsafe.Write(_dataTable.outArrayPtr, product);

            ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
        }
Beispiel #10
0
        /// <summary>
        /// Multiplies the two class-level variables <c>_clsVar1</c> and <c>_clsVar2</c> with
        /// <c>Sse41.MultiplyLow</c>, and stores and validates the product.
        /// </summary>
        public void RunClsVarScenario()
        {
            var product = Sse41.MultiplyLow(_clsVar1, _clsVar2);

            Unsafe.Write(_dataTable.outArrayPtr, product);

            ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
        }
Beispiel #11
0
        /// <summary>
        /// Feeds two aligned loads from the input buffers directly into <c>Sse41.MultiplyLow</c>
        /// (no intermediate locals), then stores the product and validates it against the raw buffers.
        /// </summary>
        public void RunBasicScenario_LoadAligned()
        {
            var product = Sse41.MultiplyLow(Sse2.LoadAlignedVector128((Int32*)(_dataTable.inArray1Ptr)),
                                            Sse2.LoadAlignedVector128((Int32*)(_dataTable.inArray2Ptr)));

            Unsafe.Write(_dataTable.outArrayPtr, product);

            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
        }
Beispiel #12
0
        /// <summary>
        /// Feeds two <c>Unsafe.Read</c> results directly into <c>Sse41.MultiplyLow</c> (no intermediate
        /// locals), then stores the product and validates it against the raw buffers.
        /// </summary>
        public void RunBasicScenario_UnsafeRead()
        {
            var product = Sse41.MultiplyLow(Unsafe.Read<Vector128<Int32>>(_dataTable.inArray1Ptr),
                                            Unsafe.Read<Vector128<Int32>>(_dataTable.inArray2Ptr));

            Unsafe.Write(_dataTable.outArrayPtr, product);

            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
        }
        /// <summary>
        /// Announces the scenario, creates a fresh test instance in a local, multiplies its two fields
        /// with <c>Sse41.MultiplyLow</c>, and stores and validates the product.
        /// </summary>
        public void RunClassLclFldScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));

            var instance = new SimpleBinaryOpTest__MultiplyLowUInt32();
            var product  = Sse41.MultiplyLow(instance._fld1, instance._fld2);

            Unsafe.Write(_dataTable.outArrayPtr, product);

            ValidateResult(instance._fld1, instance._fld2, _dataTable.outArrayPtr);
        }
        /// <summary>
        /// Announces the scenario, obtains a <c>TestStruct</c> from <c>TestStruct.Create()</c>,
        /// multiplies its two fields with <c>Sse41.MultiplyLow</c>, and stores and validates the product.
        /// </summary>
        public void RunStructLclFldScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));

            var localStruct = TestStruct.Create();
            var product     = Sse41.MultiplyLow(localStruct._fld1, localStruct._fld2);

            Unsafe.Write(_dataTable.outArrayPtr, product);

            ValidateResult(localStruct._fld1, localStruct._fld2, _dataTable.outArrayPtr);
        }
Beispiel #15
0
        /// <summary>
        /// Announces the scenario, loads both operands into locals with <c>Sse2.LoadVector128</c>,
        /// multiplies them with <c>Sse41.MultiplyLow</c>, and stores and validates the product.
        /// </summary>
        public void RunLclVarScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));

            var lhs     = Sse2.LoadVector128((UInt32*)(_dataTable.inArray1Ptr));
            var rhs     = Sse2.LoadVector128((UInt32*)(_dataTable.inArray2Ptr));
            var product = Sse41.MultiplyLow(lhs, rhs);

            Unsafe.Write(_dataTable.outArrayPtr, product);

            ValidateResult(lhs, rhs, _dataTable.outArrayPtr);
        }
Beispiel #16
0
        /// <summary>
        /// Hashes <paramref name="len"/> bytes starting at <paramref name="buf"/> as a weighted sum:
        /// byte <c>i</c> is multiplied by <c>kMultFactors[len - i - 1]</c>, the products are summed,
        /// and the total is masked with <c>kBase - 1</c>.
        /// </summary>
        /// <param name="buf">First input byte.</param>
        /// <param name="len">Number of input bytes.</param>
        /// <returns>The weighted sum masked with <c>kBase - 1</c>.</returns>
        /// <remarks>
        /// Four bytes are processed per vector iteration; the remaining 0-3 bytes are handled by a
        /// scalar loop. The vector partial sums are kept in 32-bit lanes.
        /// </remarks>
        private unsafe ulong HashSse(byte *buf, int len)
        {
            ulong           h       = 0;
            Vector128 <int> v_ps    = Vector128 <int> .Zero;
            bool            useSse4 = Sse41.IsSupported;

            int i = 0;

            for (; len - i >= 4; i += 4)
            {
                int j = len - i - 1;

                // Factors for the four bytes of this group, reordered by the SO123 shuffle control.
                Vector128 <int> c_v = Sse2.Shuffle(Sse2.LoadVector128(&kMultFactorsPtr[j - 3]), SO123);

                // Only the low four bytes are consumed below, so load exactly four. A full 16-byte
                // load here would read past buf + len whenever fewer than 16 bytes remain.
                Vector128 <byte> q_v = Sse2.LoadScalarVector128((uint *)(buf + i)).AsByte();

                if (useSse4)
                {
                    // Zero-extend the four bytes to 32-bit lanes and multiply lane-wise.
                    Vector128 <int> s_v = Sse41.ConvertToVector128Int32(q_v);
                    v_ps = Sse2.Add(v_ps, Sse41.MultiplyLow(c_v, s_v));
                }
                else
                {
                    // SSE2 widening: replicate each byte across its 32-bit lane, then shift it down.
                    q_v = Sse2.UnpackLow(q_v, q_v);
                    Vector128 <int> s_v =
                        Sse2.ShiftRightLogical(Sse2.UnpackLow(q_v.AsUInt16(), q_v.AsUInt16()).AsInt32(), 24);

                    // SSE2 32-bit multiply: products of lanes 0/2 and of lanes 1/3, then re-interleave
                    // the low 32 bits of each product.
                    Vector128 <ulong> v_tmp1 = Sse2.Multiply(c_v.AsUInt32(), s_v.AsUInt32());
                    Vector128 <ulong> v_tmp2 =
                        Sse2.Multiply(Sse2.ShiftRightLogical128BitLane(c_v.AsByte(), 4).AsUInt32(),
                                      Sse2.ShiftRightLogical128BitLane(s_v.AsByte(), 4).AsUInt32());
                    v_ps = Sse2.Add(v_ps, Sse2.UnpackLow(Sse2.Shuffle(v_tmp1.AsInt32(), SOO2O),
                                                         Sse2.Shuffle(v_tmp2.AsInt32(), SOO2O)));
                }
            }

            // Horizontal reduction of the four partial sums into lane 0.
            v_ps = Sse2.Add(v_ps, Sse2.Shuffle(v_ps, S23O1));
            v_ps = Sse2.Add(v_ps, Sse2.Shuffle(v_ps, S1O32));
            h   += Sse2.ConvertToUInt32(v_ps.AsUInt32());

            // Scalar tail for the bytes that did not fill a group of four.
            for (; i < len; i++)
            {
                int   index = len - i - 1;
                ulong c     = (uint)kMultFactors[index];
                h += c * buf[i];
            }

            return(h & (kBase - 1));
        }
        /// <summary>
        /// Announces the scenario, loads both operands into locals with
        /// <c>Sse2.LoadAlignedVector128</c>, multiplies them with <c>Sse41.MultiplyLow</c>, and stores
        /// and validates the product.
        /// </summary>
        public void RunLclVarScenario_LoadAligned()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));

            var lhs     = Sse2.LoadAlignedVector128((UInt32*)(_dataTable.inArray1Ptr));
            var rhs     = Sse2.LoadAlignedVector128((UInt32*)(_dataTable.inArray2Ptr));
            var product = Sse41.MultiplyLow(lhs, rhs);

            Unsafe.Write(_dataTable.outArrayPtr, product);

            ValidateResult(lhs, rhs, _dataTable.outArrayPtr);
        }
        /// <summary>
        /// Announces the scenario, reads both operands into locals with <c>Unsafe.Read</c>, multiplies
        /// them with <c>Sse41.MultiplyLow</c>, and stores and validates the product.
        /// </summary>
        public void RunLclVarScenario_UnsafeRead()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));

            var lhs     = Unsafe.Read<Vector128<UInt32>>(_dataTable.inArray1Ptr);
            var rhs     = Unsafe.Read<Vector128<UInt32>>(_dataTable.inArray2Ptr);
            var product = Sse41.MultiplyLow(lhs, rhs);

            Unsafe.Write(_dataTable.outArrayPtr, product);

            ValidateResult(lhs, rhs, _dataTable.outArrayPtr);
        }
Beispiel #19
0
        /// <summary>
        /// Announces the scenario, feeds two <c>Sse2.LoadVector128</c> loads directly into
        /// <c>Sse41.MultiplyLow</c> (no intermediate locals), then stores the product and validates it
        /// against the raw buffers.
        /// </summary>
        public void RunBasicScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));

            var product = Sse41.MultiplyLow(Sse2.LoadVector128((Int32*)(_dataTable.inArray1Ptr)),
                                            Sse2.LoadVector128((Int32*)(_dataTable.inArray2Ptr)));

            Unsafe.Write(_dataTable.outArrayPtr, product);

            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
        }
Beispiel #20
0
        /// <summary>
        /// Announces the scenario, feeds two <c>Unsafe.Read</c> results directly into
        /// <c>Sse41.MultiplyLow</c> (no intermediate locals), then stores the product and validates it
        /// against the raw buffers.
        /// </summary>
        public void RunBasicScenario_UnsafeRead()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));

            var product = Sse41.MultiplyLow(Unsafe.Read<Vector128<Int32>>(_dataTable.inArray1Ptr),
                                            Unsafe.Read<Vector128<Int32>>(_dataTable.inArray2Ptr));

            Unsafe.Write(_dataTable.outArrayPtr, product);

            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
        }
Beispiel #21
0
        /// <summary>
        /// Announces the scenario, obtains a local <c>TestStruct</c>, loads its two fields through
        /// their addresses with <c>Sse2.LoadVector128</c>, multiplies them with
        /// <c>Sse41.MultiplyLow</c>, and stores and validates the product.
        /// </summary>
        public void RunStructLclFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario_Load));

            var localStruct = TestStruct.Create();
            var product     = Sse41.MultiplyLow(Sse2.LoadVector128((UInt32*)(&localStruct._fld1)),
                                                Sse2.LoadVector128((UInt32*)(&localStruct._fld2)));

            Unsafe.Write(_dataTable.outArrayPtr, product);

            ValidateResult(localStruct._fld1, localStruct._fld2, _dataTable.outArrayPtr);
        }
Beispiel #22
0
        /// <summary>
        /// Multiplies <paramref name="left"/> by <paramref name="right"/>, using
        /// <c>Sse41.MultiplyLow</c> when SSE4.1 is available and <c>Multiply_Software</c> otherwise.
        /// </summary>
        /// <param name="left">First operand.</param>
        /// <param name="right">Second operand.</param>
        /// <returns>The product from whichever path was taken.</returns>
        public static Vector4Int32 Multiply(Vector4Int32Param1_3 left, Vector4Int32Param1_3 right)
        {
            if (!Sse41.IsSupported)
            {
                // TODO: try to accelerate this on hardware below SSE4.1, e.g. an SSE2 path built on
                // Sse2.Multiply(left.AsUInt32(), right.AsUInt32()) for the even elements.
                return Multiply_Software(left, right);
            }

            return Sse41.MultiplyLow(left, right);
        }
Beispiel #23
0
            /// <summary>
            /// Pins this struct's two fields, loads them with <c>Sse2.LoadVector128</c>, multiplies
            /// them with <c>Sse41.MultiplyLow</c>, then stores and validates the product through the
            /// supplied test class.
            /// </summary>
            /// <param name="testClass">Owner of the output buffer and of the validation routine.</param>
            public void RunStructFldScenario_Load(SimpleBinaryOpTest__MultiplyLowUInt32 testClass)
            {
                fixed (Vector128<UInt32>* leftPtr = &_fld1)
                fixed (Vector128<UInt32>* rightPtr = &_fld2)
                {
                    var product = Sse41.MultiplyLow(Sse2.LoadVector128((UInt32*)(leftPtr)),
                                                    Sse2.LoadVector128((UInt32*)(rightPtr)));

                    Unsafe.Write(testClass._dataTable.outArrayPtr, product);

                    testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
                }
            }
Beispiel #24
0
        /// <summary>
        /// Announces the scenario, pins the two instance fields, loads them with
        /// <c>Sse2.LoadVector128</c>, multiplies them with <c>Sse41.MultiplyLow</c>, and stores and
        /// validates the product.
        /// </summary>
        public void RunClassFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario_Load));

            fixed (Vector128<UInt32>* leftPtr = &_fld1)
            fixed (Vector128<UInt32>* rightPtr = &_fld2)
            {
                var product = Sse41.MultiplyLow(Sse2.LoadVector128((UInt32*)(leftPtr)),
                                                Sse2.LoadVector128((UInt32*)(rightPtr)));

                Unsafe.Write(_dataTable.outArrayPtr, product);

                ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
            }
        }
Beispiel #25
0
        /// <summary>
        /// Announces the scenario, pins the two class-level variables, loads them with
        /// <c>Sse2.LoadVector128</c>, multiplies them with <c>Sse41.MultiplyLow</c>, and stores and
        /// validates the product.
        /// </summary>
        public void RunClsVarScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario_Load));

            fixed (Vector128<Int32>* leftPtr = &_clsVar1)
            fixed (Vector128<Int32>* rightPtr = &_clsVar2)
            {
                var product = Sse41.MultiplyLow(Sse2.LoadVector128((Int32*)(leftPtr)),
                                                Sse2.LoadVector128((Int32*)(rightPtr)));

                Unsafe.Write(_dataTable.outArrayPtr, product);

                ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
            }
        }
Beispiel #26
0
        /// <summary>
        /// Computes <c>A * x * a + B * y</c> element-wise, four 32-bit integers at a time, keeping the
        /// low 32 bits of every product.
        /// </summary>
        /// <param name="A">First input matrix; supplies the number of result rows.</param>
        /// <param name="B">Second input matrix; its column count divided by 4 (rounded down) is the number of vectors per result row.</param>
        /// <param name="a">Scalar applied to every element of the <paramref name="A"/> term.</param>
        /// <param name="x">Per-lane factor for the <paramref name="A"/> term.</param>
        /// <param name="y">Per-lane factor for the <paramref name="B"/> term.</param>
        /// <returns>A matrix of vectors with <c>A.GetLength(0)</c> rows and <c>B.GetLength(1) / 4</c> columns.</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="A"/> or <paramref name="B"/> holds fewer elements than the loop would read.
        /// </exception>
        /// <remarks>
        /// Both inputs are consumed as flat row-major streams, four elements per step. When the column
        /// count is not a multiple of 4, the vectors therefore do not line up with the matrix rows.
        /// </remarks>
        public static Vector128 <int>[,] Procedure(int[,] A, int[,] B, int a, Vector128 <int> x, Vector128 <int> y)
        {
            int rows          = A.GetLength(0);
            int vectorsPerRow = B.GetLength(1) / 4;
            int required      = rows * vectorsPerRow * 4;

            // The loop reads `required` consecutive ints from each input; refuse inputs that are too
            // small instead of reading past the end of the pinned array.
            if (required > 0 && (A.Length < required || B.Length < required))
            {
                throw new ArgumentException("Input matrices are too small for the requested result size.");
            }

            Vector128 <int>[,] result = new Vector128 <int> [rows, vectorsPerRow];
            Vector128 <int>    scale  = Vector128.Create(a);   // broadcast once, not per iteration

            unsafe
            {
                fixed(int *pA = A)
                fixed(int *pB = B)
                {
                    int offset = 0;

                    for (int i = 0; i < rows; i++)
                    {
                        for (int j = 0; j < vectorsPerRow; j++)
                        {
                            Vector128 <int> left  = Sse2.LoadVector128(pA + offset);
                            Vector128 <int> right = Sse2.LoadVector128(pB + offset);
                            offset += 4;

                            Vector128 <int> termA = Sse41.MultiplyLow(Sse41.MultiplyLow(left, x), scale);
                            Vector128 <int> termB = Sse41.MultiplyLow(right, y);

                            result[i, j] = Sse2.Add(termA, termB);
                        }
                    }
                }
            }

            return(result);
        }
Beispiel #27
0
        /// <summary>
        /// Announces the scenario, creates a fresh test instance in a local, pins its two fields,
        /// loads them with <c>Sse2.LoadVector128</c>, multiplies them with <c>Sse41.MultiplyLow</c>,
        /// and stores and validates the product.
        /// </summary>
        public void RunClassLclFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario_Load));

            var instance = new SimpleBinaryOpTest__MultiplyLowInt32();

            fixed (Vector128<Int32>* leftPtr = &instance._fld1)
            fixed (Vector128<Int32>* rightPtr = &instance._fld2)
            {
                var product = Sse41.MultiplyLow(Sse2.LoadVector128((Int32*)(leftPtr)),
                                                Sse2.LoadVector128((Int32*)(rightPtr)));

                Unsafe.Write(_dataTable.outArrayPtr, product);

                ValidateResult(instance._fld1, instance._fld2, _dataTable.outArrayPtr);
            }
        }
Beispiel #28
0
        /// <summary>
        /// Multiplies <paramref name="a"/> and <paramref name="b"/> element by element (not a matrix
        /// product), four 32-bit integers at a time, keeping the low 32 bits of every product.
        /// </summary>
        /// <param name="a">First input; its dimensions determine the size of the result.</param>
        /// <param name="b">Second input; read in the same flat order as <paramref name="a"/>.</param>
        /// <returns>A matrix of vectors with <c>a.GetLength(0)</c> rows and <c>a.GetLength(1) / 4</c> columns.</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="b"/> holds fewer elements than the loop would read.
        /// </exception>
        /// <remarks>
        /// Both inputs are consumed as flat row-major streams, four elements per step. When the column
        /// count is not a multiple of 4, the vectors therefore do not line up with the matrix rows.
        /// </remarks>
        public static Vector128 <int>[,] MultiplyMatrixSSE(int[,] a, int[,] b)
        {
            int rows          = a.GetLength(0);
            int vectorsPerRow = a.GetLength(1) / 4;
            int required      = rows * vectorsPerRow * 4;

            // The loop reads `required` consecutive ints from b; refuse a b that is too small instead
            // of reading past the end of the pinned array.
            if (required > 0 && b.Length < required)
            {
                throw new ArgumentException("Second matrix is too small for the requested result size.");
            }

            Vector128 <int>[,] products = new Vector128 <int> [rows, vectorsPerRow];

            unsafe
            {
                fixed(int *pA = a)
                fixed(int *pB = b)
                {
                    int offset = 0;

                    for (int i = 0; i < rows; i++)
                    {
                        for (int j = 0; j < vectorsPerRow; j++)
                        {
                            Vector128 <int> left  = Sse2.LoadVector128(pA + offset);
                            Vector128 <int> right = Sse2.LoadVector128(pB + offset);
                            offset += 4;

                            products[i, j] = Sse41.MultiplyLow(left, right);
                        }
                    }
                }
            }

            return(products);
        }
Beispiel #29
0
    /// <summary>
    /// Divides every signed 16-bit lane of <paramref name="dividend"/> by 10 with a
    /// widen / multiply / shift sequence instead of a division instruction.
    /// </summary>
    /// <param name="dividend">Eight signed 16-bit values.</param>
    /// <returns>The eight quotients, in the same lane order as the input.</returns>
    /// <remarks>
    /// Each lane is sign-extended to 32 bits, multiplied by 26215 (2^18 / 10, rounded up) and
    /// shifted right arithmetically by 18. The lane's sign (0 or -1, from an arithmetic shift by 15)
    /// is then subtracted, which adds one to the result for negative inputs. One 256-bit pass is
    /// used when AVX2 is available, otherwise two 128-bit passes.
    /// </remarks>
    public static Vector128 <short> DivideBy10(this Vector128 <short> dividend)
    {
        const int Reciprocal = 26215;

        // Split into odd-indexed and even-indexed 16-bit lanes, each sign-extended to 32 bits.
        Vector128<int> packed    = dividend.AsInt32();
        Vector128<int> oddLanes  = Sse2.ShiftRightArithmetic(packed, 16);
        Vector128<int> evenLanes = Sse2.ShiftRightArithmetic(Sse2.ShiftLeftLogical(packed, 16), 16);

        Vector128<int> oddQuotients;
        Vector128<int> evenQuotients;

        if (!Avx2.IsSupported)
        {
            Vector128<int> scale = Vector128.Create(Reciprocal);

            oddQuotients = Sse2.Subtract(
                Sse2.ShiftRightArithmetic(Sse41.MultiplyLow(oddLanes, scale), 18),
                Sse2.ShiftRightArithmetic(oddLanes, 15));
            evenQuotients = Sse2.Subtract(
                Sse2.ShiftRightArithmetic(Sse41.MultiplyLow(evenLanes, scale), 18),
                Sse2.ShiftRightArithmetic(evenLanes, 15));
        }
        else
        {
            // Even lanes in the lower half, odd lanes in the upper half.
            Vector256<int> all       = Vector256.Create(evenLanes, oddLanes);
            Vector256<int> scaled    = Avx2.ShiftRightArithmetic(Avx2.MultiplyLow(all, Vector256.Create(Reciprocal)), 18);
            Vector256<int> quotients = Avx2.Subtract(scaled, Avx2.ShiftRightArithmetic(all, 15));

            oddQuotients  = quotients.GetUpper();
            evenQuotients = quotients.GetLower();
        }

        // Move each odd quotient's low 16 bits up by two bytes so it sits in an odd 16-bit slot,
        // then take odd slots from it and even slots from the even quotients (blend mask 0xAA).
        Vector128<short> oddSlots = Sse2.ShiftLeftLogical128BitLane(oddQuotients, 2).AsInt16();
        return Sse41.Blend(evenQuotients.AsInt16(), oddSlots, 0xAA);
    }
        /// <summary>
        /// Multiplies <paramref name="lhs"/> by <paramref name="rhs"/> lane-wise, keeping the low
        /// 32 bits of each product. Uses <c>Sse41.MultiplyLow</c> when SSE4.1 is available and an
        /// SSE2-only sequence otherwise.
        /// </summary>
        /// <param name="lhs">First operand.</param>
        /// <param name="rhs">Second operand.</param>
        /// <returns>The lane-wise products.</returns>
        public static i32 Mul(i32 lhs, i32 rhs)
        {
            if (Sse41.IsSupported)
            {
                return Sse41.MultiplyLow(lhs, rhs);
            }

            // SSE2 fallback: Sse2.Multiply only multiplies lanes 0 and 2, so do it twice, the second
            // time with both inputs shifted down by one lane to reach lanes 1 and 3.
            const byte PickLanes0And2 = 8; // _MM_SHUFFLE(0,0,2,0)

            var evenProducts = Sse2.Multiply(lhs.AsUInt32(), rhs.AsUInt32());

            var lhsShifted  = Sse2.ShiftRightLogical128BitLane(lhs, 4).AsUInt32();
            var rhsShifted  = Sse2.ShiftRightLogical128BitLane(rhs, 4).AsUInt32();
            var oddProducts = Sse2.Multiply(lhsShifted, rhsShifted);

            // Gather the low 32 bits of each product into the low half, then interleave the two sets.
            var evenLow = Sse2.Shuffle(evenProducts.AsInt32(), PickLanes0And2);
            var oddLow  = Sse2.Shuffle(oddProducts.AsInt32(), PickLanes0And2);

            return Sse2.UnpackLow(evenLow, oddLow);
        }