예제 #1
0
        /// <summary>
        /// Adds two <c>Vector128&lt;UInt16&gt;</c> operands that are read with <c>Unsafe.Read</c>
        /// directly inside the argument list of <c>Sse2.Add</c>, stores the sum in the output
        /// buffer and hands the input/output pointers to <c>ValidateResult</c>.
        /// </summary>
        public void RunBasicScenario_UnsafeRead()
        {
            // Both reads stay inline in the call; no operand locals are introduced.
            Vector128<UInt16> sum = Sse2.Add(
                Unsafe.Read<Vector128<UInt16>>(_dataTable.inArray1Ptr),
                Unsafe.Read<Vector128<UInt16>>(_dataTable.inArray2Ptr));

            Unsafe.Write(_dataTable.outArrayPtr, sum);

            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
        }
예제 #2
0
        /// <summary>
        /// Adds two <c>Vector128&lt;UInt16&gt;</c> operands that are loaded with
        /// <c>Sse2.LoadVector128</c> directly inside the argument list of <c>Sse2.Add</c>,
        /// stores the sum in the output buffer and hands the pointers to <c>ValidateResult</c>.
        /// </summary>
        public void RunBasicScenario_Load()
        {
            // Both loads stay inline in the call; no operand locals are introduced.
            Vector128<UInt16> sum = Sse2.Add(
                Sse2.LoadVector128((UInt16*)_dataTable.inArray1Ptr),
                Sse2.LoadVector128((UInt16*)_dataTable.inArray2Ptr));

            Unsafe.Write(_dataTable.outArrayPtr, sum);

            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
        }
예제 #3
0
        /// <summary>
        /// Adds the <c>_clsVar1</c> and <c>_clsVar2</c> fields with <c>Sse2.Add</c>, stores the
        /// sum in the output buffer and validates it against the same two fields.
        /// </summary>
        public void RunClsVarScenario()
        {
            // The fields are passed straight to the intrinsic, without copying them to locals.
            var sum = Sse2.Add(_clsVar1, _clsVar2);

            Unsafe.Write(_dataTable.outArrayPtr, sum);

            ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
        }
예제 #4
0
        /// <summary>
        /// Creates a local <c>TestStruct</c>, adds its <c>_fld1</c> and <c>_fld2</c> fields with
        /// <c>Sse2.Add</c>, stores the sum in the output buffer and validates it.
        /// </summary>
        public void RunStructLclFldScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));

            // The operands are the fields of a struct held in a local variable.
            var localStruct = TestStruct.Create();
            var sum = Sse2.Add(localStruct._fld1, localStruct._fld2);

            Unsafe.Write(_dataTable.outArrayPtr, sum);

            ValidateResult(localStruct._fld1, localStruct._fld2, _dataTable.outArrayPtr);
        }
예제 #5
0
        /// <summary>
        /// Creates a new <c>SimpleBinaryOpTest__AddUInt64</c> instance, adds its <c>_fld1</c> and
        /// <c>_fld2</c> fields with <c>Sse2.Add</c>, stores the sum in the output buffer and
        /// validates it.
        /// </summary>
        public void RunClassLclFldScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));

            // The operands are the fields of a freshly constructed class instance held in a local.
            SimpleBinaryOpTest__AddUInt64 instance = new SimpleBinaryOpTest__AddUInt64();
            var sum = Sse2.Add(instance._fld1, instance._fld2);

            Unsafe.Write(_dataTable.outArrayPtr, sum);

            ValidateResult(instance._fld1, instance._fld2, _dataTable.outArrayPtr);
        }
예제 #6
0
        /// <summary>
        /// Subtracts <paramref name="scalar"/> from every element of <paramref name="vector"/>.
        /// </summary>
        /// <param name="vector">The vector to subtract from.</param>
        /// <param name="scalar">The value broadcast to all lanes and subtracted from each element.</param>
        /// <returns>
        /// The element-wise difference. Uses SSE2 when it is supported and
        /// <c>Subtract_Software</c> otherwise.
        /// </returns>
        public static Vector4UInt32 Subtract(Vector4UInt32Param1_3 vector, uint scalar)
        {
            if (Sse2.IsSupported)
            {
                // Broadcast the scalar so it can be subtracted lane by lane.
                Vector4UInt32 expand = Vector128.Create(scalar);

                // The hardware path must subtract (it previously called Sse2.Add), so that it
                // agrees with the method name and with the software fallback below.
                return Sse2.Subtract(vector, expand);
            }

            return Subtract_Software(vector, scalar);
        }
예제 #7
0
        /// <summary>
        /// d = h * r
        /// </summary>
        /// <remarks>
        /// Fills <paramref name="d0"/>..<paramref name="d4"/> with sums of products formed by
        /// <c>Sse2.Multiply</c> between the packed <c>h</c> inputs and the precomputed multiplier
        /// fields (<c>_r0s4</c>, <c>_s3s2</c>, <c>_r1r0</c>, ...).
        /// NOTE(review): the lane layout of the inputs and fields, and the definition of the
        /// <c>s</c> values, live outside this method; the per-step comments describe the intended
        /// pairing — confirm against the code that initializes those fields.
        /// </remarks>
        /// <param name="h01">Packed input multiplied with the <c>r</c>/<c>s</c> pairs for h0 and h1 (per the step comments); only read here.</param>
        /// <param name="h23">Packed input multiplied with the <c>r</c>/<c>s</c> pairs for h2 and h3 (per the step comments); only read here.</param>
        /// <param name="h44">Packed input multiplied with the pairs for h4 (per the step comments); only read here.</param>
        /// <param name="d0">Receives lane 0 of the first combined sum.</param>
        /// <param name="d1">Receives lane 1 of the first combined sum.</param>
        /// <param name="d2">Receives lane 0 of the second combined sum.</param>
        /// <param name="d3">Receives lane 1 of the second combined sum.</param>
        /// <param name="d4">Receives the sum of both lanes of the last partial sum plus the h4 term.</param>
        private void MultiplyR(
            ref Vector128 <uint> h01, ref Vector128 <uint> h23, ref Vector128 <uint> h44,
            out ulong d0, out ulong d1, out ulong d2, out ulong d3, out ulong d4)
        {
            // h0 * r0 + h2 * s3
            // h1 * s4 + h3 * s2
            var t00 = Sse2.Multiply(h01, _r0s4);
            var t01 = Sse2.Multiply(h23, _s3s2);
            var t1  = Sse2.Add(t01, t00);

            // h0 * r1 + h2 * s4
            // h1 * r0 + h3 * s3
            t00 = Sse2.Multiply(h01, _r1r0);
            t01 = Sse2.Multiply(h23, _s4s3);
            var t2 = Sse2.Add(t01, t00);
            // h4 * s1
            // h4 * s2
            var t3 = Sse2.Multiply(h44, _s1s2);
            // d0 = t1[0] + t1[1] + t3[0]
            // d1 = t2[0] + t2[1] + t3[1]
            // UnpackLow/UnpackHigh regroup the lanes so that one addition sums the two lanes of t1
            // and of t2 side by side. NOTE(review): `.Add` is not a member shown in this file;
            // presumably a project extension doing element-wise 64-bit addition — confirm.
            var t = Sse2.UnpackLow(t1, t2).Add(Sse2.UnpackHigh(t1, t2)).Add(t3);

            // Lane 0 is read directly; shifting the register right by 8 bytes moves lane 1 down to lane 0.
            d0 = t.ToScalar();
            d1 = Sse2.ShiftRightLogical128BitLane(t, 8).ToScalar();

            // h0 * r2 + h2 * r0
            // h1 * r1 + h3 * s4
            t00 = Sse2.Multiply(h01, _r2r1);
            t01 = Sse2.Multiply(h23, _r0s4);
            t1  = Sse2.Add(t01, t00);
            // h0 * r3 + h2 * r1
            // h1 * r2 + h3 * r0
            t00 = Sse2.Multiply(h01, _r3r2);
            t01 = Sse2.Multiply(h23, _r1r0);
            t2  = Sse2.Add(t01, t00);
            // h4 * s3
            // h4 * s4
            t3 = Sse2.Multiply(h44, _s3s4);
            // d2 = t1[0] + t1[1] + t3[0]
            // d3 = t2[0] + t2[1] + t3[1]
            t  = Sse2.UnpackLow(t1, t2).Add(Sse2.UnpackHigh(t1, t2)).Add(t3);
            d2 = t.ToScalar();
            d3 = Sse2.ShiftRightLogical128BitLane(t, 8).ToScalar();

            // h0 * r4 + h2 * r2
            // h1 * r3 + h3 * r1
            t00 = Sse2.Multiply(h01, _r4r3);
            t01 = Sse2.Multiply(h23, _r2r1);
            t1  = Sse2.Add(t01, t00);
            // h4 * r0
            t3 = Sse2.Multiply(h44, _r0);
            // d4 = t1[0] + t1[1] + t3[0]
            // Adding t1 to a copy of itself shifted right by 8 bytes folds lane 1 into lane 0.
            d4 = t1.Add(Sse2.ShiftRightLogical128BitLane(t1, 8)).Add(t3).ToScalar();
        }
예제 #8
0
        /// <summary>
        /// Loads both <c>Vector128&lt;Int64&gt;</c> operands into locals with
        /// <c>Sse2.LoadAlignedVector128</c>, adds them with <c>Sse2.Add</c>, stores the sum in
        /// the output buffer and validates it against the two locals.
        /// </summary>
        public void RunLclVarScenario_LoadAligned()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));

            // Operands are materialized as locals before the intrinsic call.
            Vector128<Int64> lhs = Sse2.LoadAlignedVector128((Int64*)_dataTable.inArray1Ptr);
            Vector128<Int64> rhs = Sse2.LoadAlignedVector128((Int64*)_dataTable.inArray2Ptr);
            Vector128<Int64> sum = Sse2.Add(lhs, rhs);

            Unsafe.Write(_dataTable.outArrayPtr, sum);

            ValidateResult(lhs, rhs, _dataTable.outArrayPtr);
        }
예제 #9
0
        /// <summary>
        /// Loads both <c>Vector128&lt;SByte&gt;</c> operands into locals with
        /// <c>Sse2.LoadAlignedVector128</c>, adds them with <c>Sse2.Add</c>, stores the sum in
        /// the output buffer and validates it against the two locals.
        /// </summary>
        public void RunLclVarScenario_LoadAligned()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));

            // Operands are materialized as locals before the intrinsic call.
            Vector128<SByte> lhs = Sse2.LoadAlignedVector128((SByte*)_dataTable.inArray1Ptr);
            Vector128<SByte> rhs = Sse2.LoadAlignedVector128((SByte*)_dataTable.inArray2Ptr);
            Vector128<SByte> sum = Sse2.Add(lhs, rhs);

            Unsafe.Write(_dataTable.outArrayPtr, sum);

            ValidateResult(lhs, rhs, _dataTable.outArrayPtr);
        }
예제 #10
0
        /// <summary>
        /// Reads both <c>Vector128&lt;Int64&gt;</c> operands into locals with <c>Unsafe.Read</c>,
        /// adds them with <c>Sse2.Add</c>, stores the sum in the output buffer and validates it
        /// against the two locals.
        /// </summary>
        public void RunLclVarScenario_UnsafeRead()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));

            // Operands are materialized as locals before the intrinsic call.
            Vector128<Int64> lhs = Unsafe.Read<Vector128<Int64>>(_dataTable.inArray1Ptr);
            Vector128<Int64> rhs = Unsafe.Read<Vector128<Int64>>(_dataTable.inArray2Ptr);
            Vector128<Int64> sum = Sse2.Add(lhs, rhs);

            Unsafe.Write(_dataTable.outArrayPtr, sum);

            ValidateResult(lhs, rhs, _dataTable.outArrayPtr);
        }
예제 #11
0
        /// <summary>
        /// Computes a multiplicative hash over <paramref name="len"/> bytes starting at
        /// <paramref name="buf"/>. Bytes are consumed four at a time with SSE2 (SSE4.1 when
        /// available) and any remaining bytes with a scalar loop.
        /// </summary>
        /// <param name="buf">Pointer to the first byte to hash; at least <paramref name="len"/> bytes must be readable.</param>
        /// <param name="len">Number of bytes to hash.</param>
        /// <returns>The accumulated hash masked with <c>kBase - 1</c>.</returns>
        private unsafe ulong HashSse(byte *buf, int len)
        {
            ulong h = 0;
            Vector128<int> v_ps = Vector128<int>.Zero;
            bool useSse4 = Sse41.IsSupported;

            int i = 0;

            // j is the factor index of the first byte of the current group (len - i - 1), so the
            // four factors for the group are kMultFactorsPtr[j - 3 .. j].
            for (int j = len - i - 1; len - i >= 4; i += 4, j = len - i - 1)
            {
                Vector128<int> c_v = Sse2.LoadVector128(&kMultFactorsPtr[j - 3]);
                c_v = Sse2.Shuffle(c_v, SO123);

                // Only the low four bytes are consumed below, and only four bytes are guaranteed
                // to remain in the buffer. Load exactly those four (upper lanes become zero)
                // instead of a full 16-byte vector, which could read past the end of buf.
                Vector128<byte> q_v = Sse2.ConvertScalarToVector128Int32(*(int*)(buf + i)).AsByte();

                if (useSse4)
                {
                    // Zero-extend the four bytes to 32-bit lanes and multiply lane-wise.
                    Vector128<int> s_v = Sse41.ConvertToVector128Int32(q_v);
                    v_ps = Sse2.Add(v_ps, Sse41.MultiplyLow(c_v, s_v));
                }
                else
                {
                    // SSE2-only widening: duplicate each byte four times per 32-bit lane, then
                    // shift right by 24 so only one copy remains in the low byte of the lane.
                    q_v = Sse2.UnpackLow(q_v, q_v);
                    Vector128<int> s_v = Sse2.ShiftRightLogical(Sse2.UnpackLow(q_v.AsUInt16(), q_v.AsUInt16()).AsInt32(), 24);

                    // SSE2 has no 32-bit lane-wise low multiply: multiply lanes 0 and 2, then
                    // (after shifting both inputs down by 4 bytes) lanes 1 and 3, and interleave
                    // the shuffled results again.
                    Vector128<ulong> v_tmp1 = Sse2.Multiply(c_v.AsUInt32(), s_v.AsUInt32());
                    Vector128<ulong> v_tmp2 =
                        Sse2.Multiply(Sse2.ShiftRightLogical128BitLane(c_v.AsByte(), 4).AsUInt32(),
                                      Sse2.ShiftRightLogical128BitLane(s_v.AsByte(), 4).AsUInt32());

                    v_ps = Sse2.Add(v_ps, Sse2.UnpackLow(Sse2.Shuffle(v_tmp1.AsInt32(), SOO2O),
                                                         Sse2.Shuffle(v_tmp2.AsInt32(), SOO2O)));
                }
            }

            // Fold the accumulator lanes with two shuffle/add steps and take lane 0.
            // NOTE(review): S23O1 and S1O32 are defined elsewhere; presumably they pair the lanes
            // so that lane 0 ends up holding the sum of all four — confirm.
            v_ps = Sse2.Add(v_ps, Sse2.Shuffle(v_ps, S23O1));
            v_ps = Sse2.Add(v_ps, Sse2.Shuffle(v_ps, S1O32));
            h   += Sse2.ConvertToUInt32(v_ps.AsUInt32());

            // Scalar tail for the last (len % 4) bytes.
            for (; i < len; i++)
            {
                int   index = len - i - 1;
                ulong c     = (uint)kMultFactors[index];
                h += c * buf[i];
            }

            return h & (kBase - 1);
        }
예제 #12
0
        /// <summary>
        /// Computes the edit distance between <paramref name="source"/> and
        /// <paramref name="target"/> using a full cost matrix, with the "+1" applied to the
        /// previous row done four columns at a time with SSE2.
        /// </summary>
        /// <param name="source">The characters to transform.</param>
        /// <param name="target">The characters to transform <paramref name="source"/> into.</param>
        /// <returns>The minimal number of single-character insertions, deletions and substitutions.</returns>
        public unsafe int CalculateDistance(ReadOnlySpan<char> source, ReadOnlySpan<char> target)
        {
            var columns = target.Length + 1;

            // Pad every row up to the next multiple of the vector width (strictly greater than
            // target.Length + 1) so the vector loop below never loads or stores past a row.
            columns += Vector128<int>.Count - (columns & (Vector128<int>.Count - 1));

            // Plain loop instead of Enumerable.Range/Select/ToArray: same jagged array, no
            // delegate or iterator allocations.
            var costMatrix = new int[source.Length + 1][];
            for (var row = 0; row < costMatrix.Length; ++row)
            {
                costMatrix[row] = new int[columns];
            }

            for (var i = 1; i <= source.Length; ++i)
            {
                costMatrix[i][0] = i;
            }

            for (var i = 1; i <= target.Length; ++i)
            {
                costMatrix[0][i] = i;
            }

            var allOnesVectors = Vector128.Create(1);

            for (var i = 1; i <= source.Length; ++i)
            {
                // Add 1 to every cell of the previous row up front, so "delete" and "edit" below
                // already include the cost of one operation. The row is modified in place; it is
                // not read again after this iteration.
                fixed (int* prevRowPtr = costMatrix[i - 1])
                {
                    for (int columnIndex = 0, l = target.Length + 1; columnIndex <= l; columnIndex += Vector128<int>.Count)
                    {
                        var columnsCovered = Sse2.LoadVector128(prevRowPtr + columnIndex);
                        var addedColumns   = Sse2.Add(columnsCovered, allOnesVectors);
                        Sse2.Store(prevRowPtr + columnIndex, addedColumns);
                    }
                }

                for (var j = 1; j <= target.Length; ++j)
                {
                    var insert = costMatrix[i][j - 1] + 1;
                    var delete = costMatrix[i - 1][j];
                    var edit   = costMatrix[i - 1][j - 1];

                    // A matching character costs nothing: undo the pre-added 1.
                    if (source[i - 1] == target[j - 1])
                    {
                        edit -= 1;
                    }

                    costMatrix[i][j] = Math.Min(Math.Min(insert, delete), edit);
                }
            }

            return costMatrix[source.Length][target.Length];
        }
예제 #13
0
        /// <summary>
        /// Reads both <c>Vector128&lt;SByte&gt;</c> operands into locals with <c>Unsafe.Read</c>,
        /// adds them with <c>Sse2.Add</c>, stores the sum in the output buffer and validates it
        /// against the two locals.
        /// </summary>
        public void RunLclVarScenario_UnsafeRead()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));

            // Operands are materialized as locals before the intrinsic call.
            Vector128<SByte> lhs = Unsafe.Read<Vector128<SByte>>(_dataTable.inArray1Ptr);
            Vector128<SByte> rhs = Unsafe.Read<Vector128<SByte>>(_dataTable.inArray2Ptr);
            Vector128<SByte> sum = Sse2.Add(lhs, rhs);

            Unsafe.Write(_dataTable.outArrayPtr, sum);

            ValidateResult(lhs, rhs, _dataTable.outArrayPtr);
        }
예제 #14
0
        /// <summary>
        /// Adds two <c>Vector128&lt;SByte&gt;</c> operands that are read with <c>Unsafe.Read</c>
        /// directly inside the argument list of <c>Sse2.Add</c>, stores the sum in the output
        /// buffer and hands the input/output pointers to <c>ValidateResult</c>.
        /// </summary>
        public void RunBasicScenario_UnsafeRead()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));

            // Both reads stay inline in the call; no operand locals are introduced.
            Vector128<SByte> sum = Sse2.Add(
                Unsafe.Read<Vector128<SByte>>(_dataTable.inArray1Ptr),
                Unsafe.Read<Vector128<SByte>>(_dataTable.inArray2Ptr));

            Unsafe.Write(_dataTable.outArrayPtr, sum);

            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
        }
예제 #15
0
        /// <summary>
        /// Adds two <c>Vector128&lt;Int64&gt;</c> operands that are loaded with
        /// <c>Sse2.LoadVector128</c> directly inside the argument list of <c>Sse2.Add</c>,
        /// stores the sum in the output buffer and hands the pointers to <c>ValidateResult</c>.
        /// </summary>
        public void RunBasicScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));

            // Both loads stay inline in the call; no operand locals are introduced.
            Vector128<Int64> sum = Sse2.Add(
                Sse2.LoadVector128((Int64*)_dataTable.inArray1Ptr),
                Sse2.LoadVector128((Int64*)_dataTable.inArray2Ptr));

            Unsafe.Write(_dataTable.outArrayPtr, sum);

            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
        }
예제 #16
0
    /// <summary>
    /// Builds four integer weights from <paramref name="t"/> — (1-t)^3, 3(1-t)^2 t, 3(1-t) t^2 and
    /// t^3 — broadcasts each (truncated to <c>short</c>) across a vector, subtracts the matching
    /// static vector <c>s_v128i_0</c>..<c>s_v128i_3</c> and returns the sum of the four differences.
    /// </summary>
    /// <param name="t">The integer parameter the weights are derived from.</param>
    /// <returns>The element-wise sum of the four (broadcast weight - static vector) terms.</returns>
    public static Vector128<short> F1_v128i(int t)
    {
        int ti = 1 - t;

        int weight0 = ti * ti * ti;
        int weight1 = 3 * ti * ti * t;
        int weight2 = 3 * ti * t * t;
        int weight3 = t * t * t;

        Vector128<short> term0 = Sse2.Subtract(Sse2.SetAllVector128((short)weight0), s_v128i_0);
        Vector128<short> term1 = Sse2.Subtract(Sse2.SetAllVector128((short)weight1), s_v128i_1);
        Vector128<short> term2 = Sse2.Subtract(Sse2.SetAllVector128((short)weight2), s_v128i_2);
        Vector128<short> term3 = Sse2.Subtract(Sse2.SetAllVector128((short)weight3), s_v128i_3);

        // Same association as before: (term0 + term1) + (term2 + term3).
        return Sse2.Add(Sse2.Add(term0, term1), Sse2.Add(term2, term3));
    }
예제 #17
0
        /// <summary>
        /// Adds the <c>_clsVar1</c> and <c>_clsVar2</c> fields with <c>Sse2.Add</c>, stores the
        /// sum in the output buffer and validates it against the same two fields.
        /// </summary>
        public void RunClsVarScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));

            // The fields are passed straight to the intrinsic, without copying them to locals.
            var sum = Sse2.Add(_clsVar1, _clsVar2);

            Unsafe.Write(_dataTable.outArrayPtr, sum);

            ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
        }
        /// <summary>
        /// Combines the <c>_a</c> and (byte-shuffled) <c>_b</c> vectors, subtracts the sum from
        /// <c>_inversionVector</c>, takes the horizontal minimum and returns it minus one.
        /// </summary>
        /// <returns>The low byte of the horizontal minimum, minus one.</returns>
        public int Shanten()
        {
            // Reorder the bytes of _b as dictated by _reverseBVector before adding it to _a.
            Vector128<byte> shuffledB = Ssse3.Shuffle(_b, _reverseBVector);
            Vector128<byte> combined = Sse2.Add(_a, shuffledB);
            Vector128<byte> inverted = Sse2.Subtract(_inversionVector, combined);

            // Shifting each 16-bit lane right by 8 moves its high byte into the low byte, so the
            // byte-wise Min leaves min(low, high) in the low byte of every 16-bit lane.
            Vector128<short> highBytes = Sse2.ShiftRightLogical(inverted.AsInt16(), 8);
            Vector128<byte> pairMin = Sse2.Min(inverted, highBytes.AsByte());

            // MinHorizontal works on 16-bit lanes; only the low byte of its result is kept.
            Vector128<ushort> laneMin = Sse41.MinHorizontal(pairMin.AsUInt16());
            byte smallest = (byte)Sse2.ConvertToInt32(laneMin.AsInt32());

            return smallest - 1;
        }
예제 #19
0
            /// <summary>
            /// Sums the eight 32-bit lanes of <paramref name="accumulator"/>.
            /// </summary>
            /// <param name="accumulator">The vector whose elements are added together.</param>
            /// <returns>The (wrapping) sum of all eight elements.</returns>
            public static int ReduceSum(Vector256<int> accumulator)
            {
                // Fold the upper 128 bits onto the lower 128 bits: four partial sums remain.
                Vector128<int> lower = accumulator.GetLower();
                Vector128<int> upper = accumulator.GetUpper();
                Vector128<int> total = Sse2.Add(lower, upper);

                // Bring lanes 1 and 3 down onto lanes 0 and 2.
                Vector128<int> oddLanes = Sse2.Shuffle(total, 0b_11_11_01_01);
                total = Sse2.Add(total, oddLanes);

                // Bring lane 2 down onto lane 0, which then holds the full sum.
                Vector128<int> highHalf = Sse2.Shuffle(total, 0b_11_10_11_10);
                total = Sse2.Add(total, highHalf);

                return Sse2.ConvertToInt32(total);
            }
예제 #20
0
        /// <summary>
        /// Creates a local <c>TestStruct</c>, loads its <c>_fld1</c> and <c>_fld2</c> fields
        /// through their addresses with <c>Sse2.LoadVector128</c>, adds them with
        /// <c>Sse2.Add</c>, stores the sum in the output buffer and validates it.
        /// </summary>
        public void RunStructLclFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario_Load));

            var localStruct = TestStruct.Create();

            // Both loads stay inline in the call and read the struct fields by address.
            Vector128<Int64> sum = Sse2.Add(
                Sse2.LoadVector128((Int64*)(&localStruct._fld1)),
                Sse2.LoadVector128((Int64*)(&localStruct._fld2)));

            Unsafe.Write(_dataTable.outArrayPtr, sum);

            ValidateResult(localStruct._fld1, localStruct._fld2, _dataTable.outArrayPtr);
        }
예제 #21
0
    /// <summary>
    /// Applies one add / xor / rotate quarter round to the four state vectors in place, using
    /// rotation amounts 16, 12, 8 and 7 in that order.
    /// </summary>
    /// <param name="a">First state vector; updated in place.</param>
    /// <param name="b">Second state vector; updated in place.</param>
    /// <param name="c">Third state vector; updated in place.</param>
    /// <param name="d">Fourth state vector; updated in place.</param>
    private static void QuarterRound(ref Vector128<uint> a, ref Vector128<uint> b, ref Vector128<uint> c, ref Vector128<uint> d)
    {
        // Every step writes straight back through the ref parameter, in the original order,
        // so behavior is unchanged even if callers pass aliased references.
        Vector128<uint> mixed;

        // a += b; d = rotl(d ^ a, 16)
        a = Sse2.Add(a, b);
        mixed = Sse2.Xor(a, d);
        d = mixed.RotateLeftUInt32_16();

        // c += d; b = rotl(b ^ c, 12)
        c = Sse2.Add(c, d);
        mixed = Sse2.Xor(b, c);
        b = mixed.RotateLeftUInt32(12);

        // a += b; d = rotl(d ^ a, 8)
        a = Sse2.Add(a, b);
        mixed = Sse2.Xor(a, d);
        d = mixed.RotateLeftUInt32_8();

        // c += d; b = rotl(b ^ c, 7)
        c = Sse2.Add(c, d);
        mixed = Sse2.Xor(b, c);
        b = mixed.RotateLeftUInt32(7);
    }
예제 #22
0
            /// <summary>
            /// Pins this instance's <c>_fld1</c> and <c>_fld2</c>, loads them with
            /// <c>Sse2.LoadVector128</c>, adds them with <c>Sse2.Add</c>, writes the sum to the
            /// output buffer of <paramref name="testClass"/> and validates it there.
            /// </summary>
            /// <param name="testClass">The test object that owns the output buffer and performs validation.</param>
            public void RunStructFldScenario_Load(SimpleBinaryOpTest__AddInt64 testClass)
            {
                fixed (Vector128<Int64>* leftPtr = &_fld1)
                fixed (Vector128<Int64>* rightPtr = &_fld2)
                {
                    // Both loads stay inline in the call and go through the pinned pointers.
                    Vector128<Int64> sum = Sse2.Add(
                        Sse2.LoadVector128((Int64*)leftPtr),
                        Sse2.LoadVector128((Int64*)rightPtr));

                    Unsafe.Write(testClass._dataTable.outArrayPtr, sum);

                    testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
                }
            }
예제 #23
0
    /// <summary>
    /// Adds the vector returned by <c>Vector128One&lt;T&gt;()</c> to <paramref name="v1"/>,
    /// dispatching on <typeparamref name="T"/> to the matching <c>Sse</c>/<c>Sse2</c> <c>Add</c>
    /// overload.
    /// </summary>
    /// <typeparam name="T">Element type; one of float, double, byte, sbyte, short, ushort, int, uint, long or ulong.</typeparam>
    /// <param name="v1">The vector to add to.</param>
    /// <returns>The element-wise sum of <paramref name="v1"/> and <c>Vector128One&lt;T&gt;()</c>.</returns>
    /// <exception cref="NotSupportedException"><typeparamref name="T"/> is not one of the supported element types.</exception>
    public static Vector128<T> Vector128PlusOne<T>(Vector128<T> v1) where T : struct
    {
        Vector128<T> one = Vector128One<T>();

        // Each branch casts both operands to the concrete element type, adds, and casts back.
        if (typeof(T) == typeof(float))
        {
            Vector128<float> sum = Sse.Add(Sse.StaticCast<T, float>(v1), Sse.StaticCast<T, float>(one));
            return Sse.StaticCast<float, T>(sum);
        }

        if (typeof(T) == typeof(double))
        {
            Vector128<double> sum = Sse2.Add(Sse.StaticCast<T, double>(v1), Sse.StaticCast<T, double>(one));
            return Sse.StaticCast<double, T>(sum);
        }

        if (typeof(T) == typeof(byte))
        {
            Vector128<byte> sum = Sse2.Add(Sse.StaticCast<T, byte>(v1), Sse.StaticCast<T, byte>(one));
            return Sse.StaticCast<byte, T>(sum);
        }

        if (typeof(T) == typeof(sbyte))
        {
            Vector128<sbyte> sum = Sse2.Add(Sse.StaticCast<T, sbyte>(v1), Sse.StaticCast<T, sbyte>(one));
            return Sse.StaticCast<sbyte, T>(sum);
        }

        if (typeof(T) == typeof(short))
        {
            Vector128<short> sum = Sse2.Add(Sse.StaticCast<T, short>(v1), Sse.StaticCast<T, short>(one));
            return Sse.StaticCast<short, T>(sum);
        }

        if (typeof(T) == typeof(ushort))
        {
            Vector128<ushort> sum = Sse2.Add(Sse.StaticCast<T, ushort>(v1), Sse.StaticCast<T, ushort>(one));
            return Sse.StaticCast<ushort, T>(sum);
        }

        if (typeof(T) == typeof(int))
        {
            Vector128<int> sum = Sse2.Add(Sse.StaticCast<T, int>(v1), Sse.StaticCast<T, int>(one));
            return Sse.StaticCast<int, T>(sum);
        }

        if (typeof(T) == typeof(uint))
        {
            Vector128<uint> sum = Sse2.Add(Sse.StaticCast<T, uint>(v1), Sse.StaticCast<T, uint>(one));
            return Sse.StaticCast<uint, T>(sum);
        }

        if (typeof(T) == typeof(long))
        {
            Vector128<long> sum = Sse2.Add(Sse.StaticCast<T, long>(v1), Sse.StaticCast<T, long>(one));
            return Sse.StaticCast<long, T>(sum);
        }

        if (typeof(T) == typeof(ulong))
        {
            Vector128<ulong> sum = Sse2.Add(Sse.StaticCast<T, ulong>(v1), Sse.StaticCast<T, ulong>(one));
            return Sse.StaticCast<ulong, T>(sum);
        }

        throw new NotSupportedException();
    }
예제 #24
0
        /// <summary>
        /// Updates one vector-wide section of a diagonal in place: compares a run of source
        /// characters with the reversed run of target characters, derives substitution, deletion
        /// and insertion costs from the two diagonal buffers, and stores <c>min(...) + 1</c> back
        /// into <paramref name="refDiag1Ptr"/>. Does nothing unless <typeparamref name="T"/> is
        /// <c>int</c> or <c>ushort</c>.
        /// </summary>
        /// <typeparam name="T">Cell type of the diagonal buffers (<c>int</c> or <c>ushort</c>).</typeparam>
        /// <param name="refDiag1Ptr">Diagonal buffer that is read for the substitution cost and receives the result.</param>
        /// <param name="refDiag2Ptr">Diagonal buffer that is read for the deletion and insertion costs.</param>
        /// <param name="sourcePtr">Source characters, read as 16-bit code units.</param>
        /// <param name="targetPtr">Target characters, read as 16-bit code units.</param>
        /// <param name="rowIndex">Row index the offsets are based on; only read here.</param>
        /// <param name="columnIndex">Column index used to locate the target characters.</param>
        public static unsafe void CalculateDiagonalSection_Sse41<T>(void* refDiag1Ptr, void* refDiag2Ptr, char* sourcePtr, char* targetPtr, ref int rowIndex, int columnIndex) where T : struct
        {
            if (typeof(T) == typeof(int))
            {
                // Evaluated only inside a supported branch, exactly as before.
                int lanes = Vector128<T>.Count;

                int* diag1 = (int*)refDiag1Ptr;
                int* diag2 = (int*)refDiag2Ptr;

                // Widen the 16-bit characters to 32-bit lanes; the target run is reversed (0x1b).
                Vector128<int> sourceChars = Sse41.ConvertToVector128Int32((ushort*)sourcePtr + rowIndex - lanes);
                Vector128<int> targetChars = Sse41.ConvertToVector128Int32((ushort*)targetPtr + columnIndex - 1);
                targetChars = Sse2.Shuffle(targetChars, 0x1b);

                // CompareEqual yields all-ones (-1) for equal lanes, so adding the mask
                // subtracts 1 from the substitution cost wherever the characters match.
                Vector128<int> matchMask = Sse2.CompareEqual(sourceChars, targetChars);
                Vector128<int> substitution = Sse2.Add(Sse3.LoadDquVector128(diag1 + rowIndex - lanes), matchMask);

                Vector128<int> deletion = Sse3.LoadDquVector128(diag2 + rowIndex - (lanes - 1));
                Vector128<int> insertion = Sse3.LoadDquVector128(diag2 + rowIndex - lanes);

                Vector128<int> best = Sse41.Min(Sse41.Min(insertion, deletion), substitution);
                best = Sse2.Add(best, Vector128.Create(1));

                Sse2.Store(diag1 + rowIndex - (lanes - 1), best);
                return;
            }

            if (typeof(T) == typeof(ushort))
            {
                // Evaluated only inside a supported branch, exactly as before.
                int lanes = Vector128<T>.Count;

                ushort* diag1 = (ushort*)refDiag1Ptr;
                ushort* diag2 = (ushort*)refDiag2Ptr;

                // Characters are already 16-bit; the target run is reordered with a byte shuffle.
                Vector128<ushort> sourceChars = Sse3.LoadDquVector128((ushort*)sourcePtr + rowIndex - lanes);
                Vector128<ushort> targetChars = Sse3.LoadDquVector128((ushort*)targetPtr + columnIndex - 1);
                targetChars = Ssse3.Shuffle(targetChars.AsByte(), REVERSE_USHORT_AS_BYTE_128).AsUInt16();

                // All-ones (0xFFFF) for equal lanes: adding it wraps around to "minus one".
                Vector128<ushort> matchMask = Sse2.CompareEqual(sourceChars, targetChars);
                Vector128<ushort> substitution = Sse2.Add(Sse3.LoadDquVector128(diag1 + rowIndex - lanes), matchMask);

                Vector128<ushort> deletion = Sse3.LoadDquVector128(diag2 + rowIndex - (lanes - 1));
                Vector128<ushort> insertion = Sse3.LoadDquVector128(diag2 + rowIndex - lanes);

                Vector128<ushort> best = Sse41.Min(Sse41.Min(insertion, deletion), substitution);
                best = Sse2.Add(best, Vector128.Create((ushort)1));

                Sse2.Store(diag1 + rowIndex - (lanes - 1), best);
            }
        }
예제 #25
0
        /// <summary>
        /// Runs four double rounds (eight rounds) of add / rotate / xor mixing over the four
        /// vectors in <paramref name="blocks"/> and then adds the mixed state back onto the
        /// original vectors in place.
        /// </summary>
        /// <param name="blocks">Array whose first four elements hold the state; they are overwritten with the result.</param>
        private static void Salsa8Sse2(Vector128<uint>[] blocks)
        {
            Vector128<uint> row0 = blocks[0];
            Vector128<uint> row1 = blocks[1];
            Vector128<uint> row2 = blocks[2];
            Vector128<uint> row3 = blocks[3];

            for (int round = 0; round < 8; round += 2)
            {
                // First half: each row is xored with the sum of the two previous rows rotated
                // left by 7, 9, 13 and 18 bits (a left shift plus the matching right shift).
                Vector128<uint> sum = Sse2.Add(row0, row3);
                row1 = Sse2.Xor(Sse2.Xor(row1, Sse2.ShiftLeftLogical(sum, 7)), Sse2.ShiftRightLogical(sum, 25));
                sum = Sse2.Add(row1, row0);
                row2 = Sse2.Xor(Sse2.Xor(row2, Sse2.ShiftLeftLogical(sum, 9)), Sse2.ShiftRightLogical(sum, 23));
                sum = Sse2.Add(row2, row1);
                row3 = Sse2.Xor(Sse2.Xor(row3, Sse2.ShiftLeftLogical(sum, 13)), Sse2.ShiftRightLogical(sum, 19));
                sum = Sse2.Add(row3, row2);
                row0 = Sse2.Xor(Sse2.Xor(row0, Sse2.ShiftLeftLogical(sum, 18)), Sse2.ShiftRightLogical(sum, 14));

                // Rotate the lanes of rows 1..3 before the second half.
                row1 = Sse2.Shuffle(row1, 0x93);
                row2 = Sse2.Shuffle(row2, 0x4E);
                row3 = Sse2.Shuffle(row3, 0x39);

                // Second half: same rotation amounts, with rows 1 and 3 swapping roles.
                sum = Sse2.Add(row0, row1);
                row3 = Sse2.Xor(Sse2.Xor(row3, Sse2.ShiftLeftLogical(sum, 7)), Sse2.ShiftRightLogical(sum, 25));
                sum = Sse2.Add(row3, row0);
                row2 = Sse2.Xor(Sse2.Xor(row2, Sse2.ShiftLeftLogical(sum, 9)), Sse2.ShiftRightLogical(sum, 23));
                sum = Sse2.Add(row2, row3);
                row1 = Sse2.Xor(Sse2.Xor(row1, Sse2.ShiftLeftLogical(sum, 13)), Sse2.ShiftRightLogical(sum, 19));
                sum = Sse2.Add(row1, row2);
                row0 = Sse2.Xor(Sse2.Xor(row0, Sse2.ShiftLeftLogical(sum, 18)), Sse2.ShiftRightLogical(sum, 14));

                // Undo the lane rotation so the next iteration starts from the same layout.
                row1 = Sse2.Shuffle(row1, 0x39);
                row2 = Sse2.Shuffle(row2, 0x4E);
                row3 = Sse2.Shuffle(row3, 0x93);
            }

            // Feed-forward: add the mixed state onto the original input.
            blocks[0] = Sse2.Add(blocks[0], row0);
            blocks[1] = Sse2.Add(blocks[1], row1);
            blocks[2] = Sse2.Add(blocks[2], row2);
            blocks[3] = Sse2.Add(blocks[3], row3);
        }
예제 #26
0
 /// <summary>
 /// Fills <c>_data</c> two elements at a time with the current value of <c>_v</c> using
 /// non-temporal stores, advancing <c>_v</c> by <c>_v2</c> after every store.
 /// </summary>
 /// <returns>The filled <c>_data</c> array.</returns>
 public long[] Sse2Test()
 {
     unsafe
     {
         fixed (long* lp = _data)
         {
             int length = _data.Length;
             int i = 0;

             // Each store writes two longs (16 bytes), so only run it while a full pair is left.
             // NOTE(review): StoreAlignedNonTemporal requires a 16-byte aligned address, which a
             // pinned managed long[] does not obviously guarantee — confirm how _data is allocated.
             for (; i + 1 < length; i += 2)
             {
                 Sse2.StoreAlignedNonTemporal(lp + i, _v);
                 _v = Sse2.Add(_v, _v2);
             }

             // Odd length: the old loop issued one more 16-byte store here and wrote one element
             // past the end of the array. Write just the low element instead, and still advance
             // _v so its final value is unchanged.
             if (i < length)
             {
                 Sse2.StoreScalar(lp + i, _v);
                 _v = Sse2.Add(_v, _v2);
             }
         }
     }

     return _data;
 }
예제 #27
0
        /// <summary>
        /// Reads the three 16-byte chunks at <c>offset ^ 0x10</c>, <c>offset ^ 0x20</c> and
        /// <c>offset ^ 0x30</c>, then writes them back rotated by one position, each with one of
        /// the given vectors added as two 64-bit lanes.
        /// </summary>
        /// <param name="basePtr">Base address of the buffer.</param>
        /// <param name="offset">Byte offset whose xor with 0x10/0x20/0x30 selects the three chunks.</param>
        /// <param name="_b1">Added to the chunk read from <c>offset ^ 0x30</c>; stored at <c>offset ^ 0x10</c>.</param>
        /// <param name="_b">Added to the chunk read from <c>offset ^ 0x10</c>; stored at <c>offset ^ 0x20</c>.</param>
        /// <param name="_a">Added to the chunk read from <c>offset ^ 0x20</c>; stored at <c>offset ^ 0x30</c>.</param>
        private unsafe static void VariantTwoShuffleAdd(
            byte* basePtr,
            int offset,
            Vector128<byte> _b1,
            Vector128<byte> _b,
            Vector128<byte> _a)
        {
            ulong* slot10 = (ulong*)(basePtr + (offset ^ 0x10));
            ulong* slot20 = (ulong*)(basePtr + (offset ^ 0x20));
            ulong* slot30 = (ulong*)(basePtr + (offset ^ 0x30));

            // All three loads happen before any store, so every sum uses the original contents.
            Vector128<ulong> from10 = Sse2.LoadVector128(slot10);
            Vector128<ulong> from20 = Sse2.LoadVector128(slot20);
            Vector128<ulong> from30 = Sse2.LoadVector128(slot30);

            Sse2.Store(slot10, Sse2.Add(from30, _b1.AsUInt64()));
            Sse2.Store(slot20, Sse2.Add(from10, _b.AsUInt64()));
            Sse2.Store(slot30, Sse2.Add(from20, _a.AsUInt64()));
        }
예제 #28
0
        /// <summary>
        /// Pins the <c>_fld1</c> and <c>_fld2</c> fields, loads them with
        /// <c>Sse2.LoadVector128</c>, adds them with <c>Sse2.Add</c>, stores the sum in the
        /// output buffer and validates it against the two fields.
        /// </summary>
        public void RunClassFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario_Load));

            fixed (Vector128<Int64>* leftPtr = &_fld1)
            fixed (Vector128<Int64>* rightPtr = &_fld2)
            {
                // Both loads stay inline in the call and go through the pinned pointers.
                Vector128<Int64> sum = Sse2.Add(
                    Sse2.LoadVector128((Int64*)leftPtr),
                    Sse2.LoadVector128((Int64*)rightPtr));

                Unsafe.Write(_dataTable.outArrayPtr, sum);

                ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
            }
        }
예제 #29
0
        /// <summary>
        /// Returns the sum of all elements of <paramref name="vector"/>.
        /// </summary>
        /// <param name="vector">The vector whose elements are added together.</param>
        /// <returns>The horizontal sum of the vector's elements.</returns>
        public static double ReduceSum(this Vector<double> vector)
        {
#if NETCOREAPP3_0
            // Reinterpreting Vector<double> as Vector256<double> is only valid when Vector<T> is
            // really 32 bytes wide. AVX support alone does not guarantee that, so the element
            // counts are compared as well; otherwise the cast would read past the vector.
            if (Avx.IsSupported && Vector<double>.Count == Vector256<double>.Count)
            {
                Vector256<double> a = Unsafe.As<Vector<double>, Vector256<double>>(ref vector);

                // HorizontalAdd sums adjacent pairs within each 128-bit half; adding the two
                // halves then leaves the total in element 0.
                Vector256<double> tmp   = Avx.HorizontalAdd(a, a);
                Vector128<double> hi128 = tmp.GetUpper();
                Vector128<double> lo128 = tmp.GetLower();
                Vector128<double> s     = Sse2.Add(lo128, hi128);

                return s.ToScalar();
            }
#endif
            // Portable fallback: the dot product with an all-ones vector is the element sum.
            return Vector.Dot(Vector<double>.One, vector);
        }
예제 #30
0
        /// <summary>
        /// Pins the <c>_clsVar1</c> and <c>_clsVar2</c> fields, loads them with
        /// <c>Sse2.LoadVector128</c>, adds them with <c>Sse2.Add</c>, stores the sum in the
        /// output buffer and validates it against the two fields.
        /// </summary>
        public void RunClsVarScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario_Load));

            fixed (Vector128<SByte>* leftPtr = &_clsVar1)
            fixed (Vector128<SByte>* rightPtr = &_clsVar2)
            {
                // Both loads stay inline in the call and go through the pinned pointers.
                Vector128<SByte> sum = Sse2.Add(
                    Sse2.LoadVector128((SByte*)leftPtr),
                    Sse2.LoadVector128((SByte*)rightPtr));

                Unsafe.Write(_dataTable.outArrayPtr, sum);

                ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
            }
        }