/// <summary>
/// Byte-swaps <paramref name="len"/> 64-bit values read from <paramref name="source"/>
/// and writes the swapped values to <paramref name="dest"/>.
/// </summary>
/// <param name="source">Pointer to the first value to read.</param>
/// <param name="dest">Pointer to the first slot to write.</param>
/// <param name="len">Number of 64-bit elements to convert.</param>
private static void ReverseEndianess(ulong *source, ulong *dest, int len)
        {
            int lanes = Vector128<ulong>.Count;

            // Scalar path: no SSSE3 available, or not even one full vector of input.
            if (!Ssse3.IsSupported || len < lanes)
            {
                for (int n = 0; n < len; ++n)
                {
                    dest[n] = BinaryPrimitives.ReverseEndianness(source[n]);
                }

                return;
            }

            int offset = 0;

            // Vector path: swap the bytes of two 64-bit values per iteration with one byte shuffle.
            do
            {
                var loaded  = Sse2.LoadVector128(source + offset);
                var swapped = Ssse3.Shuffle(loaded.AsByte(), ReverseEndianess_64_128).AsUInt64();

                Sse2.Store(dest + offset, swapped);

                offset += lanes;
            }
            while (len - offset >= lanes);

            // A 128-bit vector holds two ulongs, so at most one element can be left over.
            if (offset != len)
            {
                dest[offset] = BinaryPrimitives.ReverseEndianness(source[offset]);
            }
        }
        /// <summary>
        /// Byte-swaps <paramref name="len"/> 64-bit values in place, starting at <paramref name="ptr"/>.
        /// </summary>
        /// <param name="ptr">Pointer to the first value; each value is overwritten with its byte-swapped form.</param>
        /// <param name="len">Number of 64-bit elements to convert.</param>
        private static void ReverseEndianess(ulong *ptr, int len)
        {
            // Scalar path: no SSSE3 available, or fewer than four elements.
            if (!Ssse3.IsSupported || len < 4)
            {
                for (int n = 0; n < len; ++n)
                {
                    ptr[n] = BinaryPrimitives.ReverseEndianness(ptr[n]);
                }

                return;
            }

            int lanes  = Vector128<ulong>.Count;
            int offset = 0;

            // Vector path: load two values, reverse the bytes of each, store back to the same address.
            do
            {
                ulong *cursor = ptr + offset;

                var swapped = Ssse3.Shuffle(Sse2.LoadVector128(cursor).AsByte(), ReverseEndianess_64_128).AsUInt64();

                Sse2.Store(cursor, swapped);

                offset += lanes;
            }
            while (len - offset >= lanes);

            // A 128-bit vector holds two ulongs, so at most one element can be left over.
            if (offset != len)
            {
                ptr[offset] = BinaryPrimitives.ReverseEndianness(ptr[offset]);
            }
        }
        /// <summary>
        /// Combines the vectors into a single one with the same layout
        /// 00,01,02,03,04,10,11,12,13,14,__,__,__,k0,k1,cc
        /// </summary>
        /// <remarks>
        /// Built only from byte shuffles, byte-wise additions and byte-wise maximums.
        /// Each mask is created from two 64-bit literals: the first literal supplies lanes 0-7, the second
        /// lanes 8-15, and the least significant byte of a literal is the lowest lane. A mask byte of 0xFF
        /// (high bit set) makes the shuffle write zero to that lane.
        /// NOTE(review): what the individual lanes of the layout mean is defined outside this method;
        /// confirm against the code that produces <paramref name="a"/> and <paramref name="b"/>.
        /// </remarks>
        /// <param name="a">First input vector.</param>
        /// <param name="b">Second input vector.</param>
        /// <returns>The lane-wise maximum of the candidate vectors computed below.</returns>
        private static Vector128 <byte> CalculatePhase1(Vector128 <byte> a, Vector128 <byte> b)
        {
            // first calculate all the sums, then merge them down with repeated vertical max
            // inlined vector creation is faster than static fields, if created from two 64 bit values

            // First batch of candidate sums: selected lanes of a plus selected lanes of b.
            var va1 = Ssse3.Shuffle(a, Vector128.Create(0x01_03_02_01_04_03_02_01UL, 0xFF_0D_01_02_01_01_02_01UL).AsByte());
            var vb1 = Ssse3.Shuffle(b, Vector128.Create(0x01_05_06_07_05_06_07_08UL, 0xFF_0E_02_02_03_05_05_06UL).AsByte());

            var vab1 = Sse2.Add(va1, vb1);

            // Second batch of candidate sums, using a different lane selection for both inputs.
            var va2 = Ssse3.Shuffle(a, Vector128.Create(0xFF_07_06_05_08_07_06_05UL, 0xFF_0E_02_FF_03_05_06_05UL).AsByte());
            var vb2 = Ssse3.Shuffle(b, Vector128.Create(0xFF_01_02_03_01_02_03_04UL, 0xFF_0D_01_FF_01_01_01_02UL).AsByte());

            var vab2 = Sse2.Add(va2, vb2);

            // First reduction: keep the larger candidate in each lane.
            var xab1 = Sse2.Max(vab1, vab2);

            // Second reduction: bring lanes of xab1 that compete for the same output slot next to each other.
            var vab21 = Ssse3.Shuffle(xab1, Vector128.Create(0x07_0D_0B_FF_0A_08_01_00UL, 0xFF_0E_FF_FF_06_04_FF_FFUL).AsByte());
            var vab22 = Ssse3.Shuffle(xab1, Vector128.Create(0xFF_FF_0C_FF_FF_09_03_02UL, 0xFF_FF_FF_FF_FF_05_FF_FFUL).AsByte());

            var xab2 = Sse2.Max(vab21, vab22);

            // Third reduction inputs: rearrange xab2 into the positions used by the final max.
            var vab31 = Ssse3.Shuffle(xab2, Vector128.Create(0x02_03_FF_05_06_07_FF_FFUL, 0xFF_0E_FF_FF_FF_0F_00_0AUL).AsByte());
            var vab32 = Ssse3.Shuffle(xab2, Vector128.Create(0xFF_FF_FF_FF_FF_FF_FF_FFUL, 0xFF_FF_FF_FF_FF_FF_01_0BUL).AsByte());

            // calculates chiitoitsu sum and kokushi without pair sum
            // barely not enough space in above calculation for these
            // The AND keeps only lanes 13 and 15 of b; those two lanes are then added onto a.
            var b2 = Sse2.And(b, Vector128.Create(0UL, 0xFF_00_FF_00_00_00_00_00UL).AsByte());
            var a2 = Sse2.Add(a, b2);

            // Final merge: lane-wise maximum over a2, b and the two rearranged reduction vectors.
            return(Sse2.Max(Sse2.Max(a2, b), Sse2.Max(vab31, vab32)));
        }
Пример #4
0
            /// <summary>
            /// Shuffles this struct's two vector fields, writes the result to the output buffer
            /// of <paramref name="testClass"/> and asks it to validate that buffer.
            /// </summary>
            /// <param name="testClass">Owner of the data table and of the validation routine.</param>
            public void RunStructFldScenario(SimpleBinaryOpTest__ShuffleSByte testClass)
            {
                var shuffled = Ssse3.Shuffle(_fld1, _fld2);
                var outputPtr = testClass._dataTable.outArrayPtr;

                Unsafe.Write(outputPtr, shuffled);

                testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
            }
Пример #5
0
        /// <summary>
        /// Shuffles the two instance vector fields, writes the result to the output buffer
        /// and validates it against the same fields.
        /// </summary>
        public void RunClassFldScenario()
        {
            // Store the shuffle result directly into the output buffer.
            Unsafe.Write(_dataTable.outArrayPtr, Ssse3.Shuffle(_fld1, _fld2));

            ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
        }
Пример #6
0
        /// <summary>
        /// Parses a number from the 16 bytes at <paramref name="p"/> using SSE vector arithmetic and
        /// reports, through the return value, how many lanes failed the range comparison against <c>_9</c>.
        /// </summary>
        /// <param name="p">Pointer to the input; a full 128-bit vector (16 bytes) is always read from it.</param>
        /// <param name="cnt">Index into the per-length tables <c>mul0Array</c>, <c>mul1Array</c>,
        /// <c>cmp0Array</c> and <c>cmp1Array</c> (presumably the digit count - confirm against callers).</param>
        /// <param name="n">Receives the sum over all lanes of the two multiplied vectors.</param>
        /// <returns>
        /// The sum over all lanes of the two comparison results. A lane that compares greater than
        /// <c>_9</c> contributes -1, so the value is 0 when no lane exceeded the limit and negative otherwise.
        /// </returns>
        /// <remarks>
        /// NOTE(review): <c>Sse.StaticCast</c> appears to come from a pre-release version of the hardware
        /// intrinsics API - confirm it exists in the targeted framework.
        /// </remarks>
        public int TryParseSIMDUseCount(byte *p, int cnt, out int n)
        {
            // Load 16 input bytes and reinterpret them as signed bytes.
            var tmp  = Sse2.LoadVector128(p);
            var tmp1 = Sse.StaticCast <byte, sbyte>(tmp);

            // subtmp is defined elsewhere; presumably the ASCII '0' offset - TODO confirm.
            tmp1 = Sse2.Subtract(tmp1, subtmp);

            // Rearrange the bytes into two vectors according to the externally defined masks.
            var data0 = Ssse3.Shuffle(tmp1, mask0);


            var data1 = Ssse3.Shuffle(tmp1, mask1);


            // Multiply each 32-bit lane by the table entry selected by cnt and add both halves together.
            var mul0 = Sse41.MultiplyLow(Sse.StaticCast <sbyte, int>(data0), mul0Array[cnt]);
            var mul1 = Sse41.MultiplyLow(Sse.StaticCast <sbyte, int>(data1), mul1Array[cnt]);
            var x    = Sse2.Add(mul0, mul1);

            // Two horizontal adds leave the sum of all four lanes in every lane.
            x = Ssse3.HorizontalAdd(x, x);
            x = Ssse3.HorizontalAdd(x, x);

            n = Sse41.Extract(x, 3);



            // Range check: lanes whose product is greater than _9 become all-ones (-1).
            // NOTE(review): com1 multiplies data0, not data1, although the value computation above pairs
            // data1 with the "1" tables. This looks like a copy/paste slip that leaves data1 unchecked -
            // confirm against the contents of cmp1Array before changing it.
            var com0 = Sse2.CompareGreaterThan(Sse41.MultiplyLow(Sse.StaticCast <sbyte, int>(data0), cmp0Array[cnt]), _9);
            var com1 = Sse2.CompareGreaterThan(Sse41.MultiplyLow(Sse.StaticCast <sbyte, int>(data0), cmp1Array[cnt]), _9);

            var xx = Sse2.Add(com0, com1);

            // Same horizontal reduction as above, applied to the comparison results.
            xx = Ssse3.HorizontalAdd(xx, xx);
            xx = Ssse3.HorizontalAdd(xx, xx);

            return(Sse41.Extract(xx, 3));
        }
Пример #7
0
        /// <summary>
        /// Searches for an opening character from a registered parser in the specified string.
        /// </summary>
        /// <param name="text">The text.</param>
        /// <param name="start">The start.</param>
        /// <param name="end">The end.</param>
        /// <returns>Index position within the string of the first opening character found in the specified text; if not found, returns -1</returns>
        public int IndexOfOpeningCharacter(string text, int start, int end)
        {
            Debug.Assert(text is not null);
            Debug.Assert(start >= 0 && end >= 0);
            Debug.Assert(end - start + 1 >= 0);
            Debug.Assert(end - start + 1 <= text.Length);

            if (nonAsciiMap is null)
            {
#if NETCOREAPP3_1_OR_GREATER
                if (Ssse3.IsSupported && BitConverter.IsLittleEndian)
                {
                    // Based on http://0x80.pl/articles/simd-byte-lookup.html#universal-algorithm
                    // Optimized for sets in the [1, 127] range

                    int lengthMinusOne           = end - start;
                    int charsToProcessVectorized = lengthMinusOne & ~(2 * Vector128 <short> .Count - 1);
                    int finalStart = start + charsToProcessVectorized;

                    if (start < finalStart)
                    {
                        ref char         textStartRef = ref Unsafe.Add(ref Unsafe.AsRef(in text.GetPinnableReference()), start);
                        Vector128 <byte> bitmap       = _asciiBitmap;
                        do
                        {
                            // Load 32 bytes (16 chars) into two Vector128<short>s (chars)
                            // Drop the high byte of each char
                            // Pack the remaining bytes into a single Vector128<byte>
                            Vector128 <byte> input = Sse2.PackUnsignedSaturate(
                                Unsafe.ReadUnaligned <Vector128 <short> >(ref Unsafe.As <char, byte>(ref textStartRef)),
                                Unsafe.ReadUnaligned <Vector128 <short> >(ref Unsafe.As <char, byte>(ref Unsafe.Add(ref textStartRef, Vector128 <short> .Count))));

                            // Extract the higher nibble of each character ((input >> 4) & 0xF)
                            Vector128 <byte> higherNibbles = Sse2.And(Sse2.ShiftRightLogical(input.AsUInt16(), 4).AsByte(), Vector128.Create((byte)0xF));

                            // Lookup the matching higher nibble for each character based on the lower nibble
                            // PSHUFB will set the result to 0 for any non-ASCII (> 127) character
                            Vector128 <byte> bitsets = Ssse3.Shuffle(bitmap, input);

                            // Calculate a bitmask (1 << (higherNibble % 8)) for each character
                            Vector128 <byte> bitmask = Ssse3.Shuffle(Vector128.Create(0x8040201008040201).AsByte(), higherNibbles);

                            // Check which characters are present in the set
                            // We are relying on bitsets being zero for non-ASCII characters
                            Vector128 <byte> result = Sse2.And(bitsets, bitmask);

                            if (!result.Equals(Vector128 <byte> .Zero))
                            {
                                int resultMask = ~Sse2.MoveMask(Sse2.CompareEqual(result, Vector128 <byte> .Zero));
                                return(start + BitOperations.TrailingZeroCount((uint)resultMask));
                            }

                            start       += 2 * Vector128 <short> .Count;
                            textStartRef = ref Unsafe.Add(ref textStartRef, 2 * Vector128 <short> .Count);
                        }while (start != finalStart);
                    }
                }

                ref char textRef = ref Unsafe.AsRef(in text.GetPinnableReference());
Пример #8
0
        /// <summary>
        /// Creates a fresh test-class instance, shuffles its two vector fields,
        /// writes the result to the output buffer and validates it.
        /// </summary>
        public void RunClassLclFldScenario()
        {
            var instance = new SimpleBinaryOpTest__ShuffleSByte();

            // Store the shuffle of the local instance's fields into the shared output buffer.
            Unsafe.Write(_dataTable.outArrayPtr, Ssse3.Shuffle(instance._fld1, instance._fld2));

            ValidateResult(instance._fld1, instance._fld2, _dataTable.outArrayPtr);
        }
Пример #9
0
        /// <summary>
        /// Creates a local <c>TestStruct</c>, shuffles its two vector fields,
        /// writes the result to the output buffer and validates it.
        /// </summary>
        public void RunStructLclFldScenario()
        {
            var local    = TestStruct.Create();
            var shuffled = Ssse3.Shuffle(local._fld1, local._fld2);

            Unsafe.Write(_dataTable.outArrayPtr, shuffled);

            ValidateResult(local._fld1, local._fld2, _dataTable.outArrayPtr);
        }
Пример #10
0
        /// <summary>
        /// Converts a packed 64-bit input into an <see cref="int"/> with one byte shuffle.
        /// </summary>
        /// <param name="input">Packed input; <c>ShortCharA</c> is subtracted from it before any other step.</param>
        /// <param name="value">Receives the low 32 bits of the shuffled vector; it is written even when the method returns <c>false</c>.</param>
        /// <returns><c>true</c> when none of the bits selected by <c>ShortN15</c> are set after the subtraction.</returns>
        private unsafe bool TryParseInt(long input, out int value)
        {
            long offset = input - ShortCharA;

            // Validity is judged on the value before it is rearranged below.
            bool isValid = (offset & ShortN15) == 0;

            // OR together the value shifted left by 4 bits and the value shifted right by 8 bits.
            ulong bits   = (ulong)offset;
            long  merged = (long)((bits << 4) | (bits >> 8));

            // NShuffleMask (defined elsewhere) selects the bytes that form the result.
            var shuffled = Ssse3.Shuffle(Vector128.CreateScalar(merged).AsSByte(), NShuffleMask);

            value = Sse41.Extract(shuffled.AsInt32(), 0);

            return isValid;
        }
Пример #11
0
        /// <summary>
        /// Loads both operands with aligned loads into locals, shuffles them,
        /// writes the result to the output buffer and validates it.
        /// </summary>
        public void RunLclVarScenario_LoadAligned()
        {
            var firstOp  = Sse2.LoadAlignedVector128((SByte *)(_dataTable.inArray1Ptr));
            var secondOp = Sse2.LoadAlignedVector128((SByte *)(_dataTable.inArray2Ptr));

            // Store the shuffle of the two locals into the output buffer.
            Unsafe.Write(_dataTable.outArrayPtr, Ssse3.Shuffle(firstOp, secondOp));

            ValidateResult(firstOp, secondOp, _dataTable.outArrayPtr);
        }
Пример #12
0
        /// <summary>
        /// Announces the scenario, then shuffles the two instance vector fields,
        /// writes the result to the output buffer and validates it.
        /// </summary>
        public void RunClassFldScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));

            // Store the shuffle result directly into the output buffer.
            Unsafe.Write(_dataTable.outArrayPtr, Ssse3.Shuffle(_fld1, _fld2));

            ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
        }
Пример #13
0
        /// <summary>
        /// Reads both operands into locals with <c>Unsafe.Read</c>, shuffles them,
        /// writes the result to the output buffer and validates it.
        /// </summary>
        public void RunLclVarScenario_UnsafeRead()
        {
            var firstOp  = Unsafe.Read<Vector128<SByte>>(_dataTable.inArray1Ptr);
            var secondOp = Unsafe.Read<Vector128<SByte>>(_dataTable.inArray2Ptr);

            // Store the shuffle of the two locals into the output buffer.
            Unsafe.Write(_dataTable.outArrayPtr, Ssse3.Shuffle(firstOp, secondOp));

            ValidateResult(firstOp, secondOp, _dataTable.outArrayPtr);
        }
Пример #14
0
        /// <summary>
        /// Shuffles the two class-level vector variables, writes the result
        /// to the output buffer and validates it.
        /// </summary>
        public void RunClsVarScenario()
        {
            var shuffled = Ssse3.Shuffle(_clsVar1, _clsVar2);

            Unsafe.Write(_dataTable.outArrayPtr, shuffled);

            ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
        }
Пример #15
0
        /// <summary>
        /// Loads both operands from the input buffers with aligned loads, shuffles them,
        /// writes the result to the output buffer and validates it against the raw input buffers.
        /// </summary>
        public void RunBasicScenario_LoadAligned()
        {
            var firstOp  = Sse2.LoadAlignedVector128((SByte *)(_dataTable.inArray1Ptr));
            var secondOp = Sse2.LoadAlignedVector128((SByte *)(_dataTable.inArray2Ptr));
            var shuffled = Ssse3.Shuffle(firstOp, secondOp);

            Unsafe.Write(_dataTable.outArrayPtr, shuffled);

            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
        }
Пример #16
0
        /// <summary>
        /// Reads both operands from the input buffers with <c>Unsafe.Read</c>, shuffles them,
        /// writes the result to the output buffer and validates it against the raw input buffers.
        /// </summary>
        public void RunBasicScenario_UnsafeRead()
        {
            var firstOp  = Unsafe.Read<Vector128<SByte>>(_dataTable.inArray1Ptr);
            var secondOp = Unsafe.Read<Vector128<SByte>>(_dataTable.inArray2Ptr);
            var shuffled = Ssse3.Shuffle(firstOp, secondOp);

            Unsafe.Write(_dataTable.outArrayPtr, shuffled);

            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
        }
Пример #17
0
        /// <summary>
        /// Announces the scenario, creates a fresh test-class instance, shuffles its two vector fields,
        /// writes the result to the output buffer and validates it.
        /// </summary>
        public void RunClassLclFldScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));

            var instance = new SimpleBinaryOpTest__ShuffleByte();

            // Store the shuffle of the local instance's fields into the shared output buffer.
            Unsafe.Write(_dataTable.outArrayPtr, Ssse3.Shuffle(instance._fld1, instance._fld2));

            ValidateResult(instance._fld1, instance._fld2, _dataTable.outArrayPtr);
        }
Пример #18
0
        /// <summary>
        /// Announces the scenario, creates a local <c>TestStruct</c>, shuffles its two vector fields,
        /// writes the result to the output buffer and validates it.
        /// </summary>
        public void RunStructLclFldScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));

            var local    = TestStruct.Create();
            var shuffled = Ssse3.Shuffle(local._fld1, local._fld2);

            Unsafe.Write(_dataTable.outArrayPtr, shuffled);

            ValidateResult(local._fld1, local._fld2, _dataTable.outArrayPtr);
        }
Пример #19
0
        /// <summary>
        /// Announces the scenario, loads both operands with aligned loads into locals, shuffles them,
        /// writes the result to the output buffer and validates it.
        /// </summary>
        public void RunLclVarScenario_LoadAligned()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));

            var firstOp  = Sse2.LoadAlignedVector128((SByte *)(_dataTable.inArray1Ptr));
            var secondOp = Sse2.LoadAlignedVector128((SByte *)(_dataTable.inArray2Ptr));

            // Store the shuffle of the two locals into the output buffer.
            Unsafe.Write(_dataTable.outArrayPtr, Ssse3.Shuffle(firstOp, secondOp));

            ValidateResult(firstOp, secondOp, _dataTable.outArrayPtr);
        }
Пример #20
0
        /// <summary>
        /// Announces the scenario, reads both operands into locals with <c>Unsafe.Read</c>, shuffles them,
        /// writes the result to the output buffer and validates it.
        /// </summary>
        public void RunLclVarScenario_UnsafeRead()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));

            var firstOp  = Unsafe.Read<Vector128<SByte>>(_dataTable.inArray1Ptr);
            var secondOp = Unsafe.Read<Vector128<SByte>>(_dataTable.inArray2Ptr);

            // Store the shuffle of the two locals into the output buffer.
            Unsafe.Write(_dataTable.outArrayPtr, Ssse3.Shuffle(firstOp, secondOp));

            ValidateResult(firstOp, secondOp, _dataTable.outArrayPtr);
        }
Пример #21
0
        /// <summary>
        /// Hex-encodes <paramref name="bytes"/> into UTF-16 characters, four input bytes
        /// (eight output chars) per vector iteration, with a scalar loop for the last 0-3 bytes.
        /// </summary>
        /// <param name="bytes">Input bytes; must contain at least four (checked by a debug assertion).</param>
        /// <param name="chars">Destination; two chars are written per input byte. No length check is performed here.</param>
        /// <param name="casing">Selects upper- or lower-case digits for A-F.</param>
        private static void EncodeToUtf16_Ssse3(ReadOnlySpan <byte> bytes, Span <char> chars, Casing casing)
        {
            Debug.Assert(bytes.Length >= 4);

            ref byte source = ref MemoryMarshal.GetReference(bytes);
            ref char target = ref MemoryMarshal.GetReference(chars);

            // Places input byte k at byte 4k + 2 of the vector and zeroes every other byte.
            Vector128<byte> spreadMask = Vector128.Create(
                0xFF, 0xFF, 0, 0xFF, 0xFF, 0xFF, 1, 0xFF,
                0xFF, 0xFF, 2, 0xFF, 0xFF, 0xFF, 3, 0xFF);

            // 16-entry lookup table: nibble value -> ASCII hex digit.
            Vector128<byte> digits;
            if (casing == Casing.Upper)
            {
                digits = Vector128.Create((byte)'0', (byte)'1', (byte)'2', (byte)'3',
                                          (byte)'4', (byte)'5', (byte)'6', (byte)'7',
                                          (byte)'8', (byte)'9', (byte)'A', (byte)'B',
                                          (byte)'C', (byte)'D', (byte)'E', (byte)'F');
            }
            else
            {
                digits = Vector128.Create((byte)'0', (byte)'1', (byte)'2', (byte)'3',
                                          (byte)'4', (byte)'5', (byte)'6', (byte)'7',
                                          (byte)'8', (byte)'9', (byte)'a', (byte)'b',
                                          (byte)'c', (byte)'d', (byte)'e', (byte)'f');
            }

            Vector128<byte> nibbleMask   = Vector128.Create((byte)0xF);
            Vector128<byte> lowByteOfChar = Vector128.Create((ushort)0xFF).AsByte();

            nint offset = 0;

            do
            {
                // Fetch the next four input bytes as one 32-bit value.
                uint quad = Unsafe.ReadUnaligned<uint>(ref Unsafe.Add(ref source, offset));

                // Each 32-bit lane now holds one input byte in its third byte (second char, low byte).
                Vector128<byte> spread = Ssse3.Shuffle(Vector128.CreateScalarUnsafe(quad).AsByte(), spreadMask);

                // Move a copy down to the first byte of the lane and shift out its low nibble.
                Vector128<byte> shiftedDown = Sse2.ShiftRightLogical128BitLane(spread, 2);
                Vector128<byte> upperHalves = Sse2.ShiftRightLogical(shiftedDown.AsInt32(), 4).AsByte();

                // Merge both copies and keep four bits per byte: these are the table indices.
                Vector128<byte> lookupIndices = Sse2.And(Sse2.Or(spread, upperHalves), nibbleMask);
                Vector128<byte> encoded       = Ssse3.Shuffle(digits, lookupIndices);

                // The zero high bytes of each char were looked up as '0' too; reset them to zero.
                encoded = Sse2.And(encoded, lowByteOfChar);

                // Eight chars go to the destination at char index offset * 2.
                Unsafe.WriteUnaligned(
                    ref Unsafe.As<char, byte>(ref Unsafe.Add(ref target, offset * 2)),
                    encoded);

                offset += 4;
            }
            while (offset < bytes.Length - 3);

            // Encode the remaining bytes.Length % 4 bytes one at a time.
            while (offset < bytes.Length)
            {
                ToCharsBuffer(Unsafe.Add(ref source, offset), chars, (int)offset * 2, casing);
                offset++;
            }
        }
Пример #22
0
        /// <summary>
        /// Announces the scenario, loads both operands with unaligned loads into locals, shuffles them,
        /// writes the result to the output buffer and validates it.
        /// </summary>
        public void RunLclVarScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));

            var firstOp  = Sse2.LoadVector128((SByte *)(_dataTable.inArray1Ptr));
            var secondOp = Sse2.LoadVector128((SByte *)(_dataTable.inArray2Ptr));

            // Store the shuffle of the two locals into the output buffer.
            Unsafe.Write(_dataTable.outArrayPtr, Ssse3.Shuffle(firstOp, secondOp));

            ValidateResult(firstOp, secondOp, _dataTable.outArrayPtr);
        }
Пример #23
0
        /// <summary>
        /// Announces the scenario, reads both operands from the input buffers with <c>Unsafe.Read</c>,
        /// shuffles them, writes the result to the output buffer and validates it against the raw input buffers.
        /// </summary>
        public void RunBasicScenario_UnsafeRead()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));

            var firstOp  = Unsafe.Read<Vector128<Byte>>(_dataTable.inArray1Ptr);
            var secondOp = Unsafe.Read<Vector128<Byte>>(_dataTable.inArray2Ptr);
            var shuffled = Ssse3.Shuffle(firstOp, secondOp);

            Unsafe.Write(_dataTable.outArrayPtr, shuffled);

            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
        }
        /// <summary>
        /// Reduces the two state vectors to a single number: the smallest byte of
        /// <c>_inversionVector - (_a + shuffle(_b))</c>, minus one.
        /// </summary>
        /// <returns>The minimum byte of the inverted sum vector, decremented by one.</returns>
        public int Shanten()
        {
            // Reorder _b with the externally defined mask, then add it to _a byte by byte.
            var reorderedB = Ssse3.Shuffle(_b, _reverseBVector);
            var combined   = Sse2.Add(_a, reorderedB);

            // Subtract from the inversion vector so that the later minimum search applies to the inverted values.
            var inverted = Sse2.Subtract(_inversionVector, combined);

            // Bring the high byte of every 16-bit lane down next to the low byte and keep the smaller one,
            // so each 16-bit lane ends up holding the minimum of its two bytes.
            var highBytes   = Sse2.ShiftRightLogical(inverted.AsInt16(), 8).AsByte();
            var perLaneMin  = Sse2.Min(inverted, highBytes);

            // The horizontal minimum places the smallest 16-bit lane in element 0.
            var horizontal = Sse41.MinHorizontal(perLaneMin.AsUInt16());
            var smallest   = (byte)Sse2.ConvertToInt32(horizontal.AsInt32());

            return smallest - 1;
        }
Пример #25
0
        /// <summary>
        /// Announces the scenario, loads both operands from the input buffers with aligned loads,
        /// shuffles them, writes the result to the output buffer and validates it against the raw input buffers.
        /// </summary>
        public void RunBasicScenario_LoadAligned()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));

            var firstOp  = Sse2.LoadAlignedVector128((Byte *)(_dataTable.inArray1Ptr));
            var secondOp = Sse2.LoadAlignedVector128((Byte *)(_dataTable.inArray2Ptr));
            var shuffled = Ssse3.Shuffle(firstOp, secondOp);

            Unsafe.Write(_dataTable.outArrayPtr, shuffled);

            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
        }
Пример #26
0
        /// <summary>
        /// Announces the scenario, creates a local <c>TestStruct</c>, loads its two vector fields through
        /// pointers, shuffles them, writes the result to the output buffer and validates it.
        /// </summary>
        public void RunStructLclFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario_Load));

            var test = TestStruct.Create();

            // Load each field from its address instead of reading it by value.
            var firstOp  = Sse2.LoadVector128((Byte *)(&test._fld1));
            var secondOp = Sse2.LoadVector128((Byte *)(&test._fld2));
            var shuffled = Ssse3.Shuffle(firstOp, secondOp);

            Unsafe.Write(_dataTable.outArrayPtr, shuffled);

            ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr);
        }
Пример #27
0
        /// <summary>
        /// Builds a per-byte mask that marks which bytes of <paramref name="sourceValue"/> hit a set bit
        /// in a nibble-indexed bitmask table.
        /// </summary>
        /// <param name="sourceValue">The 16 input bytes to classify.</param>
        /// <param name="bitMaskLookup">Table indexed by the low nibble; each entry is a bit set over high nibbles.</param>
        /// <param name="bitPosLookup">Table indexed by the high nibble; each entry is the bit(s) to test for that high nibble.</param>
        /// <param name="nibbleMaskSByte">Mask applied to isolate a nibble in every byte.</param>
        /// <param name="nullMaskSByte">Vector the intermediate result is compared against (twice).</param>
        /// <returns>
        /// The result of comparing <paramref name="nullMaskSByte"/> for equality with the comparison of
        /// <paramref name="nullMaskSByte"/> against the combined lookup bits. With an all-zero
        /// <paramref name="nullMaskSByte"/> this is all-ones for every byte whose lookup bits are non-zero
        /// and zero for every other byte.
        /// </returns>
        public static Vector128 <sbyte> CreateEscapingMask(
            Vector128 <sbyte> sourceValue,
            Vector128 <sbyte> bitMaskLookup,
            Vector128 <sbyte> bitPosLookup,
            Vector128 <sbyte> nibbleMaskSByte,
            Vector128 <sbyte> nullMaskSByte)
        {
            // Whether an input byte needs escaping is decided with a bitmask lookup: the byte is split
            // into its low and high nibble, which act as row and column index into a bit table.
            // Example table (s_bitMaskLookupBasicLatin):
            //                                     high-nibble
            // low-nibble  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
            //         0   1   1   0   0   0   0   1   0   1   1   1   1   1   1   1   1
            //         1   1   1   0   0   0   0   0   0   1   1   1   1   1   1   1   1
            //         2   1   1   1   0   0   0   0   0   1   1   1   1   1   1   1   1
            //         3   1   1   0   0   0   0   0   0   1   1   1   1   1   1   1   1
            //         4   1   1   0   0   0   0   0   0   1   1   1   1   1   1   1   1
            //         5   1   1   0   0   0   0   0   0   1   1   1   1   1   1   1   1
            //         6   1   1   1   0   0   0   0   0   1   1   1   1   1   1   1   1
            //         7   1   1   1   0   0   0   0   0   1   1   1   1   1   1   1   1
            //         8   1   1   0   0   0   0   0   0   1   1   1   1   1   1   1   1
            //         9   1   1   0   0   0   0   0   0   1   1   1   1   1   1   1   1
            //         A   1   1   0   0   0   0   0   0   1   1   1   1   1   1   1   1
            //         B   1   1   1   0   0   0   0   0   1   1   1   1   1   1   1   1
            //         C   1   1   0   1   0   1   0   0   1   1   1   1   1   1   1   1
            //         D   1   1   0   0   0   0   0   0   1   1   1   1   1   1   1   1
            //         E   1   1   0   1   0   0   0   0   1   1   1   1   1   1   1   1
            //         F   1   1   0   0   0   0   0   1   1   1   1   1   1   1   1   1
            //
            // A 1 means the byte must be escaped, a 0 means it can be emitted as-is.
            // Every byte with a high nibble in 8..F must be escaped, so those columns are left out of
            // the table; only high nibbles 0..7 are stored, which lets each row fit into one byte.
            // Each table entry (one per low nibble) is therefore a bit set over the high nibbles.

            Debug.Assert(Ssse3.IsSupported);

            // Row lookup: the low nibble selects the bit set for that row.
            Vector128<sbyte> rowIndex = Sse2.And(sourceValue, nibbleMaskSByte);
            Vector128<sbyte> rowBits  = Ssse3.Shuffle(bitMaskLookup, rowIndex);

            // Column lookup: the high nibble selects which bit of the row to test. The shift works on
            // 32-bit lanes, so bits from the neighbouring byte leak in and are removed by the nibble mask.
            Vector128<sbyte> shifted     = Sse2.ShiftRightLogical(sourceValue.AsInt32(), 4).AsSByte();
            Vector128<sbyte> columnIndex = Sse2.And(shifted, nibbleMaskSByte);
            Vector128<sbyte> columnBits  = Ssse3.Shuffle(bitPosLookup, columnIndex);

            Vector128<sbyte> hits = Sse2.And(columnBits, rowBits);

            // Compare twice against the null mask: the first compare flags bytes without a hit,
            // the second one inverts that flag.
            Vector128<sbyte> withoutHit = Sse2.CompareEqual(nullMaskSByte, hits);

            return Sse2.CompareEqual(nullMaskSByte, withoutHit);
        }
Пример #28
0
            /// <summary>
            /// Pins this struct's two vector fields, loads them through pointers, shuffles them,
            /// writes the result to the output buffer of <paramref name="testClass"/> and validates it.
            /// </summary>
            /// <param name="testClass">Owner of the data table and of the validation routine.</param>
            public void RunStructFldScenario_Load(SimpleBinaryOpTest__ShuffleByte testClass)
            {
                fixed (Vector128<Byte> *firstPtr = &_fld1)
                fixed (Vector128<Byte> *secondPtr = &_fld2)
                {
                    var firstOp  = Sse2.LoadVector128((Byte *)(firstPtr));
                    var secondOp = Sse2.LoadVector128((Byte *)(secondPtr));
                    var shuffled = Ssse3.Shuffle(firstOp, secondOp);

                    Unsafe.Write(testClass._dataTable.outArrayPtr, shuffled);

                    testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
                }
            }
Пример #29
0
        /// <summary>
        /// Computes one vector's worth of cost cells and stores them into the first diagonal buffer:
        /// each cell is <c>min(insert, delete, substitution) + 1</c>, where the substitution cost is
        /// reduced by one wherever the compared source and target characters are equal.
        /// </summary>
        /// <typeparam name="T">Cell type of the diagonal buffers. Only <see cref="int"/> and
        /// <see cref="ushort"/> are handled; for any other type the method does nothing.</typeparam>
        /// <param name="refDiag1Ptr">Buffer that supplies the substitution base costs and receives the result.</param>
        /// <param name="refDiag2Ptr">Buffer that supplies the insert and delete costs.</param>
        /// <param name="sourcePtr">Source characters; the vector ending just before <paramref name="rowIndex"/> is read.</param>
        /// <param name="targetPtr">Target characters; the vector starting at <paramref name="columnIndex"/> - 1 is read.</param>
        /// <param name="rowIndex">Row position; passed by reference but not modified by this method.</param>
        /// <param name="columnIndex">Column position.</param>
        /// <remarks>
        /// NOTE(review): the names suggest an edit-distance computation over anti-diagonals; the exact
        /// meaning of the two buffers is defined by the caller - confirm there.
        /// </remarks>
        public static unsafe void CalculateDiagonalSection_Sse41 <T>(void *refDiag1Ptr, void *refDiag2Ptr, char *sourcePtr, char *targetPtr, ref int rowIndex, int columnIndex) where T : struct
        {
            if (typeof(T) == typeof(int))
            {
                var diag1Ptr = (int *)refDiag1Ptr;
                var diag2Ptr = (int *)refDiag2Ptr;

                // Widen four 16-bit characters of each string to 32-bit lanes.
                var sourceVector = Sse41.ConvertToVector128Int32((ushort *)sourcePtr + rowIndex - Vector128 <T> .Count);
                var targetVector = Sse41.ConvertToVector128Int32((ushort *)targetPtr + columnIndex - 1);
                // Control byte 0x1b reverses the order of the four 32-bit lanes.
                targetVector = Sse2.Shuffle(targetVector, 0x1b);
                // Equal lanes become all-ones (-1), so adding this vector subtracts one where the characters match.
                var substitutionCostAdjustment = Sse2.CompareEqual(sourceVector, targetVector);

                var substitutionCost = Sse2.Add(
                    Sse3.LoadDquVector128(diag1Ptr + rowIndex - Vector128 <T> .Count),
                    substitutionCostAdjustment
                    );

                // The delete costs are read one element further along the buffer than the insert costs.
                var deleteCost = Sse3.LoadDquVector128(diag2Ptr + rowIndex - (Vector128 <T> .Count - 1));
                var insertCost = Sse3.LoadDquVector128(diag2Ptr + rowIndex - Vector128 <T> .Count);

                // Cheapest of the three options, plus one.
                var localCost = Sse41.Min(Sse41.Min(insertCost, deleteCost), substitutionCost);
                localCost = Sse2.Add(localCost, Vector128.Create(1));

                // The result is written back into the first buffer, one element further along than the substitution read.
                Sse2.Store(diag1Ptr + rowIndex - (Vector128 <T> .Count - 1), localCost);
            }
            else if (typeof(T) == typeof(ushort))
            {
                var diag1Ptr = (ushort *)refDiag1Ptr;
                var diag2Ptr = (ushort *)refDiag2Ptr;

                // Characters are already 16 bits wide, so eight of them are loaded directly.
                var sourceVector = Sse3.LoadDquVector128((ushort *)sourcePtr + rowIndex - Vector128 <T> .Count);
                var targetVector = Sse3.LoadDquVector128((ushort *)targetPtr + columnIndex - 1);
                // REVERSE_USHORT_AS_BYTE_128 is defined elsewhere; presumably a byte shuffle mask that
                // reverses the order of the eight 16-bit lanes - TODO confirm.
                targetVector = Ssse3.Shuffle(targetVector.AsByte(), REVERSE_USHORT_AS_BYTE_128).AsUInt16();
                // Equal lanes become all-ones, so adding this vector subtracts one (modulo 2^16) where the characters match.
                var substitutionCostAdjustment = Sse2.CompareEqual(sourceVector, targetVector);

                var substitutionCost = Sse2.Add(
                    Sse3.LoadDquVector128(diag1Ptr + rowIndex - Vector128 <T> .Count),
                    substitutionCostAdjustment
                    );

                // The delete costs are read one element further along the buffer than the insert costs.
                var deleteCost = Sse3.LoadDquVector128(diag2Ptr + rowIndex - (Vector128 <T> .Count - 1));
                var insertCost = Sse3.LoadDquVector128(diag2Ptr + rowIndex - Vector128 <T> .Count);

                // Cheapest of the three options, plus one.
                var localCost = Sse41.Min(Sse41.Min(insertCost, deleteCost), substitutionCost);
                localCost = Sse2.Add(localCost, Vector128.Create((ushort)1));

                // The result is written back into the first buffer, one element further along than the substitution read.
                Sse2.Store(diag1Ptr + rowIndex - (Vector128 <T> .Count - 1), localCost);
            }
        }
Пример #30
0
        /// <summary>
        /// Announces the scenario, pins the two instance vector fields, loads them through pointers,
        /// shuffles them, writes the result to the output buffer and validates it.
        /// </summary>
        public void RunClassFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario_Load));

            fixed (Vector128<Byte> *firstPtr = &_fld1)
            fixed (Vector128<Byte> *secondPtr = &_fld2)
            {
                var firstOp  = Sse2.LoadVector128((Byte *)(firstPtr));
                var secondOp = Sse2.LoadVector128((Byte *)(secondPtr));
                var shuffled = Ssse3.Shuffle(firstOp, secondOp);

                Unsafe.Write(_dataTable.outArrayPtr, shuffled);

                ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
            }
        }