Ejemplo n.º 1
0
        /// <summary>
        /// Smoke test for <c>Avx2.AddSaturate</c>: adds two input vectors for each of byte, sbyte,
        /// short and ushort, then checks every output lane against a scalar add that is clamped
        /// to the element type's range.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        /// <returns><c>Pass</c> if every lane matches or AVX2 is not supported; <c>Fail</c> otherwise.</returns>
        static unsafe int Main(string[] args)
        {
            if (!Avx2.IsSupported)
            {
                // Nothing to exercise on this hardware; that counts as a pass.
                return Pass;
            }

            int outcome = Pass;

            using (var bytes = new TestTable<byte, byte, byte>(
                new byte[32]
                {
                    1, 5, 100, 0, 1, 5, 100, 0,
                    1, 5, 100, 0, 1, 5, 100, 0,
                    1, 5, 100, 0, 1, 5, 100, 0,
                    1, 5, 100, 0, 1, 5, 100, 0
                },
                new byte[32]
                {
                    22, 1, 50, 0, 22, 1, 50, 0,
                    22, 1, 50, 0, 22, 1, 50, 0,
                    22, 1, 50, 0, 22, 1, 50, 0,
                    22, 1, 50, 0, 22, 1, 50, 0
                },
                new byte[32]))
            using (var sbytes = new TestTable<sbyte, sbyte, sbyte>(
                new sbyte[32]
                {
                    1, -5, 100, 0, 1, -5, 100, 0,
                    1, -5, 100, 0, 1, -5, 100, 0,
                    1, -5, 100, 0, 1, -5, 100, 0,
                    1, -5, 100, 0, 1, -5, 100, 0
                },
                new sbyte[32]
                {
                    22, -1, -50, 0, 22, -1, -50, 0,
                    22, -1, -50, 0, 22, -1, -50, 0,
                    22, -1, -50, 0, 22, -1, -50, 0,
                    22, -1, -50, 0, 22, -1, -50, 0
                },
                new sbyte[32]))
            using (var shorts = new TestTable<short, short, short>(
                new short[16]
                {
                    1, -5, 100, 0, 1, -5, 100, 0,
                    1, -5, 100, 0, 1, -5, 100, 0
                },
                new short[16]
                {
                    22, -1, -50, 0, 22, -1, -50, 0,
                    22, -1, -50, 0, 22, -1, -50, 0
                },
                new short[16]))
            using (var ushorts = new TestTable<ushort, ushort, ushort>(
                new ushort[16]
                {
                    1, 5, 100, 0, 1, 5, 100, 0,
                    1, 5, 100, 0, 1, 5, 100, 0
                },
                new ushort[16]
                {
                    22, 1, 50, 0, 22, 1, 50, 0,
                    22, 1, 50, 0, 22, 1, 50, 0
                },
                new ushort[16]))
            {
                // Vector phase: one saturating add per element type, written to the output buffer.
                Vector256<byte> byteLhs = Unsafe.Read<Vector256<byte>>(bytes.inArray1Ptr);
                Vector256<byte> byteRhs = Unsafe.Read<Vector256<byte>>(bytes.inArray2Ptr);
                Vector256<byte> byteSum = Avx2.AddSaturate(byteLhs, byteRhs);
                Unsafe.Write(bytes.outArrayPtr, byteSum);

                Vector256<sbyte> sbyteLhs = Unsafe.Read<Vector256<sbyte>>(sbytes.inArray1Ptr);
                Vector256<sbyte> sbyteRhs = Unsafe.Read<Vector256<sbyte>>(sbytes.inArray2Ptr);
                Vector256<sbyte> sbyteSum = Avx2.AddSaturate(sbyteLhs, sbyteRhs);
                Unsafe.Write(sbytes.outArrayPtr, sbyteSum);

                Vector256<short> shortLhs = Unsafe.Read<Vector256<short>>(shorts.inArray1Ptr);
                Vector256<short> shortRhs = Unsafe.Read<Vector256<short>>(shorts.inArray2Ptr);
                Vector256<short> shortSum = Avx2.AddSaturate(shortLhs, shortRhs);
                Unsafe.Write(shorts.outArrayPtr, shortSum);

                Vector256<ushort> ushortLhs = Unsafe.Read<Vector256<ushort>>(ushorts.inArray1Ptr);
                Vector256<ushort> ushortRhs = Unsafe.Read<Vector256<ushort>>(ushorts.inArray2Ptr);
                Vector256<ushort> ushortSum = Avx2.AddSaturate(ushortLhs, ushortRhs);
                Unsafe.Write(ushorts.outArrayPtr, ushortSum);

                // Scalar phase: each type stops at its first mismatching lane, but the
                // remaining types are still checked.
                for (int lane = 0; lane < bytes.outArray.Length; lane++)
                {
                    int expected = Math.Min(Math.Max(bytes.inArray1[lane] + bytes.inArray2[lane], 0), byte.MaxValue);
                    if (bytes.outArray[lane] == (byte)expected)
                    {
                        continue;
                    }

                    Console.WriteLine("AVX2 AddSaturate failed on byte:");
                    Console.WriteLine();
                    outcome = Fail;
                    break;
                }

                for (int lane = 0; lane < sbytes.outArray.Length; lane++)
                {
                    int expected = Math.Min(Math.Max(sbytes.inArray1[lane] + sbytes.inArray2[lane], sbyte.MinValue), sbyte.MaxValue);
                    if (sbytes.outArray[lane] == (sbyte)expected)
                    {
                        continue;
                    }

                    Console.WriteLine("AVX2 AddSaturate failed on sbyte:");
                    Console.WriteLine();
                    outcome = Fail;
                    break;
                }

                for (int lane = 0; lane < shorts.outArray.Length; lane++)
                {
                    int expected = Math.Min(Math.Max(shorts.inArray1[lane] + shorts.inArray2[lane], short.MinValue), short.MaxValue);
                    if (shorts.outArray[lane] == (short)expected)
                    {
                        continue;
                    }

                    Console.WriteLine("AVX2 AddSaturate failed on short:");
                    Console.WriteLine();
                    outcome = Fail;
                    break;
                }

                for (int lane = 0; lane < ushorts.outArray.Length; lane++)
                {
                    int expected = Math.Min(Math.Max(ushorts.inArray1[lane] + ushorts.inArray2[lane], 0), ushort.MaxValue);
                    if (ushorts.outArray[lane] == (ushort)expected)
                    {
                        continue;
                    }

                    Console.WriteLine("AVX2 AddSaturate failed on ushort:");
                    Console.WriteLine();
                    outcome = Fail;
                    break;
                }
            }

            return outcome;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Takes JSON text from <paramref name="buf"/>, removes the whitespace that lies outside
        /// quoted strings, and writes the null-terminated result to <paramref name="out"/>.
        /// </summary>
        /// <param name="buf">Pointer to the input bytes.</param>
        /// <param name="len">Number of input bytes to process.</param>
        /// <param name="out">
        /// Destination pointer; may be the same as <paramref name="buf"/>. A terminating zero byte
        /// is written after the last kept byte, so up to <paramref name="len"/> + 1 bytes can be
        /// written when nothing is removed.
        /// </param>
        /// <returns>The length of the minified text, not counting the terminating zero.</returns>
        /// <exception cref="NotSupportedException">AVX2 is not available on the current CPU.</exception>
        public static size_t Minify(uint8_t *buf, size_t len, uint8_t * @out)
        {
            if (!Avx2.IsSupported)
            {
                throw new NotSupportedException("AVX2 is required for SimdJson");
            }

            //C#: load const vectors once (there is no `const _m256` in C#)
            Vector256 <byte> lut_cntrl        = s_lut_cntrl;
            Vector256 <byte> low_nibble_mask  = s_low_nibble_mask;
            Vector256 <byte> high_nibble_mask = s_high_nibble_mask;

            // Pin the shuffle-mask table so it can be indexed with raw pointers below.
            fixed(byte *mask128_epi8 = s_mask128_epi8)
            {
                // Useful constant masks
                const uint64_t even_bits = 0x5555555555555555UL;
                const uint64_t odd_bits  = ~even_bits;
                uint8_t *      initout   = @out;   // remembered to compute the returned length
                uint64_t       prev_iter_ends_odd_backslash =
                    0UL;                               // either 0 or 1, but a 64-bit value
                uint64_t prev_iter_inside_quote = 0UL; // either all zeros or all ones
                size_t   idx = 0;

                // Main loop: consume the input in full 64-byte blocks.
                if (len >= 64)
                {
                    size_t avxlen = len - 63;

                    for (; idx < avxlen; idx += 64)
                    {
                        Vector256 <byte> input_lo = Avx.LoadVector256((buf + idx + 0));
                        Vector256 <byte> input_hi = Avx.LoadVector256((buf + idx + 32));

                        // Presumably one bit per input byte that equals '\\' — verify against
                        // cmp_mask_against_input_mini.
                        uint64_t         bs_bits  = cmp_mask_against_input_mini(input_lo, input_hi,
                                                                                Vector256.Create((byte)'\\'));

                        // Find backslash runs of odd length so the quote they escape can be
                        // ignored; the carry flag links a run that crosses the block boundary.
                        uint64_t start_edges     = bs_bits & ~(bs_bits << 1);
                        uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
                        uint64_t even_starts     = start_edges & even_start_mask;
                        uint64_t odd_starts      = start_edges & ~even_start_mask;
                        uint64_t even_carries    = bs_bits + even_starts;
                        uint64_t odd_carries;
                        bool     iter_ends_odd_backslash = add_overflow(
                            bs_bits, odd_starts, &odd_carries);
                        odd_carries |= prev_iter_ends_odd_backslash;
                        prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1UL : 0x0UL;
                        uint64_t even_carry_ends    = even_carries & ~bs_bits;
                        uint64_t odd_carry_ends     = odd_carries & ~bs_bits;
                        uint64_t even_start_odd_end = even_carry_ends & odd_bits;
                        uint64_t odd_start_even_end = odd_carry_ends & even_bits;
                        uint64_t odd_ends           = even_start_odd_end | odd_start_even_end;

                        // Quote positions, minus the ones that directly follow an odd backslash run.
                        uint64_t quote_bits         = cmp_mask_against_input_mini(input_lo, input_hi,
                                                                                  Vector256.Create((byte)'"'));
                        quote_bits = quote_bits & ~odd_ends;

                        // Carry-less multiply by all-ones turns the quote bits into an
                        // "inside a string" mask (prefix XOR).
                        uint64_t quote_mask = Sse2.X64.ConvertToUInt64(Pclmulqdq.CarrylessMultiply(
                                                                           Vector128.Create(quote_bits, 0UL).AsUInt64(), Vector128.Create((byte)0xFF).AsUInt64(), 0));
                        quote_mask            ^= prev_iter_inside_quote;

                        // Arithmetic shift broadcasts the top bit: all ones if the block ends inside a string.
                        prev_iter_inside_quote =
                            (uint64_t)((int64_t)quote_mask >>
                                       63);  // might be undefined behavior, should be fully defined in C++20, ok according to John Regher from Utah University

                        // Classify bytes via two nibble-indexed table lookups; bits 0x18 of the
                        // combined lookup are tested below to flag whitespace.
                        Vector256 <byte> whitespace_shufti_mask = Vector256.Create((byte)0x18);
                        Vector256 <byte> v_lo = Avx2.And(
                            Avx2.Shuffle(low_nibble_mask, input_lo),
                            Avx2.Shuffle(high_nibble_mask,
                                         Avx2.And(Avx2.ShiftRightLogical(input_lo.AsUInt32(), 4).AsByte(),
                                                  Vector256.Create((byte)0x7f))));

                        Vector256 <byte> v_hi = Avx2.And(
                            Avx2.Shuffle(low_nibble_mask, input_hi),
                            Avx2.Shuffle(high_nibble_mask,
                                         Avx2.And(Avx2.ShiftRightLogical(input_hi.AsUInt32(), 4).AsByte(),
                                                  Vector256.Create((byte)0x7f))));
                        Vector256 <byte> tmp_ws_lo = Avx2.CompareEqual(
                            Avx2.And(v_lo, whitespace_shufti_mask), Vector256.Create((byte)0));
                        Vector256 <byte> tmp_ws_hi = Avx2.CompareEqual(
                            Avx2.And(v_hi, whitespace_shufti_mask), Vector256.Create((byte)0));

                        // The compare marks NON-whitespace lanes, hence the inversion when the
                        // two 32-bit masks are merged into one 64-bit whitespace mask.
                        uint64_t ws_res_0   = (uint32_t)Avx2.MoveMask(tmp_ws_lo);
                        uint64_t ws_res_1   = (uint64_t)Avx2.MoveMask(tmp_ws_hi);
                        uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));
                        whitespace &= ~quote_mask;   // whitespace inside strings must be kept

                        // Split into four 16-bit masks (one per 16-byte lane) and count how
                        // many bytes are kept up to the end of each lane.
                        int mask1  = (int)(whitespace & 0xFFFF);
                        int mask2  = (int)((whitespace >> 16) & 0xFFFF);
                        int mask3  = (int)((whitespace >> 32) & 0xFFFF);
                        int mask4  = (int)((whitespace >> 48) & 0xFFFF);
                        int pop1   = hamming((~whitespace) & 0xFFFF);
                        int pop2   = hamming((~whitespace) & (ulong)(0xFFFFFFFF));
                        int pop3   = hamming((~whitespace) & (ulong)(0xFFFFFFFFFFFF));
                        int pop4   = hamming((~whitespace));

                        // Each table entry is 16 bytes (two ulongs), hence the "* 2" on a ulong pointer.
                        var vmask1 =
                            _mm256_loadu2_m128i((ulong *)mask128_epi8 + (mask2 & 0x7FFF) * 2,
                                                (ulong *)mask128_epi8 + (mask1 & 0x7FFF) * 2);
                        var vmask2 =
                            _mm256_loadu2_m128i((ulong *)mask128_epi8 + (mask4 & 0x7FFF) * 2,
                                                (ulong *)mask128_epi8 + (mask3 & 0x7FFF) * 2);

                        // Compact the kept bytes of each lane to its front, then store the four
                        // lanes back to back at the running output offsets.
                        var result1 = Avx2.Shuffle(input_lo, vmask1.AsByte());
                        var result2 = Avx2.Shuffle(input_hi, vmask2.AsByte());
                        _mm256_storeu2_m128i((@out + pop1), @out, result1);
                        _mm256_storeu2_m128i((@out + pop3), (@out + pop2),
                                             result2);
                        @out += pop4;
                    }
                }

                // we finish off the job... copying and pasting the code is not ideal here,
                // but it gets the job done.
                if (idx < len)
                {
                    // Copy the remaining (< 64) bytes into a zero-padded scratch block so full
                    // 256-bit loads and stores stay inside memory owned by this method.
                    uint8_t *buffer = stackalloc uint8_t[64];
                    memset(buffer, 0, 64);
                    memcpy(buffer, buf + idx, len - idx);
                    var      input_lo = Avx.LoadVector256((buffer));
                    var      input_hi = Avx.LoadVector256((buffer + 32));
                    uint64_t bs_bits  =
                        cmp_mask_against_input_mini(input_lo, input_hi, Vector256.Create((byte)'\\'));
                    uint64_t start_edges     = bs_bits & ~(bs_bits << 1);
                    uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
                    uint64_t even_starts     = start_edges & even_start_mask;
                    uint64_t odd_starts      = start_edges & ~even_start_mask;
                    uint64_t even_carries    = bs_bits + even_starts;
                    uint64_t odd_carries;
                    //bool iter_ends_odd_backslash =
                    add_overflow(bs_bits, odd_starts, &odd_carries);
                    odd_carries |= prev_iter_ends_odd_backslash;
                    //prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL; // we never use it
                    uint64_t even_carry_ends    = even_carries & ~bs_bits;
                    uint64_t odd_carry_ends     = odd_carries & ~bs_bits;
                    uint64_t even_start_odd_end = even_carry_ends & odd_bits;
                    uint64_t odd_start_even_end = odd_carry_ends & even_bits;
                    uint64_t odd_ends           = even_start_odd_end | odd_start_even_end;
                    uint64_t quote_bits         =
                        cmp_mask_against_input_mini(input_lo, input_hi, Vector256.Create((byte)'"'));
                    quote_bits = quote_bits & ~odd_ends;
                    uint64_t quote_mask = Sse2.X64.ConvertToUInt64(Pclmulqdq.CarrylessMultiply(
                                                                       Vector128.Create(quote_bits, 0UL), Vector128.Create((byte)0xFF).AsUInt64(), 0));
                    quote_mask ^= prev_iter_inside_quote;
                    // prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);// we don't need this anymore

                    // The tail uses a different whitespace test from the main loop: a direct
                    // compare against 0x20 OR-ed with a table lookup for the control characters.
                    Vector256 <byte> mask_20 = Vector256.Create((byte)0x20); // c==32
                    Vector256 <byte> mask_70 =
                        Vector256.Create((byte)0x70);                        // adding 0x70 does not check low 4-bits
                    // but moves any value >= 16 above 128

                    Vector256 <byte> tmp_ws_lo = Avx2.Or(
                        Avx2.CompareEqual(mask_20, input_lo),
                        Avx2.Shuffle(lut_cntrl, Avx2.AddSaturate(mask_70, input_lo)));
                    Vector256 <byte> tmp_ws_hi = Avx2.Or(
                        Avx2.CompareEqual(mask_20, input_hi),
                        Avx2.Shuffle(lut_cntrl, Avx2.AddSaturate(mask_70, input_hi)));
                    uint64_t ws_res_0   = (uint32_t)Avx2.MoveMask(tmp_ws_lo);
                    uint64_t ws_res_1   = (uint64_t)Avx2.MoveMask(tmp_ws_hi);
                    uint64_t whitespace = (ws_res_0 | (ws_res_1 << 32));
                    whitespace &= ~quote_mask;

                    // Treat the zero padding past the real input as whitespace so it is dropped.
                    if (len - idx < 64)
                    {
                        whitespace |= ((0xFFFFFFFFFFFFFFFF) << (int)(len - idx));
                    }

                    int mask1 = (int)(whitespace & 0xFFFF);
                    int mask2 = (int)((whitespace >> 16) & 0xFFFF);
                    int mask3 = (int)((whitespace >> 32) & 0xFFFF);
                    int mask4 = (int)((whitespace >> 48) & 0xFFFF);
                    int pop1  = hamming((~whitespace) & 0xFFFF);
                    int pop2  = hamming((~whitespace) & 0xFFFFFFFF);
                    int pop3  = hamming((~whitespace) & 0xFFFFFFFFFFFF);
                    int pop4  = hamming((~whitespace));

                    var vmask1 =
                        _mm256_loadu2_m128i((ulong *)mask128_epi8 + (mask2 & 0x7FFF) * 2,
                                            (ulong *)mask128_epi8 + (mask1 & 0x7FFF) * 2);
                    var vmask2 =
                        _mm256_loadu2_m128i((ulong *)mask128_epi8 + (mask4 & 0x7FFF) * 2,
                                            (ulong *)mask128_epi8 + (mask3 & 0x7FFF) * 2);
                    var result1 = Avx2.Shuffle(input_lo, vmask1.AsByte());
                    var result2 = Avx2.Shuffle(input_hi, vmask2.AsByte());

                    // Compact inside the scratch block, then copy only the kept bytes out.
                    _mm256_storeu2_m128i((buffer + pop1), buffer,
                                         result1);
                    _mm256_storeu2_m128i((buffer + pop3), (buffer + pop2),
                                         result2);
                    memcpy(@out, buffer, (size_t)pop4);
                    @out += pop4;
                }

                *@out = (byte)'\0';  // NULL termination
                return((size_t)@out - (size_t)initout);
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Computes an edit distance between a slice of <paramref name="sourceString"/> and a slice
        /// of <paramref name="targetString"/>, processing source rows in AVX2 batches where possible
        /// and finishing the remaining rows one at a time with <c>CalculateRow</c>.
        /// </summary>
        /// <param name="sourceString">String providing the source characters.</param>
        /// <param name="sourceLength">Number of source characters to compare, starting at <paramref name="startIndex"/>.</param>
        /// <param name="targetString">String providing the target characters.</param>
        /// <param name="targetLength">Number of target characters to compare, starting at <paramref name="startIndex"/>.</param>
        /// <param name="startIndex">Offset applied to both strings before slicing.</param>
        /// <returns>The last entry of the final cost row, i.e. element <c>targetLength - 1</c>.</returns>
        private static unsafe int CalculateDistance(string sourceString, int sourceLength, string targetString, int targetLength, int startIndex)
        {
            var arrayPool   = ArrayPool<int>.Shared;
            var pooledArray = arrayPool.Rent(targetLength);

            // try/finally so the rented buffer goes back to the pool even when slicing or
            // indexing below throws.
            try
            {
                ReadOnlySpan<char> source = sourceString.AsSpan().Slice(startIndex, sourceLength);
                ReadOnlySpan<char> target = targetString.AsSpan().Slice(startIndex, targetLength);

                //ArrayPool values are sometimes bigger than allocated, let's trim our span to exactly what we use
                Span<int> previousRow = pooledArray.AsSpan(0, targetLength);

                fixed(char *targetPtr = target)
                fixed(char *srcPtr        = source)
                fixed(int *previousRowPtr = previousRow)
                {
                    FillRow(previousRowPtr, targetLength);

                    var rowIndex = 0;

                    //var sourceV = Vector128<short>.Zero;
                    const int VECTOR_LENGTH = 16;

                    // The vector path unconditionally reads the first VECTOR_LENGTH - 1 target
                    // characters and its drain loop starts at targetLength - (VECTOR_LENGTH - 1);
                    // with a shorter target both would touch memory outside the pinned buffers,
                    // so such inputs are left entirely to the scalar loop below.
                    bool targetFitsVectorPath = targetLength >= VECTOR_LENGTH - 1;

                    for (; targetFitsVectorPath && rowIndex < sourceLength - VECTOR_LENGTH - 1; rowIndex += VECTOR_LENGTH)
                    {
                        // todo max
                        var temp = Vector128.Create(rowIndex);
                        var diag = Sse42.PackUnsignedSaturate(temp, temp).ToVector256();
                        var one  = Vector256.Create((ushort)1);
                        var left = Avx2.AddSaturate(diag, one);

                        var sourceV = Avx2.LoadVector256((ushort *)(srcPtr + rowIndex));
                        var targetV = Vector256<ushort>.Zero;

                        var shift = Vector256.CreateScalar(ushort.MaxValue);

                        // The first VECTOR_LENGTH - 1 iterations fill the vector
                        for (int columnIndex = 0; columnIndex < VECTOR_LENGTH - 1; columnIndex++)
                        {
                            // Shift in the next character
                            targetV = ShiftLeft(targetV);

                            //targetV = Avx2.Insert(targetV, (ushort)targetPtr[columnIndex], 0);
                            targetV = Avx2.Or(targetV, Vector256.CreateScalar((ushort)targetPtr[columnIndex]));

                            // Insert "(rowIndex + columnIndex + 1)" from the left
                            var leftValue = Vector256.Create(rowIndex + columnIndex + 1);
                            left  = Avx2.Or(Avx2.And(shift, Avx2.PackUnsignedSaturate(leftValue, leftValue)), left);
                            shift = ShiftLeft(shift);

                            // compare source to target
                            // alternative: compare equal and OR with One
                            var match = Avx2.CompareEqual(sourceV, targetV);
                            var add   = Avx2.AndNot(match, one);
                            var next  = Avx2.AddSaturate(diag, add);

                            // Create next diag which is current up
                            var up = ShiftLeft(left);
                            //up = Sse42.Insert(up, (ushort)previousRowPtr[columnIndex], 0);
                            up = Avx2.Or(up, Vector256.CreateScalar((ushort)previousRowPtr[columnIndex]));

                            var tmp = Avx2.AddSaturate(Avx2.Min(left, up), one);
                            next = Avx2.Min(next, tmp);

                            left = next;
                            diag = up;
                        }

                        var writePtr = previousRowPtr;
                        *writePtr = left.GetElement(VECTOR_LENGTH - 1);
                        writePtr++;

                        // NOTE(review): this loop starts at VECTOR_LENGTH while the fill loop
                        // stopped after column VECTOR_LENGTH - 2, so column VECTOR_LENGTH - 1 is
                        // never shifted in — confirm whether that is intended.
                        for (int columnIndex = VECTOR_LENGTH; columnIndex < targetLength; columnIndex++)
                        {
                            // Shift in the next character
                            targetV = ShiftLeft(targetV);
                            //targetV = Avx2.Insert(targetV, (ushort)targetPtr[columnIndex], 0);
                            targetV = Avx2.Or(targetV, Vector256.CreateScalar((ushort)targetPtr[columnIndex]));

                            // compare source to target
                            // alternative: compare equal and OR with One
                            var match = Avx2.CompareEqual(sourceV, targetV);
                            var add   = Avx2.AndNot(match, one);
                            var next  = Avx2.AddSaturate(diag, add);

                            // Create next diag which is current up
                            var up = ShiftLeft(left);
                            //up = Sse42.Insert(up, (ushort)previousRowPtr[columnIndex], 0);
                            up = Avx2.Or(up, Vector256.CreateScalar((ushort)previousRowPtr[columnIndex]));

                            var tmp = Avx2.AddSaturate(Avx2.Min(left, up), one);
                            next = Avx2.Min(next, tmp);

                            left = next;
                            diag = up;

                            // Store one value
                            *writePtr = next.GetElement(VECTOR_LENGTH - 1);
                            writePtr++;
                        }

                        // Finish with the last VECTOR_LENGTH - 1 items; don't read any more chars, just extract them
                        for (int i = targetLength - (VECTOR_LENGTH - 1); i < previousRow.Length; i++)
                        {
                            // Shift the target vector without feeding in a new character
                            targetV = ShiftLeft(targetV);

                            // compare source to target
                            // alternative: compare equal and OR with One
                            var match = Avx2.CompareEqual(sourceV, targetV);
                            var add   = Avx2.AndNot(match, one);
                            var next  = Avx2.AddSaturate(diag, add);

                            // Create next diag which is current up
                            var up = ShiftLeft(left);

                            var tmp = Avx2.AddSaturate(Avx2.Min(left, up), one);
                            next = Avx2.Min(next, tmp);

                            left = next;
                            diag = up;
                            // Store one value
                            previousRowPtr[i] = left.GetElement(VECTOR_LENGTH - 1);
                            //		writePtr++;
                        }

#if DEBUG
                        if (true)
                        {
                            Console.Write("prev values for row {0}:", rowIndex);
                            for (int i = 0; i < targetLength; ++i)
                            {
                                Console.Write("{0} ", previousRow[i]);
                            }
                            Console.WriteLine();
                        }
#endif
                    }

                    //Calculate Single Rows
                    for (; rowIndex < sourceLength; rowIndex++)
                    {
                        var lastSubstitutionCost = rowIndex;
                        var lastInsertionCost    = rowIndex + 1;
                        var sourcePrevChar       = source[rowIndex];
#if DEBUG
                        Console.Write("prev values for row {0}:", rowIndex);
                        for (int i = 0; i < targetLength; ++i)
                        {
                            Console.Write("{0} ", previousRow[i]);
                        }
                        Console.WriteLine();
#endif

                        CalculateRow(previousRowPtr, targetPtr, targetLength, sourcePrevChar, lastInsertionCost, lastSubstitutionCost);
                    }
                }

                return previousRow[targetLength - 1];
            }
            finally
            {
                arrayPool.Return(pooledArray);
            }
        }