コード例 #1
0
        /**
         * Fills a caller-supplied buffer with 32-bit pseudorandom integers in a
         * single bulk call.
         *
         * The asserts require that idx equals SFMT_N32 on entry, that size is a
         * multiple of four, and that size is at least SFMT_N32.
         *
         * @param array destination buffer; it is reinterpreted as an array of
         *              w128_t, so it has to hold at least size 32-bit values
         * @param size  number of 32-bit integers to generate
         */
        public void sfmt_fill_array32(uint32_t *array, int size)
        {
            assert(idx == SFMT_N32);
            assert(size % 4 == 0);
            assert(size >= SFMT_N32);

            // Four 32-bit outputs fit into one w128_t, hence size / 4 blocks.
            w128_t *blocks     = (w128_t *)array;
            int     blockCount = size / 4;

            gen_rand_array(blocks, blockCount);

            // Leave idx at SFMT_N32, the same value the entry assert demands.
            idx = SFMT_N32;
        }
コード例 #2
0
        // If needed, allocate memory so that the object is able to process JSON
        // documents having up to len bytes and a nesting depth of up to maxdepth.
        //
        // Returns true when the current buffers already cover the request or when
        // every new buffer was allocated. Returns false when len or maxdepth is
        // zero, when len exceeds SIMDJSON_MAXSIZE_BYTES, or when any allocation
        // comes back null (in which case all buffers are released again).
        public bool AllocateCapacity(size_t len, size_t maxdepth = DEFAULTMAXDEPTH)
        {
            if ((maxdepth == 0) || (len == 0))
            {
                return(false);
            }
            if (len > SIMDJSON_MAXSIZE_BYTES)
            {
                return(false);
            }
            // Keep the existing buffers only when BOTH capacities are large enough.
            // (The depth test used to be inverted, which kept buffers that were too
            // shallow and threw away buffers that were fine.)
            if ((len <= bytecapacity) && (maxdepth <= depthcapacity))
            {
                return(true);
            }
            Deallocate();
            isvalid              = false;
            bytecapacity         = 0; // will only set it to len after allocations are a success
            n_structural_indexes = 0;
            uint32_t max_structures = (uint32_t)(ROUNDUP_N(len, 64) + 2 + 7);

            structural_indexes = allocate <uint32_t>(max_structures);
            // a pathological input like "[[[[..." would generate len tape elements, so need a capacity of len + 1
            size_t localtapecapacity = ROUNDUP_N(len + 1, 64);
            // a document with only zero-length strings... could have len/3 string
            // and we would need len/3 * 5 bytes on the string buffer
            size_t localstringcapacity = ROUNDUP_N(5 * len / 3 + 32, 64);

            string_buf = allocate <uint8_t>(localstringcapacity);
            tape       = allocate <uint64_t>(localtapecapacity);
            containing_scope_offset = allocate <uint32_t>(maxdepth);
            ret_address             = allocate <char1>(maxdepth);
            if ((string_buf == null) || (tape == null) ||
                (containing_scope_offset == null) || (ret_address == null) || (structural_indexes == null))
            {
                // Release the partial allocation through Deallocate() so the buffer
                // fields are not left referring to memory that was just freed.
                Deallocate();
                return(false);
            }

            /*
             * // We do not need to initialize this content for parsing, though we could
             * // need to initialize it for safety.
             * memset(string_buf, 0 , localstringcapacity);
             * memset(structural_indexes, 0, max_structures * sizeof(uint32_t));
             * memset(tape, 0, localtapecapacity * sizeof(uint64_t));
             */
            bytecapacity   = len;
            depthcapacity  = maxdepth;
            tapecapacity   = localtapecapacity;
            stringcapacity = localstringcapacity;
            return(true);
        }
コード例 #3
0
        /**
         * Returns the next 32-bit pseudorandom integer.
         *
         * When idx has reached SFMT_N32 the state is regenerated with
         * sfmt_gen_rand_all() and reading restarts at position 0.
         *
         * @return the 32-bit word found at position idx of the state
         */
        public uint32_t sfmt_genrand_uint32()
        {
            bool exhausted = idx >= SFMT_N32;

            if (exhausted)
            {
                sfmt_gen_rand_all();
                idx = 0;
            }

            fixed(w128_t *block = this.state)
            {
                // The state is addressed as 32-bit words starting at the first
                // word of the first block.
                uint32_t *words = &block[0].u32_0;
                uint32_t  value = words[idx];

                idx++;
                return(value);
            }
        }
コード例 #4
0
        /**
         * Initializes the internal state array from a single 32-bit seed.
         *
         * Word 0 receives the seed; every following word is derived from its
         * predecessor. Afterwards idx is set to SFMT_N32 and
         * period_certification() is run on the fresh state.
         *
         * @param seed a 32-bit integer used as the seed.
         */
        public void sfmt_init_gen_rand(uint32_t seed)
        {
            fixed(w128_t *block = this.state)
            {
                uint32_t *words = &block[0].u32_0;
                uint32_t  prev  = seed;

                words[0] = prev;
                for (uint pos = 1; pos < SFMT_N32; pos++)
                {
                    // Each word depends only on the previous word and its position.
                    prev       = 1812433253U * (prev ^ (prev >> 30)) + pos;
                    words[pos] = prev;
                }
            }

            idx = SFMT_N32;
            period_certification();
        }
コード例 #5
0
ファイル: Program.cs プロジェクト: HelloZhangzy/Exercise
        /*************************************************************************
         * Corrected Block TEA (XXTEA) applied in place to the words of v.
         *
         * v   is the n word data vector, overwritten with the result
         * n   is the word count: positive to encode, negative to decode
         *     (|n| words are processed); for n of -1, 0 or 1 nothing is done
         * key is the 4 word key, 128 bits
         *
         * All arithmetic is modulo 2^32. Coding and decoding must run with the
         * same endianness because the data is handled as 32-bit words.
         **************************************************************************/
        void btea(uint32_t *v, int16_t n, uint32_t *key)
        {
            const uint32_t DELTA = 0x9e3779b9;

            uint32_t y, z, sum;
            uint32_t p, rounds, e;

            // Work on an int copy: negating it cannot overflow the 16-bit range
            // and the mixed signed/unsigned expressions below need explicit casts.
            int words = n;

            // The cipher relies on wrap-around of 32-bit sums.
            unchecked
            {
                if (words > 1)
                { // encrypt
                    uint32_t last = (uint32_t)(words - 1);

                    rounds = (uint32_t)(6 + 52 / words);
                    sum    = 0;
                    z      = v[last];
                    do
                    {
                        sum += DELTA;
                        e    = (sum >> 2) & 3;
                        for (p = 0; p < last; p++)
                        {
                            y = v[p + 1];
                            z = v[p] += btea_mx(y, z, sum, key[(p & 3) ^ e]);
                        }
                        // p == last here; the final word wraps around to v[0]
                        y = v[0];
                        z = v[last] += btea_mx(y, z, sum, key[(p & 3) ^ e]);
                    } while (--rounds != 0);
                }
                else if (words < -1)
                { // decrypt
                    words = -words;
                    uint32_t last = (uint32_t)(words - 1);

                    rounds = (uint32_t)(6 + 52 / words);
                    sum    = rounds * DELTA;
                    y      = v[0];
                    do
                    {
                        e = (sum >> 2) & 3;
                        for (p = last; p > 0; p--)
                        {
                            z = v[p - 1];
                            y = v[p] -= btea_mx(y, z, sum, key[(p & 3) ^ e]);
                        }
                        // p == 0 here; the first word wraps around to v[last]
                        z    = v[last];
                        y    = v[0] -= btea_mx(y, z, sum, key[(p & 3) ^ e]);
                        sum -= DELTA;
                    } while (--rounds != 0);
                }
            }
        }

        // Mixing term of the cipher (the "MX" macro of the C original): combines
        // the two neighbouring words y and z, the running sum and one key word k.
        private static uint32_t btea_mx(uint32_t y, uint32_t z, uint32_t sum, uint32_t k)
        {
            unchecked
            {
                return(((z >> 5 ^ y << 2) + (y >> 3 ^ z << 4)) ^ ((sum ^ y) + (k ^ z)));
            }
        }
コード例 #6
0
        /**
         * Returns the next 64-bit pseudorandom integer.
         *
         * idx must be even on entry (asserted). When idx has reached SFMT_N32
         * the state is regenerated with sfmt_gen_rand_all() first. The result
         * is read as one 64-bit value starting at 32-bit position idx, and idx
         * advances by two.
         *
         * @return the 64-bit value read from the state at position idx
         */
        public uint64_t sfmt_genrand_uint64()
        {
            assert(idx % 2 == 0);

            bool exhausted = idx >= SFMT_N32;

            if (exhausted)
            {
                sfmt_gen_rand_all();
                idx = 0;
            }

            fixed(w128_t *block = this.state)
            {
                uint32_t *words = &block[0].u32_0;
                // Reinterpret two adjacent 32-bit words as a single 64-bit value.
                uint64_t *pair  = (uint64_t *)(words + idx);
                uint64_t  value = *pair;

                idx += 2;
                return(value);
            }
        }
コード例 #7
0
        // If needed, allocate memory so that the object is able to process JSON
        // documents having up to len bytes and a nesting depth of up to maxdepth.
        //
        // Returns true when the current buffers already cover the request or when
        // every new buffer was allocated. Returns false when len or maxdepth is
        // zero, or when any allocation comes back null (all buffers are then
        // released again through Deallocate()).
        public bool AllocateCapacity(size_t len, size_t maxdepth = DEFAULTMAXDEPTH)
        {
            if ((maxdepth == 0) || (len == 0))
            {
                Debug.WriteLine("capacities must be non-zero ");
                return(false);
            }

            // Keep the existing buffers only when BOTH capacities are large enough.
            // (The depth test used to be inverted, which kept buffers that were too
            // shallow and threw away buffers that were fine.)
            if ((len <= bytecapacity) && (maxdepth <= depthcapacity))
            {
                return(true);
            }
            Deallocate();

            isvalid              = false;
            bytecapacity         = 0; // will only set it to len after allocations are a success
            n_structural_indexes = 0;
            uint32_t max_structures = (uint32_t)ROUNDUP_N(len, 64) + 2 + 7;

            structural_indexes = Utils.allocate <uint32_t>(max_structures);
            // An input such as "[[[[..." can produce one tape element per byte, plus
            // one more, so the tape needs room for len + 1 entries.
            size_t localtapecapacity   = ROUNDUP_N(len + 1, 64);
            // A document of nothing but zero-length strings can hold len / 3 strings.
            // NOTE(review): sized for 5 bytes per string on the string buffer, which
            // assumes each stored string carries a small fixed header - confirm
            // against the string writer; over-allocating is harmless otherwise.
            size_t localstringcapacity = ROUNDUP_N(5 * len / 3 + 32, 64);

            string_buf = Utils.allocate <uint8_t>(localstringcapacity);
            tape       = Utils.allocate <uint64_t>(localtapecapacity);
            containing_scope_offset = Utils.allocate <uint32_t>(maxdepth);
            ret_address             = Utils.allocate <bytechar>(maxdepth);
            if ((string_buf == null) || (tape == null) ||
                (containing_scope_offset == null) || (ret_address == null) || (structural_indexes == null))
            {
                Deallocate();
                return(false);
            }

            bytecapacity   = len;
            depthcapacity  = maxdepth;
            tapecapacity   = localtapecapacity;
            stringcapacity = localstringcapacity;
            return(true);
        }
コード例 #8
0
        // Releases every buffer held by this object and resets the bookkeeping:
        // the object is marked invalid, all four capacities become zero and each
        // buffer field ends up null.
        private void Deallocate()
        {
            isvalid      = false;
            bytecapacity = depthcapacity = tapecapacity = stringcapacity = 0;

            // Each buffer is freed only if it is currently set, and its field is
            // cleared right away so a later call does not free it a second time.
            if (ret_address != null)
            {
                delete(ret_address);
                ret_address = null;
            }

            if (containing_scope_offset != null)
            {
                delete(containing_scope_offset);
                containing_scope_offset = null;
            }

            if (tape != null)
            {
                delete(tape);
                tape = null;
            }

            if (string_buf != null)
            {
                delete(string_buf);
                string_buf = null;
            }

            if (structural_indexes != null)
            {
                delete(structural_indexes);
                structural_indexes = null;
            }
        }
コード例 #9
0
        /**
         * Certifies the period of 2^{MEXP} for the current state.
         *
         * The first four state words are masked with parity[0..3] and reduced to
         * a single parity bit. If that bit is 1 nothing changes; otherwise the
         * lowest set bit of the first non-zero parity word is flipped in the
         * matching state word.
         */
        void period_certification()
        {
            fixed(w128_t *block = this.state)
            {
                uint32_t *words = &block[0].u32_0;

                uint check = 0;

                for (int k = 0; k < 4; k++)
                {
                    check ^= words[k] & parity[k];
                }

                // Fold the 32 bits down so that bit 0 holds the overall parity.
                check ^= check >> 16;
                check ^= check >> 8;
                check ^= check >> 4;
                check ^= check >> 2;
                check ^= check >> 1;

                /* check OK */
                if ((check & 1) == 1)
                {
                    return;
                }

                /* check NG, and modification */
                for (int k = 0; k < 4; k++)
                {
                    // Walk a single bit from position 0 to 31; the shift out of
                    // bit 31 yields 0 and ends the scan of this word.
                    for (uint32_t bit = 1; bit != 0; bit <<= 1)
                    {
                        if ((bit & parity[k]) != 0)
                        {
                            words[k] ^= bit;
                            return;
                        }
                    }
                }
            }
        }
コード例 #10
0
        // Scans the len bytes at buf in 64-byte chunks and records, in
        // pj.structural_indexes, the byte offset of every structural or
        // pseudo-structural character found outside of quoted strings; the number of
        // recorded offsets is stored in pj.n_structural_indexes.
        //
        // Each chunk's bitmask is flattened into offsets one iteration late (hence
        // the "idx - 64" in the flattening code), so the masks of the last chunk are
        // flattened after the loops.
        //
        // Returns false when len exceeds pj.bytecapacity or when no structural
        // character was found at all (empty or whitespace-only input). Otherwise
        // returns true, or - when compiled with SIMDJSON_UTF8VALIDATE - whether no
        // UTF-8 error was flagged. Throws InvalidOperationException if the last
        // recorded offset lies beyond len.
        internal static bool find_structural_bits(uint8_t* buf, size_t len, ParsedJson pj)
        {
            if (len > pj.bytecapacity)
            {
                Console.WriteLine("Your ParsedJson object only supports documents up to " + pj.bytecapacity +
                                  " bytes but you are trying to process " + len + " bytes\n");
                return false;
            }

            uint32_t* base_ptr = pj.structural_indexes;
            uint32_t @base = 0;
#if SIMDJSON_UTF8VALIDATE // NOT TESTED YET!
            var has_error = Vector256<byte>.Zero;
            var previous = new avx_processed_utf_bytes();
            previous.rawbytes = Vector256<byte>.Zero;
            previous.high_nibbles = Vector256<byte>.Zero;
            previous.carried_continuations = Vector256<byte>.Zero;
            var highbit = Vector256.Create((byte)0x80);
#endif

            const uint64_t even_bits = 0x5555555555555555UL;
            const uint64_t odd_bits = ~even_bits;

            // for now, just work in 64-byte chunks
            // we have padded the input out to 64 byte multiple with the remainder being
            // zeros

            // persistent state across loop
            uint64_t prev_iter_ends_odd_backslash = 0UL; // either 0 or 1, but a 64-bit value
            uint64_t prev_iter_inside_quote = 0UL; // either all zeros or all ones

            // effectively the very first char is considered to follow "whitespace" for the
            // purposes of pseudo-structural character detection
            uint64_t prev_iter_ends_pseudo_pred = 1UL;
            size_t lenminus64 = len < 64 ? 0 : len - 64;
            size_t idx = 0;
            uint64_t structurals = 0;

            // C#: assign static readonly fields to locals before the loop
            Vector256<byte> low_nibble_mask = s_low_nibble_mask;
            Vector256<byte> high_nibble_mask = s_high_nibble_mask;
            Vector256<byte> utf8ValidVec = s_utf8ValidVec;

            var structural_shufti_mask = Vector256.Create((byte)0x7);
            var whitespace_shufti_mask = Vector256.Create((byte)0x18);
            var slashVec = Vector256.Create((bytechar) '\\').AsByte();
            var ffVec = Vector128.Create((byte) 0xFF).AsUInt64();
            var doubleQuoteVec = Vector256.Create((byte)'"');
            var zeroBVec = Vector256.Create((byte) 0);
            var vec7f = Vector256.Create((byte) 0x7f);

            for (; idx < lenminus64; idx += 64)
            {
                var input_lo = Avx.LoadVector256(buf + idx + 0);
                var input_hi = Avx.LoadVector256(buf + idx + 32);
#if SIMDJSON_UTF8VALIDATE // NOT TESTED YET!
                if ((Avx.TestZ(Avx2.Or(input_lo, input_hi), highbit)) == true)
                {
                    // it is ascii, we just check continuation
                    has_error = Avx2.Or(
                        Avx2.CompareGreaterThan(previous.carried_continuations.AsSByte(),
                                                utf8ValidVec.AsSByte()).AsByte(), has_error);

                }
                else
                {
                    // it is not ascii so we have to do heavy work
                    previous = Utf8Validation.avxcheckUTF8Bytes(input_lo, ref previous, ref has_error);
                    previous = Utf8Validation.avxcheckUTF8Bytes(input_hi, ref previous, ref has_error);
                }
#endif

                ////////////////////////////////////////////////////////////////////////////////////////////
                //     Step 1: detect odd sequences of backslashes
                ////////////////////////////////////////////////////////////////////////////////////////////

                uint64_t bs_bits =
                    cmp_mask_against_input(input_lo, input_hi, slashVec);
                uint64_t start_edges = bs_bits & ~(bs_bits << 1);
                // flip lowest if we have an odd-length run at the end of the prior
                // iteration
                uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
                uint64_t even_starts = start_edges & even_start_mask;
                uint64_t odd_starts = start_edges & ~even_start_mask;
                uint64_t even_carries = bs_bits + even_starts;
                uint64_t odd_carries;
                // must record the carry-out of our odd-carries out of bit 63; this
                // indicates whether the sense of any edge going to the next iteration
                // should be flipped
                bool iter_ends_odd_backslash =
                    add_overflow(bs_bits, odd_starts, &odd_carries);

                odd_carries |=
                    prev_iter_ends_odd_backslash; // push in bit zero as a potential end
                // if we had an odd-numbered run at the
                // end of the previous iteration
                prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1UL : 0x0UL;
                uint64_t even_carry_ends = even_carries & ~bs_bits;
                uint64_t odd_carry_ends = odd_carries & ~bs_bits;
                uint64_t even_start_odd_end = even_carry_ends & odd_bits;
                uint64_t odd_start_even_end = odd_carry_ends & even_bits;
                uint64_t odd_ends = even_start_odd_end | odd_start_even_end;

                ////////////////////////////////////////////////////////////////////////////////////////////
                //     Step 2: detect insides of quote pairs
                ////////////////////////////////////////////////////////////////////////////////////////////

                uint64_t quote_bits =
                    cmp_mask_against_input(input_lo, input_hi, doubleQuoteVec);
                quote_bits = quote_bits & ~odd_ends;
                uint64_t quote_mask = Sse2.X64.ConvertToUInt64(Pclmulqdq.CarrylessMultiply(
                    Vector128.Create(quote_bits, 0UL /*C# reversed*/), ffVec, 0));

                // Flatten the structurals mask of the PREVIOUS chunk into offsets. The
                // loop is unrolled eight times and may write a few junk entries past the
                // real ones; @base is then reset to the exact count.
                uint32_t cnt = (uint32_t) hamming(structurals);
                uint32_t next_base = @base + cnt;
                while (structurals != 0)
                {
                    base_ptr[@base + 0] = (uint32_t) idx - 64 + (uint32_t) trailingzeroes(structurals);
                    structurals = structurals & (structurals - 1);
                    base_ptr[@base + 1] = (uint32_t) idx - 64 + (uint32_t) trailingzeroes(structurals);
                    structurals = structurals & (structurals - 1);
                    base_ptr[@base + 2] = (uint32_t) idx - 64 + (uint32_t) trailingzeroes(structurals);
                    structurals = structurals & (structurals - 1);
                    base_ptr[@base + 3] = (uint32_t) idx - 64 + (uint32_t) trailingzeroes(structurals);
                    structurals = structurals & (structurals - 1);
                    base_ptr[@base + 4] = (uint32_t) idx - 64 + (uint32_t) trailingzeroes(structurals);
                    structurals = structurals & (structurals - 1);
                    base_ptr[@base + 5] = (uint32_t) idx - 64 + (uint32_t) trailingzeroes(structurals);
                    structurals = structurals & (structurals - 1);
                    base_ptr[@base + 6] = (uint32_t) idx - 64 + (uint32_t) trailingzeroes(structurals);
                    structurals = structurals & (structurals - 1);
                    base_ptr[@base + 7] = (uint32_t) idx - 64 + (uint32_t) trailingzeroes(structurals);
                    structurals = structurals & (structurals - 1);
                    @base += 8;
                }

                @base = next_base;

                quote_mask ^= prev_iter_inside_quote;
                prev_iter_inside_quote =
                    (uint64_t) ((int64_t) quote_mask >>
                                63); // right shift of a signed value expected to be well-defined and standard compliant as of C++20, John Regher from Utah U. says this is fine code



                var v_lo = Avx2.And(
                    Avx2.Shuffle(low_nibble_mask, input_lo),
                    Avx2.Shuffle(high_nibble_mask,
                        Avx2.And(Avx2.ShiftRightLogical(input_lo.AsUInt32(), 4).AsByte(),
                            vec7f)));

                var v_hi = Avx2.And(
                    Avx2.Shuffle(low_nibble_mask, input_hi),
                    Avx2.Shuffle(high_nibble_mask,
                        Avx2.And(Avx2.ShiftRightLogical(input_hi.AsUInt32(), 4).AsByte(),
                            vec7f)));
                var tmp_lo = Avx2.CompareEqual(
                    Avx2.And(v_lo, structural_shufti_mask), zeroBVec);
                var tmp_hi = Avx2.CompareEqual(
                    Avx2.And(v_hi, structural_shufti_mask), zeroBVec);

                uint64_t structural_res_0 = (uint32_t) Avx2.MoveMask(tmp_lo);
                uint64_t structural_res_1 = (uint64_t) Avx2.MoveMask(tmp_hi);
                structurals = ~(structural_res_0 | (structural_res_1 << 32));

                var tmp_ws_lo = Avx2.CompareEqual(
                    Avx2.And(v_lo, whitespace_shufti_mask), zeroBVec);
                var tmp_ws_hi = Avx2.CompareEqual(
                    Avx2.And(v_hi, whitespace_shufti_mask), zeroBVec);

                uint64_t ws_res_0 = (uint32_t) Avx2.MoveMask(tmp_ws_lo);
                uint64_t ws_res_1 = (uint64_t) Avx2.MoveMask(tmp_ws_hi);
                uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));


                // mask off anything inside quotes
                structurals &= ~quote_mask;

                // add the real quote bits back into our bitmask as well, so we can
                // quickly traverse the strings we've spent all this trouble gathering
                structurals |= quote_bits;

                // Now, establish "pseudo-structural characters". These are non-whitespace
                // characters that are (a) outside quotes and (b) have a predecessor that's
                // either whitespace or a structural character. This means that subsequent
                // passes will get a chance to encounter the first character of every string
                // of non-whitespace and, if we're parsing an atom like true/false/null or a
                // number we can stop at the first whitespace or structural character
                // following it.

                // a qualified predecessor is something that can happen 1 position before a
                // pseudo-structural character
                uint64_t pseudo_pred = structurals | whitespace;
                uint64_t shifted_pseudo_pred = (pseudo_pred << 1) | prev_iter_ends_pseudo_pred;
                prev_iter_ends_pseudo_pred = pseudo_pred >> 63;
                uint64_t pseudo_structurals =
                    shifted_pseudo_pred & (~whitespace) & (~quote_mask);
                structurals |= pseudo_structurals;

                // now, we've used our close quotes all we need to. So let's switch them off
                // they will be off in the quote mask and on in quote bits.
                structurals &= ~(quote_bits & ~quote_mask);

                //Console.WriteLine($"Iter: {idx}, satur: {structurals}");

                //*(uint64_t *)(pj.structurals + idx / 8) = structurals;
            }

            ////////////////
            // we use a giant copy-paste which is ugly.
            // but otherwise the string needs to be properly padded or else we
            // risk invalidating the UTF-8 checks.
            ////////////
            if (idx < len)
            {
                // Copy the remaining bytes into a 64-byte scratch buffer pre-filled with
                // spaces (0x20) so the final chunk can be processed at full width.
                uint8_t* tmpbuf = stackalloc uint8_t[64];
                memset(tmpbuf, 0x20, 64);
                memcpy(tmpbuf, buf + idx, len - idx);
                Vector256<byte> input_lo = Avx.LoadVector256(tmpbuf + 0);
                Vector256<byte> input_hi = Avx.LoadVector256(tmpbuf + 32);
#if SIMDJSON_UTF8VALIDATE // NOT TESTED YET!
                // highbit is the vector declared at the top of the method
                if ((Avx.TestZ(Avx2.Or(input_lo, input_hi), highbit)) == true)
                {
                    // it is ascii, we just check continuation
                    has_error = Avx2.Or(
                      Avx2.CompareGreaterThan(previous.carried_continuations.AsSByte(),
                                      utf8ValidVec.AsSByte()).AsByte(), has_error);

                }
                else
                {
                    // it is not ascii so we have to do heavy work
                    previous = Utf8Validation.avxcheckUTF8Bytes(input_lo, ref previous, ref has_error);
                    previous = Utf8Validation.avxcheckUTF8Bytes(input_hi, ref previous, ref has_error);
                }
#endif
                ////////////////////////////////////////////////////////////////////////////////////////////
                //     Step 1: detect odd sequences of backslashes
                ////////////////////////////////////////////////////////////////////////////////////////////

                uint64_t bs_bits =
                    cmp_mask_against_input(input_lo, input_hi, slashVec);
                uint64_t start_edges = bs_bits & ~(bs_bits << 1);
                // flip lowest if we have an odd-length run at the end of the prior
                // iteration
                uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
                uint64_t even_starts = start_edges & even_start_mask;
                uint64_t odd_starts = start_edges & ~even_start_mask;
                uint64_t even_carries = bs_bits + even_starts;

                uint64_t odd_carries;
                // must record the carry-out of our odd-carries out of bit 63; this
                // indicates whether the sense of any edge going to the next iteration
                // should be flipped
                //bool iter_ends_odd_backslash =
                add_overflow(bs_bits, odd_starts, &odd_carries);

                odd_carries |=
                    prev_iter_ends_odd_backslash; // push in bit zero as a potential end
                // if we had an odd-numbered run at the
                // end of the previous iteration
                //prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL;
                uint64_t even_carry_ends = even_carries & ~bs_bits;
                uint64_t odd_carry_ends = odd_carries & ~bs_bits;
                uint64_t even_start_odd_end = even_carry_ends & odd_bits;
                uint64_t odd_start_even_end = odd_carry_ends & even_bits;
                uint64_t odd_ends = even_start_odd_end | odd_start_even_end;

                ////////////////////////////////////////////////////////////////////////////////////////////
                //     Step 2: detect insides of quote pairs
                ////////////////////////////////////////////////////////////////////////////////////////////

                uint64_t quote_bits =
                    cmp_mask_against_input(input_lo, input_hi, doubleQuoteVec);
                quote_bits = quote_bits & ~odd_ends;
                uint64_t quote_mask = (uint64_t)Sse2.X64.ConvertToInt64(Pclmulqdq.CarrylessMultiply(
                    Vector128.Create(quote_bits, 0UL /*C# reversed*/), ffVec, 0).AsInt64());
                quote_mask ^= prev_iter_inside_quote;

                //BUG? https://github.com/dotnet/coreclr/issues/22813
                //quote_mask = 60;
                //prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63); // right shift of a signed value expected to be well-defined and standard compliant as of C++20

                // Flatten the structurals mask of the previous chunk (see the main loop).
                uint32_t cnt = (uint32_t)hamming(structurals);
                uint32_t next_base = @base + cnt;
                while (structurals != 0)
                {
                    base_ptr[@base + 0] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                    structurals = structurals & (structurals - 1);
                    base_ptr[@base + 1] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                    structurals = structurals & (structurals - 1);
                    base_ptr[@base + 2] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                    structurals = structurals & (structurals - 1);
                    base_ptr[@base + 3] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                    structurals = structurals & (structurals - 1);
                    base_ptr[@base + 4] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                    structurals = structurals & (structurals - 1);
                    base_ptr[@base + 5] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                    structurals = structurals & (structurals - 1);
                    base_ptr[@base + 6] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                    structurals = structurals & (structurals - 1);
                    base_ptr[@base + 7] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                    structurals = structurals & (structurals - 1);
                    @base += 8;
                }
                @base = next_base;
                // How do we build up a user traversable data structure
                // first, do a 'shufti' to detect structural JSON characters
                // they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c
                // these go into the first 3 buckets of the comparison (1/2/4)

                // we are also interested in the four whitespace characters
                // space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d
                // these go into the next 2 buckets of the comparison (8/16)

                var v_lo = Avx2.And(
                    Avx2.Shuffle(low_nibble_mask, input_lo),
                    Avx2.Shuffle(high_nibble_mask,
                        Avx2.And(Avx2.ShiftRightLogical(input_lo.AsUInt32(), 4).AsByte(),
                            vec7f)));

                var v_hi = Avx2.And(
                    Avx2.Shuffle(low_nibble_mask, input_hi),
                    Avx2.Shuffle(high_nibble_mask,
                        Avx2.And(Avx2.ShiftRightLogical(input_hi.AsUInt32(), 4).AsByte(),
                            vec7f)));
                var tmp_lo = Avx2.CompareEqual(
                    Avx2.And(v_lo, structural_shufti_mask), zeroBVec);
                var tmp_hi = Avx2.CompareEqual(
                    Avx2.And(v_hi, structural_shufti_mask), zeroBVec);

                uint64_t structural_res_0 = (uint32_t)Avx2.MoveMask(tmp_lo);
                uint64_t structural_res_1 = (uint64_t)Avx2.MoveMask(tmp_hi);
                structurals = ~(structural_res_0 | (structural_res_1 << 32));

                // this additional mask and transfer is non-trivially expensive,
                // unfortunately
                var tmp_ws_lo = Avx2.CompareEqual(
                    Avx2.And(v_lo, whitespace_shufti_mask), zeroBVec);
                var tmp_ws_hi = Avx2.CompareEqual(
                    Avx2.And(v_hi, whitespace_shufti_mask), zeroBVec);

                uint64_t ws_res_0 = (uint32_t)Avx2.MoveMask(tmp_ws_lo);
                uint64_t ws_res_1 = (uint64_t)Avx2.MoveMask(tmp_ws_hi);
                uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));


                // mask off anything inside quotes
                structurals &= ~quote_mask;

                // add the real quote bits back into our bitmask as well, so we can
                // quickly traverse the strings we've spent all this trouble gathering
                structurals |= quote_bits;

                // Now, establish "pseudo-structural characters". These are non-whitespace
                // characters that are (a) outside quotes and (b) have a predecessor that's
                // either whitespace or a structural character. This means that subsequent
                // passes will get a chance to encounter the first character of every string
                // of non-whitespace and, if we're parsing an atom like true/false/null or a
                // number we can stop at the first whitespace or structural character
                // following it.

                // a qualified predecessor is something that can happen 1 position before a
                // pseudo-structural character
                uint64_t pseudo_pred = structurals | whitespace;
                uint64_t shifted_pseudo_pred = (pseudo_pred << 1) | prev_iter_ends_pseudo_pred;
                prev_iter_ends_pseudo_pred = pseudo_pred >> 63;
                uint64_t pseudo_structurals =
                    shifted_pseudo_pred & (~whitespace) & (~quote_mask);
                structurals |= pseudo_structurals;

                // now, we've used our close quotes all we need to. So let's switch them off
                // they will be off in the quote mask and on in quote bits.
                structurals &= ~(quote_bits & ~quote_mask);
                //*(uint64_t *)(pj.structurals + idx / 8) = structurals;
                idx += 64;
            }
            // Flatten the mask produced by the last processed chunk.
            uint32_t cnt2 = (uint32_t)hamming(structurals);
            uint32_t next_base2 = @base + cnt2;
            while (structurals != 0)
            {
                base_ptr[@base + 0] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                structurals = structurals & (structurals - 1);
                base_ptr[@base + 1] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                structurals = structurals & (structurals - 1);
                base_ptr[@base + 2] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                structurals = structurals & (structurals - 1);
                base_ptr[@base + 3] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                structurals = structurals & (structurals - 1);
                base_ptr[@base + 4] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                structurals = structurals & (structurals - 1);
                base_ptr[@base + 5] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                structurals = structurals & (structurals - 1);
                base_ptr[@base + 6] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                structurals = structurals & (structurals - 1);
                base_ptr[@base + 7] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                structurals = structurals & (structurals - 1);
                @base += 8;
            }
            @base = next_base2;

            pj.n_structural_indexes = @base;
            // With no structural or pseudo-structural character at all (empty or
            // whitespace-only input) there is no "last index" to inspect: report
            // failure instead of reading base_ptr at n_structural_indexes - 1.
            if (pj.n_structural_indexes == 0)
            {
                return false;
            }
            if (base_ptr[pj.n_structural_indexes - 1] > len)
            {
                throw new InvalidOperationException("Internal bug");
            }
            if (len != base_ptr[pj.n_structural_indexes - 1])
            {
                // the string might not be NULL terminated, but we add a virtual NULL ending character. 
                base_ptr[pj.n_structural_indexes++] = (uint32_t)len;
            }
            base_ptr[pj.n_structural_indexes] = 0; // make it safe to dereference one beyond this array

#if SIMDJSON_UTF8VALIDATE // NOT TESTED YET!
            return Avx.TestZ(has_error, has_error);
#else
            return true;
#endif
        }
コード例 #11
0
 /**
  * Public seeding entry point: hands the key array straight to
  * dsfmt_chk_init_by_array without any additional processing.
  * @param init_key the array of 32-bit integers, used as a seed.
  * @param key_length the length of init_key.
  */
 public void dsfmt_init_by_array(uint32_t *init_key, uint key_length)
     => dsfmt_chk_init_by_array(init_key, key_length);
コード例 #12
0
        /**
         * Seeds the internal state array from an array of 32-bit integers.
         *
         * The state is first filled with the byte pattern 0x8b, then the key
         * words are stirred in with ini_func1, and a final pass with ini_func2
         * runs once over every 32-bit word.  Afterwards initial_mask() and
         * period_certification() are applied and idx is set to DSFMT_N64.
         *
         * @param init_key the array of 32-bit integers, used as a seed.
         * @param key_length the number of entries read from init_key.
         */
        void dsfmt_chk_init_by_array(uint32_t *init_key, uint key_length)
        {
            // number of 32-bit words addressed below: four per entry of status[0..DSFMT_N]
            const int total  = (DSFMT_N + 1) * 4;
            const int gap    = (total >= 623) ? 11 : (total >= 68) ? 7 : (total >= 39) ? 5 : 3;
            const int middle = (total - gap) / 2;

            fixed(w128_t *pinned = this.status)
            {
                var word = &pinned[0].u32_0;

                // stands in for memset(status, 0x8b, sizeof(status))
                for (var n = 0; n <= DSFMT_N; n++)
                {
                    pinned[n].u64_0 = 0x8b8b8b8b8b8b8b8bUL;
                    pinned[n].u64_1 = 0x8b8b8b8b8b8b8b8bUL;
                }

                // the two key loops together run max(key_length + 1, total) - 1 times
                uint rounds = ((key_length + 1 > total) ? key_length + 1 : (uint)total) - 1;

                // first step: fold the key length into word 0
                uint32_t acc = ini_func1(word[0] ^ word[middle % total] ^ word[(total - 1) % total]);
                word[middle % total] += acc;
                acc += key_length;
                word[(middle + gap) % total] += acc;
                word[0] = acc;

                uint pos  = 1;
                uint step = 0;

                // absorb one key word per step while key words remain
                while ((step < rounds) && (step < key_length))
                {
                    uint midIdx  = (pos + middle) % total;
                    uint prevIdx = (pos + total - 1) % total;
                    uint gapIdx  = (pos + middle + gap) % total;

                    acc = ini_func1(word[pos] ^ word[midIdx] ^ word[prevIdx]);
                    word[midIdx] += acc;
                    acc += init_key[step] + pos;
                    word[gapIdx] += acc;
                    word[pos] = acc;

                    pos = (pos + 1) % total;
                    step++;
                }

                // remaining rounds: same mixing, with only the position added
                while (step < rounds)
                {
                    uint midIdx  = (pos + middle) % total;
                    uint prevIdx = (pos + total - 1) % total;
                    uint gapIdx  = (pos + middle + gap) % total;

                    acc = ini_func1(word[pos] ^ word[midIdx] ^ word[prevIdx]);
                    word[midIdx] += acc;
                    acc += pos;
                    word[gapIdx] += acc;
                    word[pos] = acc;

                    pos = (pos + 1) % total;
                    step++;
                }

                // final pass over all words: sum / ini_func2 / xor instead of xor / ini_func1 / add
                for (uint pass = 0; pass < total; pass++)
                {
                    uint midIdx  = (pos + middle) % total;
                    uint prevIdx = (pos + total - 1) % total;
                    uint gapIdx  = (pos + middle + gap) % total;

                    acc = ini_func2(word[pos] + word[midIdx] + word[prevIdx]);
                    word[midIdx] ^= acc;
                    acc -= pos;
                    word[gapIdx] ^= acc;
                    word[pos] = acc;

                    pos = (pos + 1) % total;
                }
            }

            initial_mask();
            period_certification();
            idx = DSFMT_N64;
        }
コード例 #13
0
        /**
         * Seeds the internal state array from an array of 32-bit integers.
         *
         * The state is first filled with the byte pattern 0x8b, then the key
         * words are stirred in with func1, and a final pass with func2 runs
         * once over all SFMT_N32 words.  Afterwards idx is set to SFMT_N32 and
         * period_certification() is applied.
         *
         * @param init_key the array of 32-bit integers, used as a seed.
         * @param key_length the number of entries read from init_key.
         */
        public void sfmt_init_by_array(uint32_t *init_key, uint key_length)
        {
            // the word total is only used to choose the lag and the mid-point offset
            const int total  = SFMT_N * 4;
            const int gap    = (total >= 623) ? 11 : (total >= 68) ? 7 : (total >= 39) ? 5 : 3;
            const int middle = (total - gap) / 2;

            fixed(w128_t *pinned = this.state)
            {
                uint32_t *word = &pinned[0].u32_0;

                // stands in for memset(psfmt32, 0x8b, sizeof(sfmt_t))
                for (var n = 0; n < SFMT_N; n++)
                {
                    pinned[n].u32_0 = 0x8b8b8b8bU;
                    pinned[n].u32_1 = 0x8b8b8b8bU;
                    pinned[n].u32_2 = 0x8b8b8b8bU;
                    pinned[n].u32_3 = 0x8b8b8b8bU;
                }

                // the two key loops together run max(key_length + 1, SFMT_N32) - 1 times
                uint rounds = ((key_length + 1 > SFMT_N32) ? key_length + 1 : (uint)SFMT_N32) - 1;

                // first step: fold the key length into word 0
                uint32_t acc = func1(word[0] ^ word[middle] ^ word[SFMT_N32 - 1]);
                word[middle] += acc;
                acc += key_length;
                word[middle + gap] += acc;
                word[0] = acc;

                uint pos  = 1;
                uint step = 0;

                // absorb one key word per step while key words remain
                while ((step < rounds) && (step < key_length))
                {
                    uint midIdx  = (pos + middle) % SFMT_N32;
                    uint prevIdx = (pos + SFMT_N32 - 1) % SFMT_N32;
                    uint gapIdx  = (pos + middle + gap) % SFMT_N32;

                    acc = func1(word[pos] ^ word[midIdx] ^ word[prevIdx]);
                    word[midIdx] += acc;
                    acc += init_key[step] + pos;
                    word[gapIdx] += acc;
                    word[pos] = acc;

                    pos = (pos + 1) % SFMT_N32;
                    step++;
                }

                // remaining rounds: same mixing, with only the position added
                while (step < rounds)
                {
                    uint midIdx  = (pos + middle) % SFMT_N32;
                    uint prevIdx = (pos + SFMT_N32 - 1) % SFMT_N32;
                    uint gapIdx  = (pos + middle + gap) % SFMT_N32;

                    acc = func1(word[pos] ^ word[midIdx] ^ word[prevIdx]);
                    word[midIdx] += acc;
                    acc += pos;
                    word[gapIdx] += acc;
                    word[pos] = acc;

                    pos = (pos + 1) % SFMT_N32;
                    step++;
                }

                // final pass over all words: sum / func2 / xor instead of xor / func1 / add
                for (uint pass = 0; pass < SFMT_N32; pass++)
                {
                    uint midIdx  = (pos + middle) % SFMT_N32;
                    uint prevIdx = (pos + SFMT_N32 - 1) % SFMT_N32;
                    uint gapIdx  = (pos + middle + gap) % SFMT_N32;

                    acc = func2(word[pos] + word[midIdx] + word[prevIdx]);
                    word[midIdx] ^= acc;
                    acc -= pos;
                    word[gapIdx] ^= acc;
                    word[pos] = acc;

                    pos = (pos + 1) % SFMT_N32;
                }
            }

            idx = SFMT_N32;
            period_certification();
        }
コード例 #14
0
        /**
         * Stage-1 scan: walks buf in 64-byte blocks and records the offsets of the
         * structural / pseudo-structural characters into pj.structural_indexes,
         * storing the final count in pj.n_structural_indexes.
         *
         * Return values, in the order they are checked:
         *  - CAPACITY         when len exceeds pj.bytecapacity
         *  - UNCLOSED_STRING  when the input ends inside a quote pair
         *  - EMPTY            when no structural index was produced
         *  - UNEXPECTED_ERROR when the last recorded index is greater than len
         *  - UNESCAPED_CHARS  when error_mask is non-zero after the scan
         *  - SUCCESS otherwise (or the result of check_utf8_errors when
         *    SIMDJSON_UTF8VALIDATE is defined)
         *
         * @param buf input bytes
         * @param len number of bytes of buf to scan
         * @param pj  receives the structural indexes; its bytecapacity bounds len
         */
        internal static JsonParseError find_structural_bits(uint8_t *buf, size_t len, ParsedJson pj)
        {
            if (len > pj.bytecapacity)
            {
                return(JsonParseError.CAPACITY);
            }

            uint32_t *base_ptr = pj.structural_indexes;
            uint32_t  @base    = 0;

            // NOTE(review): `state` is declared here without an initializer and the
            // SIMDJSON_UTF8VALIDATE branches below look unfinished - confirm before
            // enabling that symbol.
#if SIMDJSON_UTF8VALIDATE
            utf8_checking_state state;
#endif

            // we have padded the input out to 64 byte multiple with the remainder being
            // zeros
            // NOTE(review): the tail handling further down pads its temporary block
            // with 0x20 rather than zeros - confirm whether the statement above is
            // still accurate.

            // persistent state across loop
            // does the last iteration end with an odd-length sequence of backslashes?
            // either 0 or 1, but a 64-bit value
            uint64_t prev_iter_ends_odd_backslash = 0UL;
            // does the previous iteration end inside a double-quote pair?
            uint64_t prev_iter_inside_quote = 0UL; // either all zeros or all ones
            // does the previous iteration end on something that is a predecessor of a
            // pseudo-structural character - i.e. whitespace or a structural character
            // effectively the very first char is considered to follow "whitespace" for
            // the
            // purposes of pseudo-structural character detection so we initialize to 1
            uint64_t prev_iter_ends_pseudo_pred = 1UL;

            // structurals are persistent state across loop as we flatten them on the
            // subsequent iteration into our array pointed to by base_ptr.
            // This is harmless on the first iteration as structurals==0
            // and is done for performance reasons; we can hide some of the latency of the
            // expensive carryless multiply in the previous step with this work
            uint64_t structurals = 0;

            // The loop stops before the last block: with this bound the final 1..64
            // bytes are always left for the padded-copy branch after the loop.
            size_t   lenminus64 = len < 64 ? 0 : len - 64;
            size_t   idx        = 0;
            uint64_t error_mask = 0; // for unescaped characters within strings (ASCII code points < 0x20)

            for (; idx < lenminus64; idx += 64)
            {
                //__builtin_prefetch(buf + idx + 128);
                simd_input @in = fill_input(buf + idx);
                // NOTE(review): the disabled branch below passes `in` (no `@`), unlike
                // the local `@in` declared above - confirm it compiles when enabled.
#if SIMDJSON_UTF8VALIDATE
                check_utf8(in, state);
#endif
                // detect odd sequences of backslashes
                uint64_t odd_ends = find_odd_backslash_sequences(
                    @in, ref prev_iter_ends_odd_backslash);

                // detect insides of quote pairs ("quote_mask") and also our quote_bits
                // themselves
                uint64_t quote_bits = 0;
                uint64_t quote_mask = find_quote_mask_and_bits(
                    @in, odd_ends, ref prev_iter_inside_quote, ref quote_bits, ref error_mask);

                // take the previous iterations structural bits, not our current iteration,
                // and flatten
                flatten_bits(base_ptr, ref @base, (uint32_t)idx, structurals);

                uint64_t whitespace = 0;
                find_whitespace_and_structurals(@in, ref whitespace, ref structurals);

                // fixup structurals to reflect quotes and add pseudo-structural characters
                structurals = finalize_structurals(structurals, whitespace, quote_mask,
                                                   quote_bits, ref prev_iter_ends_pseudo_pred);
            }

            ////////////////
            // we use a giant copy-paste which is ugly.
            // but otherwise the string needs to be properly padded or else we
            // risk invalidating the UTF-8 checks.
            ////////////
            if (idx < len)
            {
                // Copy the remaining (len - idx) bytes into a 64-byte stack block that
                // is pre-filled with 0x20, so a full block can be processed.
                uint8_t *tmpbuf = stackalloc uint8_t[64];
                memset(tmpbuf, 0x20, 64);
                memcpy(tmpbuf, buf + idx, len - idx);
                simd_input @in = fill_input(tmpbuf);
                // NOTE(review): the disabled branch below uses `in` (no `@`) and a type
                // argument `T` that this method does not declare - confirm before enabling.
#if SIMDJSON_UTF8VALIDATE
                check_utf8 <T>(in, state);
#endif
                // detect odd sequences of backslashes
                uint64_t odd_ends = find_odd_backslash_sequences(
                    @in, ref prev_iter_ends_odd_backslash);

                // detect insides of quote pairs ("quote_mask") and also our quote_bits
                // themselves
                uint64_t quote_bits = 0;
                uint64_t quote_mask = find_quote_mask_and_bits(
                    @in, odd_ends, ref prev_iter_inside_quote, ref quote_bits, ref error_mask);

                // take the previous iterations structural bits, not our current iteration,
                // and flatten
                flatten_bits(base_ptr, ref @base, (uint)idx, structurals);

                uint64_t whitespace = 0;
                find_whitespace_and_structurals(@in, ref whitespace, ref structurals);

                // fixup structurals to reflect quotes and add pseudo-structural characters
                structurals = finalize_structurals(structurals, whitespace, quote_mask,
                                                   quote_bits, ref prev_iter_ends_pseudo_pred);
                idx += 64;
            }

            // is last string quote closed?
            if (prev_iter_inside_quote != 0)
            {
                return(JsonParseError.UNCLOSED_STRING);
            }

            // finally, flatten out the remaining structurals from the last iteration
            flatten_bits(base_ptr, ref @base, (uint)idx, structurals);

            pj.n_structural_indexes = @base;
            // a valid JSON file cannot have zero structural indexes - we should have
            // found something
            if (pj.n_structural_indexes == 0u)
            {
                return(JsonParseError.EMPTY);
            }

            // the last recorded offset must not lie beyond the end of the input
            if (base_ptr[pj.n_structural_indexes - 1] > len)
            {
                return(JsonParseError.UNEXPECTED_ERROR);
            }

            if (len != base_ptr[pj.n_structural_indexes - 1])
            {
                // the string might not be NULL terminated, but we add a virtual NULL ending
                // character.
                base_ptr[pj.n_structural_indexes++] = (uint)len;
            }

            // make it safe to dereference one beyond this array
            base_ptr[pj.n_structural_indexes] = 0;
            // reported only after the index array has been finalized above
            if (error_mask != 0)
            {
                return(JsonParseError.UNESCAPED_CHARS);
            }
#if SIMDJSON_UTF8VALIDATE
            return(check_utf8_errors(state));
#else
            return(JsonParseError.SUCCESS);
#endif
        }
コード例 #15
0
        /**
         * Writes one entry per set bit of a 64-bit mask into the index array and
         * advances the running entry count.
         *
         * Each entry is (idx - 64) plus the bit position reported by
         * trailingzeroes, so idx is expected to be the offset just past the block
         * that the mask describes.  Entries are written in unconditional groups
         * of eight: slots beyond the real count are filled with throw-away values,
         * so the array needs spare room after its logical end.
         *
         * @param base_ptr start of the index array
         * @param base     in: entries already stored; out: increased by hamming(bits)
         * @param idx      offset just past the block that produced bits
         * @param bits     mask with one bit per position to record
         */
        internal static void flatten_bits(uint32_t *base_ptr, ref uint32_t @base, uint32_t idx, uint64_t bits)
        {
            // This early exit is costly when mispredicted, yet in other workloads
            // it is a large win, so it stays.
            if (bits == 0)
            {
                return;
            }

            uint32_t found  = (uint32_t)hamming(bits);
            uint32_t newEnd = @base + found;
            uint32_t origin = idx - 64;             // offset of the block the mask belongs to
            uint32_t *slot  = base_ptr + @base;     // write cursor

            // First eight slots are always written, whatever the real count is.
            *slot++ = (uint32_t)(origin + trailingzeroes(bits));
            bits   &= bits - 1;
            *slot++ = (uint32_t)(origin + trailingzeroes(bits));
            bits   &= bits - 1;
            *slot++ = (uint32_t)(origin + trailingzeroes(bits));
            bits   &= bits - 1;
            *slot++ = (uint32_t)(origin + trailingzeroes(bits));
            bits   &= bits - 1;
            *slot++ = (uint32_t)(origin + trailingzeroes(bits));
            bits   &= bits - 1;
            *slot++ = (uint32_t)(origin + trailingzeroes(bits));
            bits   &= bits - 1;
            *slot++ = (uint32_t)(origin + trailingzeroes(bits));
            bits   &= bits - 1;
            *slot++ = (uint32_t)(origin + trailingzeroes(bits));
            bits   &= bits - 1;

            // Hopefully a well-predicted branch.
            if (found > 8)
            {
                *slot++ = (uint32_t)(origin + trailingzeroes(bits));
                bits   &= bits - 1;
                *slot++ = (uint32_t)(origin + trailingzeroes(bits));
                bits   &= bits - 1;
                *slot++ = (uint32_t)(origin + trailingzeroes(bits));
                bits   &= bits - 1;
                *slot++ = (uint32_t)(origin + trailingzeroes(bits));
                bits   &= bits - 1;
                *slot++ = (uint32_t)(origin + trailingzeroes(bits));
                bits   &= bits - 1;
                *slot++ = (uint32_t)(origin + trailingzeroes(bits));
                bits   &= bits - 1;
                *slot++ = (uint32_t)(origin + trailingzeroes(bits));
                bits   &= bits - 1;
                *slot++ = (uint32_t)(origin + trailingzeroes(bits));
                bits   &= bits - 1;
            }

            // Rare: more than 16 entries in one block means a structural or
            // pseudo-structural element roughly every 4 characters
            // (possible with inputs like "","","",...).
            if (found > 16)
            {
                do
                {
                    *slot++ = (uint32_t)(origin + trailingzeroes(bits));
                    bits   &= bits - 1;
                } while (bits != 0);
            }

            @base = newEnd;
        }
コード例 #16
0
 // Declaration of an externally implemented routine: the method is `extern`, so its
 // body lives outside this file (any import attribute is not visible in this excerpt).
 // NOTE(review): judging by the names only, this presumably sorts the `n` entries of
 // `code`, reports the resulting permutation through `order`, and splits the work
 // into `ntasks` tasks - confirm against the native implementation.
 public static extern void sort_ispc(int32_t n, uint32_t *code, int32_t *order, int32_t ntasks) /*x28*/;