예제 #1
0
        // Parses the len bytes starting at buf into pj by running stage 1
        // (stage1_find_marks.find_structural_bits) and, only if that succeeds,
        // stage 2 (stage2_build_tape.unified_machine).
        //
        // buf             - start of the JSON bytes
        // len             - number of bytes to parse
        // pj              - destination; rejected when pj->bytecapacity < len
        // reallocifneeded - when true, the input is copied into a buffer obtained from
        //                   Utils.allocate_padded_buffer if the page check below fires
        //
        // Returns false when pj is too small, when the padded buffer cannot be
        // allocated, or when either stage reports failure; otherwise true.
        public static bool json_parse(uint8_t *buf, size_t len, ParsedJson *pj, bool reallocifneeded = true)
        {
            if (pj->bytecapacity < len)
            {
                Debug.WriteLine("Your ParsedJson cannot support documents that big: " + len);
                return(false);
            }

            // The buffer actually handed to the stages; differs from buf only when a
            // padded copy had to be made, in which case this method owns it.
            uint8_t *input     = buf;
            bool     ownsInput = false;

            if (reallocifneeded)
            {
                // realloc is needed if the end of the memory crosses a page
                long pagesize = System.Environment.SystemPageSize;

                if (((size_t)(buf + len - 1) % (size_t)pagesize) < SIMDJSON_PADDING)
                {
                    input = (uint8_t *)Utils.allocate_padded_buffer(len);
                    if (input == null)
                    {
                        return(false);
                    }
                    memcpy((void *)input, buf, len);
                    ownsInput = true;
                }
            }

            try
            {
                // Short-circuit: stage 2 only runs when stage 1 succeeded.
                return(stage1_find_marks.find_structural_bits(input, len, pj) &&
                       stage2_build_tape.unified_machine(input, len, pj));
            }
            finally
            {
                // Single release point, also reached if a stage throws, so the
                // temporary copy can never leak.
                if (ownsInput)
                {
                    Utils.free((void *)input);
                }
            }
        }
예제 #2
0
        // Builds an iterator over the tape of pj.
        //
        // Throws InvalidOperationException when pj->isValid() is false.
        // If the scope-index array cannot be allocated the constructor returns
        // early, leaving depthindex null and every other field at its reset value.
        public iterator(ParsedJson *pj)
        {
            // Reset all state first so every exit path leaves the fields assigned.
            this.pj      = pj;
            depth        = 0;
            location     = 0;
            tape_length  = 0;
            depthindex   = null;
            current_type = 0;
            current_val  = 0;

            if (!pj->isValid())
            {
                throw new InvalidOperationException("Json is invalid");
            }

            depthindex = allocate <scopeindex_t>(pj->depthcapacity);
            if (depthindex == null)
            {
                return;
            }

            // Scope 0 starts at the first tape word; the word's top byte is its type.
            depthindex[0].start_of_scope = location;
            current_val = pj->tape[location];
            location++;
            current_type = (uint8_t)(current_val >> 56);
            depthindex[0].scope_type = current_type;

            // Only a first word of type 'r' (presumably the root marker - confirm
            // against the tape writer) carries a tape length to step into.
            if (current_type != 'r')
            {
                return;
            }

            tape_length = current_val & JSONVALUEMASK;
            if (location >= tape_length)
            {
                return;
            }

            // Step onto the word that follows and open scope 1 there.
            current_val  = pj->tape[location];
            current_type = (uint8_t)(current_val >> 56);
            depth++;
            depthindex[depth].start_of_scope = location;
            depthindex[depth].scope_type     = current_type;
        }
예제 #3
0
        // Unescapes the JSON string whose opening quote is at buf[offset] into the
        // string buffer of pj and records it on the tape.
        //
        // buf    - start of the JSON input
        // len    - not referenced in this body
        // pj     - receives the unescaped bytes at pj->current_string_buf_loc and a
        //          tape entry of type '"'
        // depth  - not referenced in this body
        // offset - index in buf of the opening quote
        //
        // Returns true once the closing quote has been found and the tape entry
        // written. Returns false when an escape is not in escape_map, when
        // handle_unicode_codepoint fails, or (only with CHECKUNESCAPED defined) when
        // a byte <= 0x1F appears unescaped.
        //
        // NOTE(review): every iteration loads 32 bytes from src and stores 32 bytes to
        // dst regardless of where the string ends, so both buffers presumably need at
        // least 32 bytes of slack past the string - confirm against the allocators.
        public static bool parse_string(uint8_t *buf, size_t len, ParsedJson *pj, uint32_t depth, uint32_t offset)
        {
#if SIMDJSON_SKIPSTRINGPARSING               // for performance analysis, it is sometimes useful to skip parsing
            pj->write_tape(0, '"');          // don't bother with the string parsing at all
            return(true);                    // always succeeds
#else
            uint8_t *src = &buf[offset + 1]; // we know that buf at offset is a "
            uint8_t *dst = pj->current_string_buf_loc;
#if JSON_TEST_STRINGS                        // for unit testing
            uint8_t *const start_of_string = dst;
#endif
            var slashVec = Vector256.Create((byte)'\\');
            var quoteVec = Vector256.Create((byte)'"');

            // Each iteration inspects one 32-byte window starting at src.
            while (true)
            {
                Vector256 <byte> v       = Avx2.LoadVector256((src));
                // One bit per byte of the window: set where the byte is a backslash.
                uint32_t         bs_bits =
                    (uint32_t)Avx2.MoveMask(Avx2.CompareEqual(v, slashVec));
                // One bit per byte of the window: set where the byte is a quote.
                uint32_t quote_bits =
                    (uint32_t)Avx2.MoveMask(Avx2.CompareEqual(v, quoteVec));
                // All Unicode characters may be placed within the
                // quotation marks, except for the characters that MUST be escaped:
                // quotation mark, reverse solidus, and the control characters (U+0000
                //through U+001F).
                // https://tools.ietf.org/html/rfc8259
#if CHECKUNESCAPED
                var unitsep       = Vector256.Create((byte)0x1F);
                var unescaped_vec =
                    Avx2.CompareEqual(Avx2.Max(unitsep, v), unitsep); // could do it with saturated subtraction
#endif // CHECKUNESCAPED

                // Byte distance from src to the first quote / first backslash in the window.
                uint32_t quote_dist = (uint32_t)trailingzeroes(quote_bits);
                uint32_t bs_dist    = (uint32_t)trailingzeroes(bs_bits);
                // store to dest unconditionally - we can overwrite the bits we don't like
                // later
                Avx.Store((dst), v);
                if (quote_dist < bs_dist)
                {
                    // we encountered quotes first. Move dst to point to quotes and exit
                    dst[quote_dist] = 0; // null terminate and get out

                    // The tape payload is the byte offset of this string's start inside pj->string_buf.
                    pj->write_tape((size_t)pj->current_string_buf_loc - (size_t)pj->string_buf, (uint8_t)'"');

                    pj->current_string_buf_loc = dst + quote_dist + 1; // the +1 is due to the 0 value
#if CHECKUNESCAPED
                    // check that there is no unescaped char before the quote
                    uint32_t unescaped_bits = (uint32_t)Avx2.MoveMask(unescaped_vec);
                    bool     is_ok          = ((quote_bits - 1) & (~quote_bits) & unescaped_bits) == 0;
#if JSON_TEST_STRINGS // for unit testing
                    if (is_ok)
                    {
                        foundString(buf + offset, start_of_string, pj->current_string_buf_loc - 1);
                    }
                    else
                    {
                        foundBadString(buf + offset);
                    }
#endif // JSON_TEST_STRINGS
                    return(is_ok);
#else //CHECKUNESCAPED
#if JSON_TEST_STRINGS // for unit testing
                    foundString(buf + offset, start_of_string, pj->current_string_buf_loc - 1);
#endif // JSON_TEST_STRINGS
                    return(true);
#endif //CHECKUNESCAPED
                }
                else if (quote_dist > bs_dist)
                {
                    // The escaped character is the byte right after the backslash. When the
                    // backslash is the last byte of the window this reads src[32], one byte
                    // past what was loaded above - assumes the input is padded; TODO confirm.
                    uint8_t escape_char = src[bs_dist + 1];
#if CHECKUNESCAPED
                    // we are going to need the unescaped_bits to check for unescaped chars
                    uint32_t unescaped_bits = (uint32_t)Avx2.MoveMask(unescaped_vec);
                    if (((bs_bits - 1) & (~bs_bits) & unescaped_bits) != 0)
                    {
#if JSON_TEST_STRINGS // for unit testing
                        foundBadString(buf + offset);
#endif // JSON_TEST_STRINGS
                        return(false);
                    }
#endif //CHECKUNESCAPED
                    // we encountered backslash first. Handle backslash
                    if (escape_char == 'u')
                    {
                        // move src/dst up to the start; they will be further adjusted
                        // within the unicode codepoint handling code.
                        src += bs_dist;
                        dst += bs_dist;
                        if (!handle_unicode_codepoint(&src, &dst))
                        {
#if JSON_TEST_STRINGS // for unit testing
                            foundBadString(buf + offset);
#endif // JSON_TEST_STRINGS
                            return(false);
                        }
                    }
                    else
                    {
                        // simple 1:1 conversion. Will eat bs_dist+2 characters in input and
                        // write bs_dist+1 characters to output
                        // note this may reach beyond the part of the buffer we've actually
                        // seen. I think this is ok
                        uint8_t escape_result = escape_map[escape_char];
                        if (escape_result == 0)
                        {
#if JSON_TEST_STRINGS // for unit testing
                            foundBadString(buf + offset);
#endif // JSON_TEST_STRINGS
                            return(false); // bogus escape value is an error
                        }

                        // Overwrite the backslash already copied to dst with the decoded byte.
                        dst[bs_dist] = escape_result;
                        src         += bs_dist + 2;
                        dst         += bs_dist + 1;
                    }
                }
                else
                {
                    // they are the same. Since they can't co-occur, it means we encountered
                    // neither.
                    src += 32;
                    dst += 32;
#if CHECKUNESCAPED
                    // check for unescaped chars
                    if (Avx.TestZ(unescaped_vec, unescaped_vec) != true)
                    {
#if JSON_TEST_STRINGS // for unit testing
                        foundBadString(buf + offset);
#endif // JSON_TEST_STRINGS
                        return(false);
                    }
#endif // CHECKUNESCAPED
                }
            }

            // can't be reached
            return(true);
#endif // SIMDJSON_SKIPSTRINGPARSING
        }
예제 #4
0
        // Parses the JSON number that starts at buf[offset] and appends it to the
        // tape of pj.
        //
        // buf         - start of the JSON input
        // pj          - receives the value through write_tape_s64 or write_tape_double
        // offset      - index in buf of the first character of the number (the '-'
        //               when found_minus is true)
        // found_minus - true when the caller already saw a leading '-'
        //
        // The value is written as a double when the combined exponent (fraction
        // digits plus exponent field) or the exponent field itself is non-zero,
        // otherwise as a signed 64-bit integer. Long digit runs are handed to
        // parse_float / parse_large_integer, which re-parse from offset.
        //
        // Returns false for malformed numbers and for decimal exponents outside
        // [-308, 308]. On the fast paths the return value is whether the character
        // following the number is structural or whitespace; note the tape entry has
        // already been written by then.
        public static bool parse_number(uint8_t *buf, ParsedJson *pj, uint32_t offset, bool found_minus)
        {
            bytechar *p        = (bytechar *)(buf + offset);
            bool      negative = false;

            if (found_minus)
            {
                ++p;
                negative = true;
                if (!is_integer(*p))
                {
                    // a negative sign must be followed by an integer
#if JSON_TEST_NUMBERS // for unit testing
                    foundInvalidNumber(buf + offset);
#endif
                    return(false);
                }
            }

            bytechar *startdigits = p;

            int64_t i;

            if (*p == '0')
            {
                // 0 cannot be followed by an integer
                ++p;
                if (is_not_structural_or_whitespace_or_exponent_or_decimal((uint8_t)(*p)))
                {
#if JSON_TEST_NUMBERS // for unit testing
                    foundInvalidNumber(buf + offset);
#endif
                    return(false);
                }

                i = 0;
            }
            else
            {
                if (!(is_integer(*p)))
                {
                    // must start with an integer
#if JSON_TEST_NUMBERS // for unit testing
                    foundInvalidNumber(buf + offset);
#endif
                    return(false);
                }

                unsigned_bytechar digit = (unsigned_bytechar)(*p - '0');
                i = digit;
                p++;
                // the is_made_of_eight_digits_fast routine is unlikely to help here because
                // we rarely see large integer parts like 123456789
                while (is_integer(*p))
                {
                    digit = (unsigned_bytechar)(*p - '0');
                    i     = 10 * i + digit; // might overflow; the digitcount checks below reroute long inputs
                    ++p;
                }
            }

            // Decimal exponent to apply to i; starts as minus the number of fraction digits.
            int64_t exponent = 0;

            if ('.' == *p)
            {
                ++p;
                bytechar *firstafterperiod = p;
                if (is_integer(*p))
                {
                    unsigned_bytechar digit = (unsigned_bytechar)(*p - '0');
                    ++p;
                    i = i * 10 + digit;
                }
                else
                {
                    // a decimal point must be followed by at least one digit
#if JSON_TEST_NUMBERS // for unit testing
                    foundInvalidNumber(buf + offset);
#endif
                    return(false);
                }
#if SWAR_NUMBER_PARSING
                // this helps if we have lots of decimals!
                // this turns out to be frequent enough.
                if (is_made_of_eight_digits_fast(p))
                {
                    i  = i * 100000000 + parse_eight_digits_unrolled(p);
                    p += 8;
                    // exponent -= 8;
                }
#endif
                while (is_integer(*p))
                {
                    unsigned_bytechar digit = (unsigned_bytechar)(*p - '0');
                    ++p;
                    i = i * 10 + digit; // in rare cases, this will overflow, but that's ok because we have parse_highprecision_float later.
                }

                exponent = firstafterperiod - p;
            }

            // Characters consumed since startdigits, minus one; this count includes
            // the '.' when a fraction is present.
            int digitcount = (int)(p - startdigits - 1);

            int64_t expnumber = 0; // exponential part
            if (('e' == *p) || ('E' == *p))
            {
                ++p;
                bool negexp = false;
                if ('-' == *p)
                {
                    negexp = true;
                    ++p;
                }
                else if ('+' == *p)
                {
                    ++p;
                }

                if (!is_integer(*p))
                {
                    // an exponent marker must be followed by at least one digit
#if JSON_TEST_NUMBERS // for unit testing
                    foundInvalidNumber(buf + offset);
#endif
                    return(false);
                }

                unsigned_bytechar digit = (unsigned_bytechar)(*p - '0');
                expnumber = digit;
                p++;
                while (is_integer(*p))
                {
                    digit = (unsigned_bytechar)(*p - '0');
                    // Clamp instead of accumulating without bound: a long run of
                    // exponent digits would otherwise wrap the 64-bit accumulator and
                    // could land back inside the accepted range. Once the value has
                    // reached 2^32 it is already far outside +-308, so it is left as
                    // is and the range check below rejects it (it is irrelevant when
                    // the mantissa is zero). Exponents with leading zeros still parse.
                    if (expnumber < 0x100000000)
                    {
                        expnumber = 10 * expnumber + digit;
                    }
                    ++p;
                }

                exponent += (negexp ? -expnumber : expnumber);
            }

            i = negative ? -i : i;
            if ((exponent != 0) || (expnumber != 0))
            {
                if ((digitcount >= 19))
                {
                    // this is uncommon!!!
                    // this is almost never going to get called!!!
                    // we start anew, going slowly!!!
                    return(parse_float(buf, pj, offset,
                                       found_minus));
                }

                ///////////
                // We want 0.1e1 to be a float.
                //////////
                if (i == 0)
                {
                    pj->write_tape_double(0.0);
#if JSON_TEST_NUMBERS // for unit testing
                    foundFloat(0.0, buf + offset);
#endif
                }
                else
                {
                    if ((exponent > 308) || (exponent < -308))
                    {
                        // we refuse to parse this
#if JSON_TEST_NUMBERS // for unit testing
                        foundInvalidNumber(buf + offset);
#endif
                        return(false);
                    }

                    double d = i;
                    // power_of_ten is indexed with a bias of 308, hence the range check above.
                    d *= power_of_ten[308 + exponent];
                    // d = negative ? -d : d;
                    pj->write_tape_double(d);
#if JSON_TEST_NUMBERS // for unit testing
                    foundFloat(d, buf + offset);
#endif
                }
            }
            else
            {
                if ((digitcount >= 18))
                {
                    // this is uncommon!!!
                    return(parse_large_integer(buf, pj, offset,
                                               found_minus));
                }

                pj->write_tape_s64(i);
#if JSON_TEST_NUMBERS // for unit testing
                foundInteger(i, buf + offset);
#endif
            }

            // The number is only valid if it is followed by a structural character or whitespace.
            return(is_structural_or_whitespace((uint8_t)(*p)) != 0);
        }
예제 #5
0
        // Called by parse_number when the output is known to be an integer but the
        // digit run is long enough that a 64-bit overflow is possible; every
        // accumulation step here is overflow-checked.
        // Do not call this function directly as it skips some of the checks from
        // parse_number.
        //
        // This function will almost never be called!!!
        //
        // buf         - start of the JSON input
        // pj          - receives the value through write_tape_s64
        // offset      - index in buf of the first character of the number (the '-'
        //               when found_minus is true)
        // found_minus - true when the number has a leading '-'
        //
        // Returns false when the value does not fit in a signed 64-bit integer.
        // Otherwise the value is written to the tape and the return value is whether
        // the character following the digits is structural or whitespace.
        static bool parse_large_integer(uint8_t *buf, ParsedJson *pj, uint32_t offset, bool found_minus)
        {
            bytechar *p        = (bytechar *)(buf + offset);
            bool      negative = false;

            if (found_minus)
            {
                ++p;
                negative = true;
            }

            // Magnitude of the number, accumulated unsigned so that the magnitude of
            // the most negative 64-bit value is representable.
            uint64_t i;

            if (*p == '0')
            {
                // 0 cannot be followed by an integer
                ++p;
                i = 0;
            }
            else
            {
                unsigned_bytechar digit = (unsigned_bytechar)(*p - '0');
                i = digit;
                p++;
                // the is_made_of_eight_digits_fast routine is unlikely to help here because
                // we rarely see large integer parts like 123456789
                while (is_integer(*p))
                {
                    digit = (unsigned_bytechar)(*p - '0');
                    // i = 10 * i + digit, bailing out as soon as either step overflows.
                    // The || short-circuit keeps the add from running after a failed multiply.
                    if (mul_overflow(i, 10, &i) || add_overflow(i, digit, &i))
                    {
#if JSON_TEST_NUMBERS // for unit testing
                        foundInvalidNumber(buf + offset);
#endif
                        return(false); // overflow
                    }

                    ++p;
                }
            }

            // Largest magnitude that still fits in a signed 64-bit integer: one more
            // is allowed for negative numbers than for positive ones.
            uint64_t limit = negative ? 0x8000000000000000UL : 0x7FFFFFFFFFFFFFFFUL;
            if (i > limit)
            {
                // overflows!
#if JSON_TEST_NUMBERS // for unit testing
                foundInvalidNumber(buf + offset);
#endif
                return(false); // overflow
            }

            // Explicitly unchecked: when i is 0x8000000000000000 the cast and the
            // negation both wrap to the most negative value, which is the intended
            // result. Without this, a build with overflow checking enabled would
            // throw instead.
            int64_t signed_answer = unchecked(negative ? -(int64_t)i : (int64_t)i);
            pj->write_tape_s64(signed_answer);
#if JSON_TEST_NUMBERS // for unit testing
            foundInteger(signed_answer, buf + offset);
#endif
            return(is_structural_or_whitespace((byte)(*p)) != 0);
        }
예제 #6
0
        // Slow path used by parse_number when the result is a float but the digit
        // run is long enough that integer accumulation might overflow. The whole
        // number is re-read from buf[offset] and accumulated directly in a double.
        // Do not call this function directly: it relies on checks parse_number has
        // already performed.
        //
        // This function will almost never be called!!!
        //
        // Note: a redesign could avoid this function entirely.
        //
        // Returns false when a '.' or exponent marker is not followed by a digit,
        // when the exponent has more than four digits or lies outside [-308, 308],
        // or when the number is not followed by a structural character or
        // whitespace. Otherwise the value is written with write_tape_double.
        private static bool parse_float(uint8_t *buf, ParsedJson *pj, uint32_t offset, bool found_minus)
        {
            bytechar *cursor     = (bytechar *)(buf + offset);
            bool      isNegative = found_minus;

            if (found_minus)
            {
                cursor++; // step over the sign
            }

            // Integer part.
            double value;

            if (*cursor == '0')
            {
                // 0 cannot be followed by an integer
                cursor++;
                value = 0;
            }
            else
            {
                value = (unsigned_bytechar)(*cursor - '0');
                cursor++;
                for (; is_integer(*cursor); cursor++)
                {
                    unsigned_bytechar digit = (unsigned_bytechar)(*cursor - '0');
                    value = 10 * value + digit;
                }
            }

            // Fractional part: at least one digit is required after the period.
            if (*cursor == '.')
            {
                cursor++;
                if (!is_integer(*cursor))
                {
#if JSON_TEST_NUMBERS // for unit testing
                    foundInvalidNumber(buf + offset);
#endif
                    return(false);
                }

                double weight = 1;
                do
                {
                    unsigned_bytechar digit = (unsigned_bytechar)(*cursor - '0');
                    cursor++;
                    weight *= 0.1;
                    value   = value + digit * weight;
                }
                while (is_integer(*cursor));
            }

            // Exponent part: optional sign, then one to four digits.
            if ((*cursor == 'e') || (*cursor == 'E'))
            {
                cursor++;
                bool expIsNegative = (*cursor == '-');
                if (expIsNegative || (*cursor == '+'))
                {
                    cursor++;
                }

                if (!is_integer(*cursor))
                {
#if JSON_TEST_NUMBERS // for unit testing
                    foundInvalidNumber(buf + offset);
#endif
                    return(false);
                }

                int64_t expMagnitude = (unsigned_bytechar)(*cursor - '0');
                cursor++;
                // Up to three further digits are accepted.
                for (int extra = 0; (extra < 3) && is_integer(*cursor); extra++)
                {
                    expMagnitude = 10 * expMagnitude + (unsigned_bytechar)(*cursor - '0');
                    cursor++;
                }

                if (is_integer(*cursor))
                {
                    // a fifth exponent digit: we refuse to parse this
#if JSON_TEST_NUMBERS // for unit testing
                    foundInvalidNumber(buf + offset);
#endif
                    return(false);
                }

                int exponent = (int)(expIsNegative ? -expMagnitude : expMagnitude);
                if ((exponent < -308) || (exponent > 308))
                {
                    // we refuse to parse this
#if JSON_TEST_NUMBERS // for unit testing
                    foundInvalidNumber(buf + offset);
#endif
                    return(false);
                }

                // power_of_ten is indexed with a bias of 308.
                value *= power_of_ten[308 + exponent];
            }

            // Reject before touching the tape if the number is not properly terminated.
            if (is_not_structural_or_whitespace((byte)*cursor) != 0)
            {
                return(false);
            }

            double result = isNegative ? -value : value;
            pj->write_tape_double(result);
#if JSON_TEST_NUMBERS // for unit testing
            foundFloat(result, buf + offset);
#endif
            return(is_structural_or_whitespace((byte)(*cursor)) != 0);
        }
예제 #7
0
        /// <summary>
        /// Stage 1 of the parser: scans <paramref name="buf"/> in 64-byte blocks using AVX2 and
        /// writes into <c>pj->structural_indexes</c> the byte offset of every structural
        /// character ({ } [ ] : ,), every opening quote, and the first character of every run
        /// of non-whitespace that lies outside a string ("pseudo-structural" characters).
        /// </summary>
        /// <param name="buf">Input bytes. Full 64-byte blocks are loaded directly from this
        /// buffer; the final (possibly partial) block is copied to a space-filled scratch
        /// buffer first.</param>
        /// <param name="len">Number of input bytes to scan.</param>
        /// <param name="pj">Receives the indexes in <c>structural_indexes</c> and their count
        /// in <c>n_structural_indexes</c>.</param>
        /// <returns>
        /// <c>true</c> when the index array was produced; <c>false</c> when <paramref name="len"/>
        /// exceeds <c>pj->bytecapacity</c> or when the input contains no structural character
        /// at all (empty or whitespace-only input).
        /// </returns>
        /// <exception cref="InvalidOperationException">The last recorded index lies beyond
        /// <paramref name="len"/>, which indicates an internal error.</exception>
        public static bool find_structural_bits(uint8_t *buf, size_t len, ParsedJson *pj)
        {
            if (len > pj->bytecapacity)
            {
                Console.WriteLine("Your ParsedJson object only supports documents up to " + pj->bytecapacity +
                                  " bytes but you are trying to process " + len + " bytes\n");
                return(false);
            }

            uint32_t *base_ptr = pj->structural_indexes;
            uint32_t  @base    = 0; // number of indexes written so far

            const uint64_t even_bits = 0x5555555555555555UL;
            const uint64_t odd_bits  = ~even_bits;

            // for now, just work in 64-byte chunks

            // persistent state across loop
            uint64_t prev_iter_ends_odd_backslash = 0UL; // either 0 or 1, but a 64-bit value
            uint64_t prev_iter_inside_quote       = 0UL; // either all zeros or all ones

            // effectively the very first char is considered to follow "whitespace" for the
            // purposes of pseudo-structural character detection
            uint64_t prev_iter_ends_pseudo_pred = 1UL;
            size_t   lenminus64  = len < 64 ? 0 : len - 64;
            size_t   idx         = 0;

            // Bitmask produced by the PREVIOUS 64-byte block. Each iteration first flattens
            // the previous block's mask into indexes (hence the "idx - 64" below) and only
            // then computes the mask for the current block.
            uint64_t structurals = 0;

            // C#: assign static readonly fields to locals before the loop
            Vector256 <byte> low_nibble_mask  = s_low_nibble_mask;
            Vector256 <byte> high_nibble_mask = s_high_nibble_mask;

            var structural_shufti_mask = Vector256.Create((byte)0x7);
            var whitespace_shufti_mask = Vector256.Create((byte)0x18);
            var slashVec       = Vector256.Create((bytechar)'\\').AsByte();
            var ffVec          = Vector128.Create((byte)0xFF).AsUInt64();
            var doubleQuoteVec = Vector256.Create((byte)'"');
            var zeroBVec       = Vector256.Create((byte)0);
            var vec7f          = Vector256.Create((byte)0x7f);

            for (; idx < lenminus64; idx += 64)
            {
                var input_lo = Avx.LoadVector256(buf + idx + 0);
                var input_hi = Avx.LoadVector256(buf + idx + 32);
                ////////////////////////////////////////////////////////////////////////////////////////////
                //     Step 1: detect odd sequences of backslashes
                ////////////////////////////////////////////////////////////////////////////////////////////
                uint64_t bs_bits =
                    cmp_mask_against_input(input_lo, input_hi, slashVec);
                uint64_t start_edges = bs_bits & ~(bs_bits << 1);
                // flip lowest if we have an odd-length run at the end of the prior
                // iteration
                uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
                uint64_t even_starts     = start_edges & even_start_mask;
                uint64_t odd_starts      = start_edges & ~even_start_mask;
                uint64_t even_carries    = bs_bits + even_starts;
                uint64_t odd_carries;
                // must record the carry-out of our odd-carries out of bit 63; this
                // indicates whether the sense of any edge going to the next iteration
                // should be flipped
                bool iter_ends_odd_backslash =
                    add_overflow(bs_bits, odd_starts, &odd_carries);

                // push in bit zero as a potential end if we had an odd-numbered run at the
                // end of the previous iteration
                odd_carries |= prev_iter_ends_odd_backslash;
                prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1UL : 0x0UL;
                uint64_t even_carry_ends    = even_carries & ~bs_bits;
                uint64_t odd_carry_ends     = odd_carries & ~bs_bits;
                uint64_t even_start_odd_end = even_carry_ends & odd_bits;
                uint64_t odd_start_even_end = odd_carry_ends & even_bits;
                uint64_t odd_ends           = even_start_odd_end | odd_start_even_end;

                ////////////////////////////////////////////////////////////////////////////////////////////
                //     Step 2: detect insides of quote pairs
                ////////////////////////////////////////////////////////////////////////////////////////////

                uint64_t quote_bits =
                    cmp_mask_against_input(input_lo, input_hi, doubleQuoteVec);
                // a quote preceded by an odd-length run of backslashes is escaped: drop it
                quote_bits = quote_bits & ~odd_ends;
                uint64_t quote_mask = Sse2.X64.ConvertToUInt64(Pclmulqdq.CarrylessMultiply(
                                                                   Vector128.Create(quote_bits, 0UL /*C# reversed*/), ffVec, 0));

                // Flatten the previous block's bitmask into indexes. The loop is unrolled
                // eight times; when fewer than eight bits remain the surplus stores write
                // throwaway values, and @base is reset to next_base afterwards so they are
                // overwritten by later stores.
                // NOTE(review): this relies on structural_indexes having slack after the
                // last real entry -- confirm against the allocation in ParsedJson.
                uint32_t cnt       = (uint32_t)hamming(structurals);
                uint32_t next_base = @base + cnt;
                while (structurals != 0)
                {
                    base_ptr[@base + 0] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                    structurals         = structurals & (structurals - 1);
                    base_ptr[@base + 1] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                    structurals         = structurals & (structurals - 1);
                    base_ptr[@base + 2] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                    structurals         = structurals & (structurals - 1);
                    base_ptr[@base + 3] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                    structurals         = structurals & (structurals - 1);
                    base_ptr[@base + 4] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                    structurals         = structurals & (structurals - 1);
                    base_ptr[@base + 5] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                    structurals         = structurals & (structurals - 1);
                    base_ptr[@base + 6] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                    structurals         = structurals & (structurals - 1);
                    base_ptr[@base + 7] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                    structurals         = structurals & (structurals - 1);
                    @base += 8;
                }

                @base = next_base;

                quote_mask ^= prev_iter_inside_quote;
                // Arithmetic right shift of the signed value copies bit 63 into every bit:
                // all ones if this block ends inside a string, all zeros otherwise.
                // (The C++ original notes that John Regehr considers the equivalent C++
                // expression fine.)
                prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);

                // How do we build up a user traversable data structure
                // first, do a 'shufti' to detect structural JSON characters
                // they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c
                // these go into the first 3 buckets of the comparison (1/2/4)

                // we are also interested in the four whitespace characters
                // space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d
                // these go into the next 2 buckets of the comparison (8/16)
                var v_lo = Avx2.And(
                    Avx2.Shuffle(low_nibble_mask, input_lo),
                    Avx2.Shuffle(high_nibble_mask,
                                 Avx2.And(Avx2.ShiftRightLogical(input_lo.AsUInt32(), 4).AsByte(),
                                          vec7f)));

                var v_hi = Avx2.And(
                    Avx2.Shuffle(low_nibble_mask, input_hi),
                    Avx2.Shuffle(high_nibble_mask,
                                 Avx2.And(Avx2.ShiftRightLogical(input_hi.AsUInt32(), 4).AsByte(),
                                          vec7f)));
                var tmp_lo = Avx2.CompareEqual(
                    Avx2.And(v_lo, structural_shufti_mask), zeroBVec);
                var tmp_hi = Avx2.CompareEqual(
                    Avx2.And(v_hi, structural_shufti_mask), zeroBVec);

                // The low half is zero-extended via uint; any sign-extension bits of the
                // high half are discarded by the << 32.
                uint64_t structural_res_0 = (uint32_t)Avx2.MoveMask(tmp_lo);
                uint64_t structural_res_1 = (uint64_t)Avx2.MoveMask(tmp_hi);
                structurals = ~(structural_res_0 | (structural_res_1 << 32));

                // this additional mask and transfer is non-trivially expensive,
                // unfortunately
                var tmp_ws_lo = Avx2.CompareEqual(
                    Avx2.And(v_lo, whitespace_shufti_mask), zeroBVec);
                var tmp_ws_hi = Avx2.CompareEqual(
                    Avx2.And(v_hi, whitespace_shufti_mask), zeroBVec);

                uint64_t ws_res_0   = (uint32_t)Avx2.MoveMask(tmp_ws_lo);
                uint64_t ws_res_1   = (uint64_t)Avx2.MoveMask(tmp_ws_hi);
                uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));


                // mask off anything inside quotes
                structurals &= ~quote_mask;

                // add the real quote bits back into our bitmask as well, so we can
                // quickly traverse the strings we've spent all this trouble gathering
                structurals |= quote_bits;

                // Now, establish "pseudo-structural characters". These are non-whitespace
                // characters that are (a) outside quotes and (b) have a predecessor that's
                // either whitespace or a structural character. This means that subsequent
                // passes will get a chance to encounter the first character of every string
                // of non-whitespace and, if we're parsing an atom like true/false/null or a
                // number we can stop at the first whitespace or structural character
                // following it.

                // a qualified predecessor is something that can happen 1 position before a
                // pseudo-structural character
                uint64_t pseudo_pred         = structurals | whitespace;
                uint64_t shifted_pseudo_pred = (pseudo_pred << 1) | prev_iter_ends_pseudo_pred;
                prev_iter_ends_pseudo_pred = pseudo_pred >> 63;
                uint64_t pseudo_structurals =
                    shifted_pseudo_pred & (~whitespace) & (~quote_mask);
                structurals |= pseudo_structurals;

                // now, we've used our close quotes all we need to. So let's switch them off
                // they will be off in the quote mask and on in quote bits.
                structurals &= ~(quote_bits & ~quote_mask);
            }

            ////////////////
            /// we use a giant copy-paste which is ugly.
            /// but otherwise the string needs to be properly padded or else we
            /// risk invalidating the UTF-8 checks.
            ////////////
            if (idx < len)
            {
                // Final block: copy the remaining bytes into a scratch buffer pre-filled
                // with spaces (0x20) so that the two 32-byte loads never read past the input.
                uint8_t *tmpbuf = stackalloc uint8_t[64];
                memset(tmpbuf, 0x20, 64);
                memcpy(tmpbuf, buf + idx, len - idx);
                Vector256 <byte> input_lo = Avx.LoadVector256(tmpbuf + 0);
                Vector256 <byte> input_hi = Avx.LoadVector256(tmpbuf + 32);
                ////////////////////////////////////////////////////////////////////////////////////////////
                //     Step 1: detect odd sequences of backslashes
                ////////////////////////////////////////////////////////////////////////////////////////////

                uint64_t bs_bits =
                    cmp_mask_against_input(input_lo, input_hi, slashVec);
                uint64_t start_edges = bs_bits & ~(bs_bits << 1);
                // flip lowest if we have an odd-length run at the end of the prior
                // iteration
                uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
                uint64_t even_starts     = start_edges & even_start_mask;
                uint64_t odd_starts      = start_edges & ~even_start_mask;
                uint64_t even_carries    = bs_bits + even_starts;

                uint64_t odd_carries;
                // this is the last block, so the carry-out of bit 63 is not needed and
                // the return value is deliberately ignored
                add_overflow(bs_bits, odd_starts, &odd_carries);

                // push in bit zero as a potential end if we had an odd-numbered run at the
                // end of the previous iteration
                odd_carries |= prev_iter_ends_odd_backslash;
                uint64_t even_carry_ends    = even_carries & ~bs_bits;
                uint64_t odd_carry_ends     = odd_carries & ~bs_bits;
                uint64_t even_start_odd_end = even_carry_ends & odd_bits;
                uint64_t odd_start_even_end = odd_carry_ends & even_bits;
                uint64_t odd_ends           = even_start_odd_end | odd_start_even_end;

                ////////////////////////////////////////////////////////////////////////////////////////////
                //     Step 2: detect insides of quote pairs
                ////////////////////////////////////////////////////////////////////////////////////////////

                uint64_t quote_bits =
                    cmp_mask_against_input(input_lo, input_hi, doubleQuoteVec);
                quote_bits = quote_bits & ~odd_ends;
                // NOTE(review): this block extracts the result through the signed overload,
                // unlike the main loop. The original author left a pointer to
                // https://github.com/dotnet/coreclr/issues/22813 ("BUG?") here, so the
                // difference may be a deliberate workaround -- confirm before unifying.
                uint64_t quote_mask = (uint64_t)Sse2.X64.ConvertToInt64(Pclmulqdq.CarrylessMultiply(
                                                                            Vector128.Create(quote_bits, 0UL /*C# reversed*/), ffVec, 0).AsInt64());
                quote_mask ^= prev_iter_inside_quote;
                // prev_iter_inside_quote is not updated: there is no following block

                // flatten the bitmask of the block that preceded this one (see main loop)
                uint32_t cnt       = (uint32_t)hamming(structurals);
                uint32_t next_base = @base + cnt;
                while (structurals != 0)
                {
                    base_ptr[@base + 0] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                    structurals         = structurals & (structurals - 1);
                    base_ptr[@base + 1] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                    structurals         = structurals & (structurals - 1);
                    base_ptr[@base + 2] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                    structurals         = structurals & (structurals - 1);
                    base_ptr[@base + 3] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                    structurals         = structurals & (structurals - 1);
                    base_ptr[@base + 4] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                    structurals         = structurals & (structurals - 1);
                    base_ptr[@base + 5] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                    structurals         = structurals & (structurals - 1);
                    base_ptr[@base + 6] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                    structurals         = structurals & (structurals - 1);
                    base_ptr[@base + 7] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                    structurals         = structurals & (structurals - 1);
                    @base += 8;
                }
                @base = next_base;
                // How do we build up a user traversable data structure
                // first, do a 'shufti' to detect structural JSON characters
                // they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c
                // these go into the first 3 buckets of the comparison (1/2/4)

                // we are also interested in the four whitespace characters
                // space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d
                // these go into the next 2 buckets of the comparison (8/16)

                var v_lo = Avx2.And(
                    Avx2.Shuffle(low_nibble_mask, input_lo),
                    Avx2.Shuffle(high_nibble_mask,
                                 Avx2.And(Avx2.ShiftRightLogical(input_lo.AsUInt32(), 4).AsByte(),
                                          vec7f)));

                var v_hi = Avx2.And(
                    Avx2.Shuffle(low_nibble_mask, input_hi),
                    Avx2.Shuffle(high_nibble_mask,
                                 Avx2.And(Avx2.ShiftRightLogical(input_hi.AsUInt32(), 4).AsByte(),
                                          vec7f)));
                var tmp_lo = Avx2.CompareEqual(
                    Avx2.And(v_lo, structural_shufti_mask), zeroBVec);
                var tmp_hi = Avx2.CompareEqual(
                    Avx2.And(v_hi, structural_shufti_mask), zeroBVec);

                uint64_t structural_res_0 = (uint32_t)Avx2.MoveMask(tmp_lo);
                uint64_t structural_res_1 = (uint64_t)Avx2.MoveMask(tmp_hi);
                structurals = ~(structural_res_0 | (structural_res_1 << 32));

                // this additional mask and transfer is non-trivially expensive,
                // unfortunately
                var tmp_ws_lo = Avx2.CompareEqual(
                    Avx2.And(v_lo, whitespace_shufti_mask), zeroBVec);
                var tmp_ws_hi = Avx2.CompareEqual(
                    Avx2.And(v_hi, whitespace_shufti_mask), zeroBVec);

                uint64_t ws_res_0   = (uint32_t)Avx2.MoveMask(tmp_ws_lo);
                uint64_t ws_res_1   = (uint64_t)Avx2.MoveMask(tmp_ws_hi);
                uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));


                // mask off anything inside quotes
                structurals &= ~quote_mask;

                // add the real quote bits back into our bitmask as well, so we can
                // quickly traverse the strings we've spent all this trouble gathering
                structurals |= quote_bits;

                // Now, establish "pseudo-structural characters". These are non-whitespace
                // characters that are (a) outside quotes and (b) have a predecessor that's
                // either whitespace or a structural character. This means that subsequent
                // passes will get a chance to encounter the first character of every string
                // of non-whitespace and, if we're parsing an atom like true/false/null or a
                // number we can stop at the first whitespace or structural character
                // following it.

                // a qualified predecessor is something that can happen 1 position before a
                // pseudo-structural character
                uint64_t pseudo_pred         = structurals | whitespace;
                uint64_t shifted_pseudo_pred = (pseudo_pred << 1) | prev_iter_ends_pseudo_pred;
                // prev_iter_ends_pseudo_pred is not updated: there is no following block
                uint64_t pseudo_structurals =
                    shifted_pseudo_pred & (~whitespace) & (~quote_mask);
                structurals |= pseudo_structurals;

                // now, we've used our close quotes all we need to. So let's switch them off
                // they will be off in the quote mask and on in quote bits.
                structurals &= ~(quote_bits & ~quote_mask);
                idx += 64;
            }

            // finally, flatten the bitmask left over from the last processed block
            uint32_t cnt2       = (uint32_t)hamming(structurals);
            uint32_t next_base2 = @base + cnt2;

            while (structurals != 0)
            {
                base_ptr[@base + 0] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                structurals         = structurals & (structurals - 1);
                base_ptr[@base + 1] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                structurals         = structurals & (structurals - 1);
                base_ptr[@base + 2] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                structurals         = structurals & (structurals - 1);
                base_ptr[@base + 3] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                structurals         = structurals & (structurals - 1);
                base_ptr[@base + 4] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                structurals         = structurals & (structurals - 1);
                base_ptr[@base + 5] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                structurals         = structurals & (structurals - 1);
                base_ptr[@base + 6] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                structurals         = structurals & (structurals - 1);
                base_ptr[@base + 7] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals);
                structurals         = structurals & (structurals - 1);
                @base += 8;
            }
            @base = next_base2;

            pj->n_structural_indexes = @base;
            if (@base == 0)
            {
                // Empty or whitespace-only input: nothing was recorded. Without this guard
                // the checks below would evaluate base_ptr[0 - 1] in unsigned arithmetic
                // and read far outside the index array.
                return(false);
            }
            if (base_ptr[@base - 1] > len)
            {
                throw new InvalidOperationException("Internal bug");
            }
            if (len != base_ptr[@base - 1])
            {
                // the string might not be NULL terminated, but we add a virtual NULL ending character.
                base_ptr[pj->n_structural_indexes++] = (uint32_t)len;
            }
            base_ptr[pj->n_structural_indexes] = 0; // make it safe to dereference one beyond this array

            return(true);
        }
예제 #8
0
        public static bool unified_machine(uint8_t *buf, size_t len, ParsedJson *pj)
        {
            uint32_t i = 0;     // index of the structural character (0,1,2,3...)
            uint32_t idx;       // location of the structural character in the input (buf)
            uint8_t  c;         // used to track the (structural) character we are looking at, updated
            // by UPDATE_CHAR macro
            uint32_t depth = 0; // could have an arbitrary starting depth

            pj->init();
            if (pj->bytecapacity < len)
            {
                Debug.Write("insufficient capacity\n");
                return(false);
            }

            // this macro reads the next structural character, updating idx, i and c.
            //C#: expanded directly everywhere
            //void UPDATE_CHAR()
            //{
            //    idx = pj->structural_indexes[i++];
            //    c = buf[idx];
            //}

            pj->ret_address[depth]             = (bytechar)'s';
            pj->containing_scope_offset[depth] = pj->get_current_loc();
            pj->write_tape(0, (byte)'r'); // r for root, 0 is going to get overwritten
            // the root is used, if nothing else, to capture the size of the tape
            depth++;                      // everything starts at depth = 1, depth = 0 is just for the root, the root may contain an object, an array or something else.
            if (depth > pj->depthcapacity)
            {
                goto fail;
            }


            //UPDATE_CHAR():
            idx = pj->structural_indexes[i++];
            c   = buf[idx];

            switch (c)
            {
            case (uint8_t)'{':
                pj->containing_scope_offset[depth] = pj->get_current_loc();
                pj->ret_address[depth]             = (bytechar)'s';
                depth++;
                if (depth > pj->depthcapacity)
                {
                    goto fail;
                }

                pj->write_tape(0, c);     // strangely, moving this to object_begin slows things down
                goto object_begin;

            case (uint8_t)'[':
                pj->containing_scope_offset[depth] = pj->get_current_loc();
                pj->ret_address[depth]             = (bytechar)'s';
                depth++;
                if (depth > pj->depthcapacity)
                {
                    goto fail;
                }

                pj->write_tape(0, c);
                goto array_begin;

                // A JSON text is a serialized value.  Note that certain previous
                // specifications of JSON constrained a JSON text to be an object or an
                // array.  Implementations that generate only objects or arrays where a
                // JSON text is called for will be interoperable in the sense that all
                // implementations will accept these as conforming JSON texts.
                // https://tools.ietf.org/html/rfc8259
#if SIMDJSON_ALLOWANYTHINGINROOT
            case (uint8_t)'"':
            {
                if (!parse_string(buf, len, pj, depth, idx))
                {
                    goto fail;
                }

                break;
            }

            case (uint8_t)'t':
            {
                // we need to make a copy to make sure that the string is NULL terminated.
                // this only applies to the JSON document made solely of the true value.
                // this will almost never be called in practice
                bytechar *copy = stackalloc bytechar[(int)(len + SIMDJSON_PADDING)];
                if (copy == null)
                {
                    goto fail;
                }
                memcpy(copy, buf, len);
                copy[len] = (bytechar)'\0';
                if (!is_valid_true_atom((uint8_t *)copy + idx))
                {
                    //free(copy);
                    goto fail;
                }

                //free(copy);
                pj->write_tape(0, c);
                break;
            }

            case (uint8_t)'f':
            {
                // we need to make a copy to make sure that the string is NULL terminated.
                // this only applies to the JSON document made solely of the false value.
                // this will almost never be called in practice
                bytechar *copy = stackalloc bytechar[(int)(len + SIMDJSON_PADDING)];
                if (copy == null)
                {
                    goto fail;
                }
                memcpy(copy, buf, len);
                copy[len] = (bytechar)'\0';
                if (!is_valid_false_atom((uint8_t *)copy + idx))
                {
                    //free(copy);
                    goto fail;
                }

                //free(copy);
                pj->write_tape(0, c);
                break;
            }

            case (uint8_t)'n':
            {
                // we need to make a copy to make sure that the string is NULL terminated.
                // this only applies to the JSON document made solely of the null value.
                // this will almost never be called in practice
                bytechar *copy = stackalloc bytechar[(int)(len + SIMDJSON_PADDING)];
                if (copy == null)
                {
                    goto fail;
                }
                memcpy(copy, buf, len);
                copy[len] = (bytechar)'\0';
                if (!is_valid_null_atom((uint8_t *)copy + idx))
                {
                    //free(copy);
                    goto fail;
                }

                //free(copy);
                pj->write_tape(0, c);
                break;
            }

            case (uint8_t)'0':
            case (uint8_t)'1':
            case (uint8_t)'2':
            case (uint8_t)'3':
            case (uint8_t)'4':
            case (uint8_t)'5':
            case (uint8_t)'6':
            case (uint8_t)'7':
            case (uint8_t)'8':
            case (uint8_t)'9':
            {
                // we need to make a copy to make sure that the string is NULL terminated.
                // this is done only for JSON documents made of a sole number
                // this will almost never be called in practice
                bytechar *copy = stackalloc bytechar[(int)(len + SIMDJSON_PADDING)];
                if (copy == null)
                {
                    goto fail;
                }
                memcpy(copy, buf, len);
                copy[len] = (bytechar)'\0';
                if (!parse_number((uint8_t *)copy, pj, idx, false))
                {
                    //free(copy);
                    goto fail;
                }

                //free(copy);
                break;
            }

            case (uint8_t)'-':
            {
                // we need to make a copy to make sure that the string is NULL terminated.
                // this is done only for JSON documents made of a sole number
                // this will almost never be called in practice
                bytechar *copy = stackalloc bytechar[(int)(len + SIMDJSON_PADDING)];
                if (copy == null)
                {
                    goto fail;
                }
                memcpy(copy, buf, len);
                copy[len] = (bytechar)'\0';
                if (!parse_number((uint8_t *)copy, pj, idx, true))
                {
                    //free(copy);
                    goto fail;
                }

                //free(copy);
                break;
            }
#endif // ALLOWANYTHINGINROOT
            default:
                goto fail;
            }

start_continue:
            // the string might not be NULL terminated.
            if (i + 1 == pj->n_structural_indexes)
            {
                goto succeed;
            }
            else
            {
                goto fail;
            }
            ////////////////////////////// OBJECT STATES /////////////////////////////

object_begin:
            //UPDATE_CHAR():
            idx = pj->structural_indexes[i++];
            c   = buf[idx];
            switch (c)
            {
            case (uint8_t)'"':
            {
                if (!parse_string(buf, len, pj, depth, idx))
                {
                    goto fail;
                }

                goto object_key_state;
            }

            case (uint8_t)'}':
                goto scope_end;     // could also go to object_continue

            default:
                goto fail;
            }

object_key_state:
            //UPDATE_CHAR():
            idx = pj->structural_indexes[i++];
            c   = buf[idx];
            if (c != ':')
            {
                goto fail;
            }

            //UPDATE_CHAR():
            idx = pj->structural_indexes[i++];
            c   = buf[idx];
            switch (c)
            {
            case (uint8_t)'"':
            {
                if (!parse_string(buf, len, pj, depth, idx))
                {
                    goto fail;
                }

                break;
            }

            case (uint8_t)'t':
                if (!is_valid_true_atom(buf + idx))
                {
                    goto fail;
                }

                pj->write_tape(0, c);
                break;

            case (uint8_t)'f':
                if (!is_valid_false_atom(buf + idx))
                {
                    goto fail;
                }

                pj->write_tape(0, c);
                break;

            case (uint8_t)'n':
                if (!is_valid_null_atom(buf + idx))
                {
                    goto fail;
                }

                pj->write_tape(0, c);
                break;

            case (uint8_t)'0':
            case (uint8_t)'1':
            case (uint8_t)'2':
            case (uint8_t)'3':
            case (uint8_t)'4':
            case (uint8_t)'5':
            case (uint8_t)'6':
            case (uint8_t)'7':
            case (uint8_t)'8':
            case (uint8_t)'9':
            {
                if (!parse_number(buf, pj, idx, false))
                {
                    goto fail;
                }

                break;
            }

            case (uint8_t)'-':
            {
                if (!parse_number(buf, pj, idx, true))
                {
                    goto fail;
                }

                break;
            }

            case (uint8_t)'{':
            {
                pj->containing_scope_offset[depth] = pj->get_current_loc();
                pj->write_tape(0, c);     // here the compilers knows what c is so this gets optimized
                // we have not yet encountered } so we need to come back for it
                pj->ret_address[depth] = (bytechar)'o';
                // we found an object inside an object, so we need to increment the depth
                depth++;
                if (depth > pj->depthcapacity)
                {
                    goto fail;
                }

                goto object_begin;
            }

            case (uint8_t)'[':
            {
                pj->containing_scope_offset[depth] = pj->get_current_loc();
                pj->write_tape(0, c);     // here the compilers knows what c is so this gets optimized
                // we have not yet encountered } so we need to come back for it
                pj->ret_address[depth] = (bytechar)'o';
                // we found an array inside an object, so we need to increment the depth
                depth++;
                if (depth > pj->depthcapacity)
                {
                    goto fail;
                }

                goto array_begin;
            }

            default:
                goto fail;
            }

object_continue:
            //UPDATE_CHAR():
            idx = pj->structural_indexes[i++];
            c   = buf[idx];
            switch (c)
            {
            case (uint8_t)',':
                //UPDATE_CHAR():
                idx = pj->structural_indexes[i++];
                c   = buf[idx];
                if (c != (uint8_t)'"')
                {
                    goto fail;
                }
                else
                {
                    if (!parse_string(buf, len, pj, depth, idx))
                    {
                        goto fail;
                    }

                    goto object_key_state;
                }

            case (uint8_t)'}':
                goto scope_end;

            default:
                goto fail;
            }

            ////////////////////////////// COMMON STATE /////////////////////////////

scope_end:
            // write our tape location to the header scope
            depth--;
            pj->write_tape(pj->containing_scope_offset[depth], c);
            pj->annotate_previousloc(pj->containing_scope_offset[depth],
                                     pj->get_current_loc());
            // goto saved_state
            if (pj->ret_address[depth] == (uint8_t)'a')
            {
                goto array_continue;
            }
            else if (pj->ret_address[depth] == (uint8_t)'o')
            {
                goto object_continue;
            }
            else
            {
                goto start_continue;
            }

            ////////////////////////////// ARRAY STATES /////////////////////////////
array_begin:
            //UPDATE_CHAR():
            idx = pj->structural_indexes[i++];
            c   = buf[idx];
            if (c == ']')
            {
                goto scope_end; // could also go to array_continue
            }

main_array_switch:
            // we call update char on all paths in, so we can peek at c
            // on paths that can accept a close square brace (post-, and at start)
            switch (c)
            {
            case (uint8_t)'"':
            {
                if (!parse_string(buf, len, pj, depth, idx))
                {
                    goto fail;
                }

                break;
            }

            case (uint8_t)'t':
                if (!is_valid_true_atom(buf + idx))
                {
                    goto fail;
                }

                pj->write_tape(0, c);
                break;

            case (uint8_t)'f':
                if (!is_valid_false_atom(buf + idx))
                {
                    goto fail;
                }

                pj->write_tape(0, c);
                break;

            case (uint8_t)'n':
                if (!is_valid_null_atom(buf + idx))
                {
                    goto fail;
                }

                pj->write_tape(0, c);
                break;     // goto array_continue;

            case (uint8_t)'0':
            case (uint8_t)'1':
            case (uint8_t)'2':
            case (uint8_t)'3':
            case (uint8_t)'4':
            case (uint8_t)'5':
            case (uint8_t)'6':
            case (uint8_t)'7':
            case (uint8_t)'8':
            case (uint8_t)'9':
            {
                if (!parse_number(buf, pj, idx, false))
                {
                    goto fail;
                }

                break;     // goto array_continue;
            }

            case (uint8_t)'-':
            {
                if (!parse_number(buf, pj, idx, true))
                {
                    goto fail;
                }

                break;     // goto array_continue;
            }

            case (uint8_t)'{':
            {
                // we have not yet encountered ] so we need to come back for it
                pj->containing_scope_offset[depth] = pj->get_current_loc();
                pj->write_tape(0, c);     //  here the compilers knows what c is so this gets optimized
                pj->ret_address[depth] = (bytechar)'a';
                // we found an object inside an array, so we need to increment the depth
                depth++;
                if (depth > pj->depthcapacity)
                {
                    goto fail;
                }

                goto object_begin;
            }

            case (uint8_t)'[':
            {
                // we have not yet encountered ] so we need to come back for it
                pj->containing_scope_offset[depth] = pj->get_current_loc();
                pj->write_tape(0, c);     // here the compilers knows what c is so this gets optimized
                pj->ret_address[depth] = (bytechar)'a';
                // we found an array inside an array, so we need to increment the depth
                depth++;
                if (depth > pj->depthcapacity)
                {
                    goto fail;
                }

                goto array_begin;
            }

            default:
                goto fail;
            }

array_continue:
            //UPDATE_CHAR():
            idx = pj->structural_indexes[i++];
            c   = buf[idx];
            switch (c)
            {
            case (uint8_t)',':
                //UPDATE_CHAR():
                idx = pj->structural_indexes[i++];
                c   = buf[idx];
                goto main_array_switch;

            case (uint8_t)']':
                goto scope_end;

            default:
                goto fail;
            }


            ////////////////////////////// FINAL STATES /////////////////////////////

succeed:
            depth--;
            if (depth != 0)
            {
                throw new InvalidOperationException("internal bug");
            }

            if (pj->containing_scope_offset[depth] != 0)
            {
                throw new InvalidOperationException("internal bug");
            }

            pj->annotate_previousloc(pj->containing_scope_offset[depth],
                                     pj->get_current_loc());
            pj->write_tape(pj->containing_scope_offset[depth], (byte)'r');  // r is root
            pj->isvalid = true;
            return(true);



fail:
            return(false);
        }