// Parses the JSON number that starts at buf + offset and records it on pj.
// The value is written with WriteTapeInt64 when the text has neither a
// fraction nor an exponent, and with WriteTapeDouble otherwise.
//
// buf         - input bytes
// pj          - receives the parsed value
// offset      - position in buf of the first byte of the number
// found_minus - when true, the first byte is skipped as the sign and the
//               value is negated
//
// Returns false when the text is rejected. Otherwise returns whether the
// byte following the number is structural or whitespace, or the result of
// parse_float / parse_large_integer when the work is delegated to them.
public static bool parse_number(uint8_t *buf, ParsedJson pj, uint32_t offset, bool found_minus)
        {
            bytechar *p        = (bytechar *)(buf + offset);
            bool      negative = false;

            if (found_minus)
            {
                ++p;
                negative = true;
                if (!is_integer(*p))
                {
                    // a negative sign must be followed by an integer

                    return(false);
                }
            }

            bytechar *startdigits = p;

            int64_t i;

            if (*p == '0')
            {
                // 0 cannot be followed by an integer
                ++p;
                if (is_not_structural_or_whitespace_or_exponent_or_decimal((uint8_t)(*p)))
                {
#if JSON_TEST_NUMBERS // for unit testing
                    foundInvalidNumber(buf + offset);
#endif
                    return(false);
                }

                i = 0;
            }
            else
            {
                if (!(is_integer(*p)))
                {
                    // must start with an integer
#if JSON_TEST_NUMBERS // for unit testing
                    foundInvalidNumber(buf + offset);
#endif
                    return(false);
                }

                unsigned_bytechar digit = (unsigned_bytechar)(*p - '0');
                i = digit;
                p++;
                // the is_made_of_eight_digits_fast routine is unlikely to help here because
                // we rarely see large integer parts like 123456789
                while (is_integer(*p))
                {
                    digit = (unsigned_bytechar)(*p - '0');
                    i     = 10 * i + digit; // might overflow; digitcount sends such inputs to the slow paths below
                    ++p;
                }
            }

            int64_t exponent = 0;
            // Set as soon as a '.' or an exponent marker is seen, so that inputs
            // such as 1e0 are stored as doubles even though their exponent is zero.
            bool is_float = false;

            if ('.' == *p)
            {
                is_float = true;
                ++p;
                bytechar *firstafterperiod = p;
                if (is_integer(*p))
                {
                    unsigned_bytechar digit = (unsigned_bytechar)(*p - '0');
                    ++p;
                    i = i * 10 + digit;
                }
                else
                {
                    // a decimal point must be followed by at least one digit
#if JSON_TEST_NUMBERS // for unit testing
                    foundInvalidNumber(buf + offset);
#endif
                    return(false);
                }
#if SWAR_NUMBER_PARSING
                // this helps if we have lots of decimals!
                // this turns out to be frequent enough.
                if (is_made_of_eight_digits_fast(p))
                {
                    i  = i * 100000000 + parse_eight_digits_unrolled(p);
                    p += 8;
                }
#endif
                while (is_integer(*p))
                {
                    unsigned_bytechar digit = (unsigned_bytechar)(*p - '0');
                    ++p;
                    i = i * 10 + digit; // in rare cases, this will overflow, but that's ok because we fall back to parse_float later.
                }

                // minus the number of fractional digits consumed
                exponent = firstafterperiod - p;
            }

            // number of bytes between the first digit and the end of the fraction, minus one
            int digitcount = (int)(p - startdigits - 1);

            if (('e' == *p) || ('E' == *p))
            {
                is_float = true;
                ++p;
                bool negexp = false;
                if ('-' == *p)
                {
                    negexp = true;
                    ++p;
                }
                else if ('+' == *p)
                {
                    ++p;
                }

                if (!is_integer(*p))
                {
                    // the exponent needs at least one digit
#if JSON_TEST_NUMBERS // for unit testing
                    foundInvalidNumber(buf + offset);
#endif
                    return(false);
                }

                int64_t expnumber = 0; // exponential part
                while (is_integer(*p))
                {
                    unsigned_bytechar digit = (unsigned_bytechar)(*p - '0');
                    // Stop accumulating once the value is far beyond the supported
                    // range: a very long run of digits would otherwise wrap the
                    // 64-bit accumulator and could land back inside [-308, 308].
                    // The remaining digits are still consumed.
                    if (expnumber < 100000)
                    {
                        expnumber = 10 * expnumber + digit;
                    }

                    ++p;
                }

                exponent += (negexp ? -expnumber : expnumber);
            }

            i = negative ? -i : i;
            if (is_float)
            {
                if ((digitcount >= 19))
                {
                    // this is uncommon!!!
                    // this is almost never going to get called!!!
                    // we start anew, going slowly!!!
                    return(parse_float(buf, pj, offset,
                                       found_minus));
                }

                ///////////
                // We want 0.1e1 to be a float.
                //////////
                if (i == 0)
                {
                    pj.WriteTapeDouble(0.0);
#if JSON_TEST_NUMBERS // for unit testing
                    foundFloat(0.0, buf + offset);
#endif
                }
                else
                {
                    if ((exponent > 308) || (exponent < -308))
                    {
                        // we refuse to parse this
#if JSON_TEST_NUMBERS // for unit testing
                        foundInvalidNumber(buf + offset);
#endif
                        return(false);
                    }

                    double d = i;
                    // the table is indexed with an offset of 308, so the exponent must be in [-308, 308] here
                    d *= power_of_ten[308 + exponent];
                    pj.WriteTapeDouble(d);
#if JSON_TEST_NUMBERS // for unit testing
                    foundFloat(d, buf + offset);
#endif
                }
            }
            else
            {
                if ((digitcount >= 18))
                {
                    // this is uncommon!!!
                    // the accumulator may have overflowed, so the text is parsed again
                    return(parse_large_integer(buf, pj, offset,
                                               found_minus));
                }

                pj.WriteTapeInt64(i);
#if JSON_TEST_NUMBERS // for unit testing
                foundInteger(i, buf + offset);
#endif
            }

            return(is_structural_or_whitespace((uint8_t)(*p)) != 0);
        }
Beispiel #2
0
        // Fast-path parser for the JSON number located at buf + offset.
        //
        // Every digit of the integer and fractional parts is folded into a single
        // unsigned 64-bit accumulator while a decimal exponent is tracked on the
        // side. The result is written to pj with WriteTapeInt64 when the text has
        // no '.' and no exponent marker, and with WriteTapeDouble otherwise.
        // Inputs whose digit count suggests the accumulator may have overflowed,
        // or whose exponent falls outside the power_of_ten table, are handed to
        // parse_float / parse_large_integer, which parse the text again.
        //
        // When found_minus is true the first byte is skipped as the sign.
        //
        // Returns false when the text is rejected here. Otherwise returns whether
        // the byte after the number is structural or whitespace, or whatever the
        // fallback parser returns.
        internal static bool parse_number(uint8_t *buf, ParsedJson pj, uint32_t offset, bool found_minus)
        {
            char1 *cursor = (char1 *)(buf + offset);

            if (found_minus)
            {
                ++cursor;
                if (!is_integer(*cursor))
                {
                    // the sign has to be followed by a digit
                    return false;
                }
            }

            bool     negative    = found_minus;
            char1 *  digitsBegin = cursor;
            uint64_t mantissa; // unsigned, so an overflow wraps instead of being a signed overflow

            if (*cursor == (char1)'0')
            {
                // a leading zero may not be followed by another digit
                ++cursor;
                if (is_not_structural_or_whitespace_or_exponent_or_decimal((uint8_t)(*cursor)))
                {
                    return false;
                }

                mantissa = 0;
            }
            else
            {
                if (!is_integer(*cursor))
                {
                    // a number has to begin with a digit
                    return false;
                }

                uchar1 leading = (uchar1)(*cursor - (uchar1)'0');
                mantissa = leading;
                ++cursor;
                // Long integer parts are rare, so digits are taken one at a time.
                for (; is_integer(*cursor); ++cursor)
                {
                    uchar1 next = (uchar1)(*cursor - (uchar1)'0');
                    mantissa = 10 * mantissa + next; // may wrap; detected later through digitcount
                }
            }

            int64_t exponent = 0;
            bool    is_float = false;

            if ('.' == *cursor)
            {
                // From here on the value is known to be a float, yet the digits
                // keep being accumulated as if the decimal point were absent;
                // the scale is restored afterwards through the exponent.
                is_float = true;
                ++cursor;
                char1 *fractionBegin = cursor;
                if (!is_integer(*cursor))
                {
                    // at least one digit is required after the decimal point
                    return false;
                }

                uchar1 firstFraction = (uchar1)(*cursor - (uchar1)'0');
                ++cursor;
                mantissa = mantissa * 10 + firstFraction; // may wrap; detected later through digitcount
#if SWAR_NUMBER_PARSING
                // Long runs of decimals are frequent enough to take eight at once.
                if (is_made_of_eight_digits_fast(cursor))
                {
                    mantissa = mantissa * 100000000 + parse_eight_digits_unrolled(cursor);
                    cursor  += 8;
                }
#endif
                for (; is_integer(*cursor); ++cursor)
                {
                    uchar1 next = (uchar1)(*cursor - (uchar1)'0');
                    mantissa = mantissa * 10 + next; // may wrap in rare cases; parse_float covers those
                }

                // minus the number of fractional digits consumed
                exponent = fractionBegin - cursor;
            }

            // Bytes consumed since the first digit, minus one; guards against overflow below.
            int     digitcount = (int)(cursor - digitsBegin - 1);
            int64_t expnumber  = 0; // explicit exponent, without its sign
            if (((char1)'e' == *cursor) || ((char1)'E' == *cursor))
            {
                is_float = true;
                ++cursor;
                bool negexp = ('-' == *cursor);
                if (negexp || ('+' == *cursor))
                {
                    ++cursor;
                }

                if (!is_integer(*cursor))
                {
                    // the exponent needs at least one digit
                    return false;
                }

                uchar1 expDigit = (uchar1)(*cursor - (uchar1)'0');
                expnumber = expDigit;
                ++cursor;
                // at most two further exponent digits are accepted
                for (int extra = 0; (extra < 2) && is_integer(*cursor); ++extra)
                {
                    expDigit  = (uchar1)(*cursor - (uchar1)'0');
                    expnumber = 10 * expnumber + expDigit;
                    ++cursor;
                }

                if (is_integer(*cursor))
                {
                    // a fourth exponent digit: we refuse to parse this
                    return false;
                }

                exponent += (negexp ? -expnumber : expnumber);
            }

            if (!is_float)
            {
                if (/*unlikely*/ (digitcount >= 18))
                {
                    // The accumulator may well have wrapped, so the whole
                    // number is parsed again by the slower routine.
                    return parse_large_integer(buf, pj, offset, found_minus);
                }

                mantissa = negative ? 0 - mantissa : mantissa;
                pj.WriteTapeInt64((int64_t)mantissa);
                return is_structural_or_whitespace((uint8_t)(*cursor)) != 0;
            }

            // A negative sum becomes a huge unsigned value and fails the range test below.
            uint64_t powerindex = (uint64_t)(308 + exponent);
            if (/*unlikely*/ (digitcount >= 19))
            {
                // The accumulator may have wrapped, unless most of the count
                // comes from leading zeros as in 0.0000somenumber: skip those
                // (and the decimal point) before deciding.
                char1 *firstSignificant = digitsBegin;
                while ((*firstSignificant == (char1)'0') || (*firstSignificant == (char1)'.'))
                {
                    firstSignificant++;
                }

                digitcount -= (int)(firstSignificant - digitsBegin);
                if (digitcount >= 19)
                {
                    // Still too many digits: start over on the slow path.
                    return parse_float(buf, pj, offset, found_minus);
                }
            }

            if (/*unlikely*/ (powerindex > 2 * 308))
            {
                // The exponent is outside the table: start over on the slow path.
                return parse_float(buf, pj, offset, found_minus);
            }

            double scale = power_of_ten[powerindex];
            if (negative)
            {
                scale = -scale;
            }

            double value = mantissa * scale;
            pj.WriteTapeDouble(value);

            return is_structural_or_whitespace((uint8_t)(*cursor)) != 0;
        }
        // called by parse_number when we know that the output is a float,
        // but where there might be some integer overflow. The trick here is to
        // parse using floats from the start.
        // Do not call this function directly as it skips some of the checks from
        // parse_number
        //
        // This function will almost never be called!!!
        //
        // Note: a redesign could avoid this function entirely.
        //
        // buf + offset is where the number starts; when found_minus is true the
        // first byte is skipped as the sign. The value is accumulated directly in
        // a double and written to pj with WriteTapeDouble.
        //
        // Returns false when the text is rejected, which includes values whose
        // magnitude does not fit in a finite double. Otherwise returns whether
        // the byte after the number is structural or whitespace.
        private static bool parse_float(uint8_t *buf, ParsedJson pj, uint32_t offset, bool found_minus)
        {
            bytechar *p        = (bytechar *)(buf + offset);
            bool      negative = false;

            if (found_minus)
            {
                ++p;
                negative = true;
            }

            double i;

            if (*p == '0')
            {
                // 0 cannot be followed by an integer
                ++p;
                i = 0;
            }
            else
            {
                // no digit check here: the first byte is converted as-is
                unsigned_bytechar digit = (unsigned_bytechar)(*p - (bytechar)'0');
                i = digit;
                p++;
                while (is_integer(*p))
                {
                    digit = (unsigned_bytechar)(*p - '0');
                    i     = 10 * i + digit;
                    ++p;
                }
            }

            if ('.' == *p)
            {
                ++p;
                // weight of the next fractional digit, divided by ten per digit
                double fractionalweight = 1;
                if (is_integer(*p))
                {
                    unsigned_bytechar digit = (unsigned_bytechar)(*p - '0');
                    ++p;
                    fractionalweight *= 0.1;
                    i = i + digit * fractionalweight;
                }
                else
                {
                    // a decimal point must be followed by at least one digit
#if JSON_TEST_NUMBERS // for unit testing
                    foundInvalidNumber(buf + offset);
#endif
                    return(false);
                }

                while (is_integer(*p))
                {
                    unsigned_bytechar digit = (unsigned_bytechar)(*p - '0');
                    ++p;
                    fractionalweight *= 0.1;
                    i = i + digit * fractionalweight;
                }
            }

            if (('e' == *p) || ('E' == *p))
            {
                ++p;
                bool negexp = false;
                if ('-' == *p)
                {
                    negexp = true;
                    ++p;
                }
                else if ('+' == *p)
                {
                    ++p;
                }

                if (!is_integer(*p))
                {
                    // the exponent needs at least one digit
#if JSON_TEST_NUMBERS // for unit testing
                    foundInvalidNumber(buf + offset);
#endif
                    return(false);
                }

                unsigned_bytechar digit     = (unsigned_bytechar)(*p - '0');
                int64_t           expnumber = digit; // exponential part
                p++;
                // up to three further exponent digits are accepted
                if (is_integer(*p))
                {
                    digit     = (unsigned_bytechar)(*p - '0');
                    expnumber = 10 * expnumber + digit;
                    ++p;
                }

                if (is_integer(*p))
                {
                    digit     = (unsigned_bytechar)(*p - '0');
                    expnumber = 10 * expnumber + digit;
                    ++p;
                }

                if (is_integer(*p))
                {
                    digit     = (unsigned_bytechar)(*p - '0');
                    expnumber = 10 * expnumber + digit;
                    ++p;
                }

                if (is_integer(*p))
                {
// we refuse to parse this
#if JSON_TEST_NUMBERS // for unit testing
                    foundInvalidNumber(buf + offset);
#endif
                    return(false);
                }

                int exponent = (int)(negexp ? -expnumber : expnumber);
                if ((exponent > 308) || (exponent < -308))
                {
// we refuse to parse this
#if JSON_TEST_NUMBERS // for unit testing
                    foundInvalidNumber(buf + offset);
#endif
                    return(false);
                }

                // the table is indexed with an offset of 308
                i *= power_of_ten[308 + exponent];
            }

            if (is_not_structural_or_whitespace((byte)*p) != 0)
            {
                return(false);
            }

            // Accumulating a very long run of digits, or scaling by the power of
            // ten above, can exceed the range of a double. An infinite value is
            // not a valid JSON number, so it is rejected rather than stored.
            // i has no sign applied yet, so only +Infinity can occur.
            if (double.IsInfinity(i))
            {
#if JSON_TEST_NUMBERS // for unit testing
                foundInvalidNumber(buf + offset);
#endif
                return(false);
            }

            double d = negative ? -i : i;
            pj.WriteTapeDouble(d);
#if JSON_TEST_NUMBERS // for unit testing
            foundFloat(d, buf + offset);
#endif
            return(is_structural_or_whitespace((byte)(*p)) != 0);
        }
Beispiel #4
0
        // called by parse_number when we know that the output is a float,
        // but where there might be some integer overflow. The trick here is to
        // parse using floats from the start.
        // Do not call this function directly as it skips some of the checks from
        // parse_number
        //
        // This function will almost never be called!!!
        //
        // Note: a redesign could avoid this function entirely.
        //
        // buf + offset is where the number starts; when found_minus is true the
        // first byte is skipped as the sign. The value is accumulated directly in
        // a double and written to pj with WriteTapeDouble.
        //
        // Returns false when the text is rejected, which includes values whose
        // magnitude does not fit in a finite double. Otherwise returns whether
        // the byte after the number is structural or whitespace.
        static bool parse_float(uint8_t *buf, ParsedJson pj, uint32_t offset, bool found_minus)
        {
            char1 *p        = (char1 *)(buf + offset);
            bool   negative = false;

            if (found_minus)
            {
                ++p;
                negative = true;
            }

            /*long*/
            double i;

            if (*p == '0')
            {
                // 0 cannot be followed by an integer
                ++p;
                i = 0;
            }
            else
            {
                // no digit check here: the first byte is converted as-is
                uchar1 digit = (uchar1)(*p - (uchar1)'0');
                i = digit;
                p++;
                while (is_integer(*p))
                {
                    digit = (uchar1)(*p - (uchar1)'0');
                    i     = 10 * i + digit;
                    ++p;
                }
            }

            if ('.' == *p)
            {
                ++p;
                if (!is_integer(*p))
                {
                    // a decimal point must be followed by at least one digit
                    return(false);
                }

                // Index into power_of_ten of the weight of the next fractional
                // digit; it is decremented before each use. Once it drops below
                // zero the digit contributes nothing.
                int fractionalweight = 308;
                while (is_integer(*p))
                {
                    uchar1 digit = (uchar1)(*p - (uchar1)'0');
                    ++p;
                    fractionalweight--;
                    i = i + digit * (fractionalweight >= 0 ? power_of_ten[fractionalweight] : 0);
                }
            }

            if (('e' == *p) || ('E' == *p))
            {
                ++p;
                bool negexp = false;
                if ('-' == *p)
                {
                    negexp = true;
                    ++p;
                }
                else if ('+' == *p)
                {
                    ++p;
                }

                if (!is_integer(*p))
                {
                    // the exponent needs at least one digit
                    return(false);
                }

                uchar1  digit     = (uchar1)(*p - (uchar1)'0');
                int64_t expnumber = digit; // exponential part
                p++;
                // up to three further exponent digits are accepted
                for (int extra = 0; (extra < 3) && is_integer(*p); ++extra)
                {
                    digit     = (uchar1)(*p - (uchar1)'0');
                    expnumber = 10 * expnumber + digit;
                    ++p;
                }

                if (is_integer(*p))
                {
                    // a fifth exponent digit: we refuse to parse this
                    return(false);
                }

                if (/*unlikely*/ (expnumber > 308))
                {
                    // this path is unlikely
                    if (negexp)
                    {
                        // We either have zero or a subnormal.
                        // We expect this to be uncommon so we go through a slow path.
                        i = subnormal_power10(i, (int)-expnumber);
                    }
                    else
                    {
                        // We know for sure that we have a number that is too large,
                        // we refuse to parse this
                        return(false);
                    }
                }
                else
                {
                    int exponent = (int)(negexp ? -expnumber : expnumber);
                    // we have that expnumber is [0,308] so that
                    // exponent is [-308,308] so that
                    // 308 + exponent is in [0, 2 * 308]
                    i *= power_of_ten[308 + exponent];
                }
            }

            if (is_not_structural_or_whitespace((uint8_t)(*p)) != 0)
            {
                return(false);
            }

            // Accumulating a very long run of digits, or scaling by the power of
            // ten above, can exceed the range of a double. An infinite value is
            // not a valid JSON number, so it is rejected rather than stored.
            // i has no sign applied yet, so only +Infinity can occur.
            if (double.IsInfinity(i))
            {
                return(false);
            }

            double d = negative ? -i : i;

            pj.WriteTapeDouble(d);
            return(is_structural_or_whitespace((uint8_t)(*p)) != 0);
        }