Esempio n. 1
0
        public static bool parse_number(uint8_t *buf, ParsedJson pj, uint32_t offset, bool found_minus)
        {
            bytechar *p        = (bytechar *)(buf + offset);
            bool      negative = false;

            if (found_minus)
            {
                ++p;
                negative = true;
                if (!is_integer(*p))
                {
                    // a negative sign must be followed by an integer

                    return(false);
                }
            }

            bytechar *startdigits = p;

            int64_t i;

            if (*p == '0')
            {
                // 0 cannot be followed by an integer
                ++p;
                if (is_not_structural_or_whitespace_or_exponent_or_decimal((uint8_t)(*p)))
                {
#if JSON_TEST_NUMBERS // for unit testing
                    foundInvalidNumber(buf + offset);
#endif
                    return(false);
                }

                i = 0;
            }
            else
            {
                if (!(is_integer(*p)))
                {
                    // must start with an integer
#if JSON_TEST_NUMBERS // for unit testing
                    foundInvalidNumber(buf + offset);
#endif
                    return(false);
                }

                unsigned_bytechar digit = (unsigned_bytechar)(*p - '0');
                i = digit;
                p++;
                // the is_made_of_eight_digits_fast routine is unlikely to help here because
                // we rarely see large integer parts like 123456789
                while (is_integer(*p))
                {
                    digit = (unsigned_bytechar)(*p - '0');
                    i     = 10 * i + digit; // might overflow
                    ++p;
                }
            }

            int64_t exponent = 0;

            if ('.' == *p)
            {
                ++p;
                bytechar *firstafterperiod = p;
                if (is_integer(*p))
                {
                    unsigned_bytechar digit = (unsigned_bytechar)(*p - '0');
                    ++p;
                    i = i * 10 + digit;
                }
                else
                {
#if JSON_TEST_NUMBERS // for unit testing
                    foundInvalidNumber(buf + offset);
#endif
                    return(false);
                }
#if SWAR_NUMBER_PARSING
                // this helps if we have lots of decimals!
                // this turns out to be frequent enough.
                if (is_made_of_eight_digits_fast(p))
                {
                    i  = i * 100000000 + parse_eight_digits_unrolled(p);
                    p += 8;
                    // exponent -= 8;
                }
#endif
                while (is_integer(*p))
                {
                    unsigned_bytechar digit = (unsigned_bytechar)(*p - '0');
                    ++p;
                    i = i * 10 + digit; // in rare cases, this will overflow, but that's ok because we have parse_highprecision_float later.
                }

                exponent = firstafterperiod - p;
            }

            int digitcount = (int)(p - startdigits - 1);

            int64_t expnumber = 0; // exponential part
            if (('e' == *p) || ('E' == *p))
            {
                ++p;
                bool negexp = false;
                if ('-' == *p)
                {
                    negexp = true;
                    ++p;
                }
                else if ('+' == *p)
                {
                    ++p;
                }

                if (!is_integer(*p))
                {
#if JSON_TEST_NUMBERS // for unit testing
                    foundInvalidNumber(buf + offset);
#endif
                    return(false);
                }

                unsigned_bytechar digit = (unsigned_bytechar)(*p - '0');
                expnumber = digit;
                p++;
                while (is_integer(*p))
                {
                    digit     = (unsigned_bytechar)(*p - '0');
                    expnumber = 10 * expnumber + digit;
                    ++p;
                }

                if (is_integer(*p))
                {
                    digit     = (unsigned_bytechar)(*p - '0');
                    expnumber = 10 * expnumber + digit;
                    ++p;
                }

                if (is_integer(*p))
                {
                    digit     = (unsigned_bytechar)(*p - '0');
                    expnumber = 10 * expnumber + digit;
                    ++p;
                }

                if (is_integer(*p))
                {
                    // we refuse to parse this
#if JSON_TEST_NUMBERS // for unit testing
                    foundInvalidNumber(buf + offset);
#endif
                    return(false);
                }

                exponent += (negexp ? -expnumber : expnumber);
            }

            i = negative ? -i : i;
            if ((exponent != 0) || (expnumber != 0))
            {
                if ((digitcount >= 19))
                {
                    // this is uncommon!!!
                    // this is almost never going to get called!!!
                    // we start anew, going slowly!!!
                    return(parse_float(buf, pj, offset,
                                       found_minus));
                }

                ///////////
                // We want 0.1e1 to be a float.
                //////////
                if (i == 0)
                {
                    pj.WriteTapeDouble(0.0);
#if JSON_TEST_NUMBERS // for unit testing
                    foundFloat(0.0, buf + offset);
#endif
                }
                else
                {
                    if ((exponent > 308) || (exponent < -308))
                    {
                        // we refuse to parse this
#if JSON_TEST_NUMBERS // for unit testing
                        foundInvalidNumber(buf + offset);
#endif
                        return(false);
                    }

                    double d = i;
                    d *= power_of_ten[308 + exponent];
                    // d = negative ? -d : d;
                    pj.WriteTapeDouble(d);
#if JSON_TEST_NUMBERS // for unit testing
                    foundFloat(d, buf + offset);
#endif
                }
            }
            else
            {
                if ((digitcount >= 18))
                {
                    // this is uncommon!!!
                    return(parse_large_integer(buf, pj, offset,
                                               found_minus));
                }

                pj.WriteTapeInt64(i);
#if JSON_TEST_NUMBERS // for unit testing
                foundInteger(i, buf + offset);
#endif
            }

            return(is_structural_or_whitespace((uint8_t)(*p)) != 0);
        }
Esempio n. 2
0
        internal static bool parse_number(uint8_t *buf, ParsedJson pj, uint32_t offset, bool found_minus)
        {
            char1 *p        = (char1 *)(buf + offset);
            bool   negative = false;

            if (found_minus)
            {
                ++p;
                negative = true;
                if (!is_integer(*p))
                {
                    // a negative sign must be followed by an integer
                    return(false);
                }
            }

            char1 *  startdigits = p;
            uint64_t i; // an unsigned int avoids signed overflows (which are bad)

            if (*p == (char1)'0')
            {
                // 0 cannot be followed by an integer
                ++p;
                if (is_not_structural_or_whitespace_or_exponent_or_decimal((uint8_t)(*p)))
                {
                    return(false);
                }
                i = 0;
            }
            else
            {
                if (!(is_integer(*p)))
                {
                    // must start with an integer
                    return(false);
                }

                uchar1 digit = (uchar1)(*p - (uchar1)'0');
                i = digit;
                p++;
                // the is_made_of_eight_digits_fast routine is unlikely to help here because
                // we rarely see large integer parts like 123456789
                while (is_integer(*p))
                {
                    digit = (uchar1)(*p - (uchar1)'0');
                    // a multiplication by 10 is cheaper than an arbitrary integer multiplication
                    i = 10 * i + digit; // might overflow, we will handle the overflow later
                    ++p;
                }
            }

            int64_t exponent = 0;
            bool    is_float = false;

            if ('.' == *p)
            {
                is_float = true; // At this point we know that we have a float
                // we continue with the fiction that we have an integer. If the
                // floating point number is representable as x * 10^z for some integer
                // z that fits in 53 bits, then we will be able to convert back the
                // the integer into a float in a lossless manner.
                ++p;
                char1 *firstafterperiod = p;
                if (is_integer(*p))
                {
                    uchar1 digit = (uchar1)(*p - (uchar1)'0');
                    ++p;
                    i = i * 10 + digit; // might overflow + multiplication by 10 is likely cheaper than arbitrary mult.
                    // we will handle the overflow later
                }
                else
                {
                    return(false);
                }
#if SWAR_NUMBER_PARSING
                // this helps if we have lots of decimals!
                // this turns out to be frequent enough.
                if (is_made_of_eight_digits_fast(p))
                {
                    i  = i * 100000000 + parse_eight_digits_unrolled(p);
                    p += 8;
                }
#endif
                while (is_integer(*p))
                {
                    uchar1 digit = (uchar1)(*p - (uchar1)'0');
                    ++p;
                    i = i * 10 + digit; // in rare cases, this will overflow, but that's ok because we have parse_highprecision_float later.
                }

                exponent = firstafterperiod - p;
            }

            int     digitcount = (int)(p - startdigits - 1); // used later to guard against overflows
            int64_t expnumber  = 0;                          // exponential part
            if (((char1)'e' == *p) || ((char1)'E' == *p))
            {
                is_float = true;
                ++p;
                bool negexp = false;
                if ('-' == *p)
                {
                    negexp = true;
                    ++p;
                }
                else if ('+' == *p)
                {
                    ++p;
                }

                if (!is_integer(*p))
                {
                    return(false);
                }

                uchar1 digit = (uchar1)(*p - (uchar1)'0');
                expnumber = digit;
                p++;
                if (is_integer(*p))
                {
                    digit     = (uchar1)(*p - (uchar1)'0');
                    expnumber = 10 * expnumber + digit;
                    ++p;
                }

                if (is_integer(*p))
                {
                    digit     = (uchar1)(*p - (uchar1)'0');
                    expnumber = 10 * expnumber + digit;
                    ++p;
                }

                if (is_integer(*p))
                {
                    // we refuse to parse this
                    return(false);
                }

                exponent += (negexp ? -expnumber : expnumber);
            }

            if (is_float)
            {
                uint64_t powerindex = (uint64_t)(308 + exponent);
                if (/*unlikely*/ ((digitcount >= 19)))
                {
                    // this is uncommon
                    // It is possible that the integer had an overflow.
                    // We have to handle the case where we have 0.0000somenumber.
                    char1 *start = startdigits;
                    while ((*start == (char1)'0') || (*start == (char1)'.'))
                    {
                        start++;
                    }

                    digitcount -= (int)(start - startdigits);
                    if (digitcount >= 19)
                    {
                        // Ok, chances are good that we had an overflow!
                        // this is almost never going to get called!!!
                        // we start anew, going slowly!!!
                        return(parse_float(buf, pj, offset,
                                           found_minus));
                    }
                }

                if (/*unlikely*/ ((powerindex > 2 * 308)))
                {
                    // this is uncommon!!!
                    // this is almost never going to get called!!!
                    // we start anew, going slowly!!!
                    return(parse_float(buf, pj, offset,
                                       found_minus));
                }

                double factor = power_of_ten[powerindex];
                factor = negative ? -factor : factor;
                double d = i * factor;
                pj.WriteTapeDouble(d);
            }
            else
            {
                if (/*unlikely*/ (digitcount >= 18))
                {
                    // this is uncommon!!!
                    // there is a good chance that we had an overflow, so we need
                    // need to recover: we parse the whole thing again.
                    return(parse_large_integer(buf, pj, offset,
                                               found_minus));
                }

                i = negative ? 0 - i : i;
                pj.WriteTapeInt64((int64_t)i);
            }

            return(is_structural_or_whitespace((uint8_t)(*p)) != 0);
        }
Esempio n. 3
0
        // called by parse_number when we know that the output is an integer,
        // but where there might be some integer overflow.
        // we want to catch overflows!
        // Do not call this function directly as it skips some of the checks from
        // parse_number
        //
        // This function will almost never be called!!!
        //
        static bool parse_large_integer(uint8_t *buf, ParsedJson pj, uint32_t offset, bool found_minus)
        {
            bytechar *p        = (bytechar *)(buf + offset);
            bool      negative = false;

            if (found_minus)
            {
                ++p;
                negative = true;
            }

            uint64_t i;

            if (*p == '0')
            {
                // 0 cannot be followed by an integer
                ++p;
                i = 0;
            }
            else
            {
                unsigned_bytechar digit = (unsigned_bytechar)(*p - '0');
                i = digit;
                p++;
                // the is_made_of_eight_digits_fast routine is unlikely to help here because
                // we rarely see large integer parts like 123456789
                while (is_integer(*p))
                {
                    digit = (unsigned_bytechar)(*p - '0');
                    if (mul_overflow(i, 10, &i))
                    {
#if JSON_TEST_NUMBERS // for unit testing
                        foundInvalidNumber(buf + offset);
#endif
                        return(false); // overflow
                    }

                    if (add_overflow(i, digit, &i))
                    {
#if JSON_TEST_NUMBERS // for unit testing
                        foundInvalidNumber(buf + offset);
#endif
                        return(false); // overflow
                    }

                    ++p;
                }
            }

            if (negative)
            {
                if (i > 0x8000000000000000)
                {
                    // overflows!
#if JSON_TEST_NUMBERS // for unit testing
                    foundInvalidNumber(buf + offset);
#endif
                    return(false); // overflow
                }
            }
            else
            {
                if (i >= 0x8000000000000000)
                {
                    // overflows!
#if JSON_TEST_NUMBERS // for unit testing
                    foundInvalidNumber(buf + offset);
#endif
                    return(false); // overflow
                }
            }

            int64_t signed_answer = negative ? -(int64_t)i : (int64_t)i;
            pj.WriteTapeInt64(signed_answer);
#if JSON_TEST_NUMBERS // for unit testing
            foundInteger(signed_answer, buf + offset);
#endif
            return(is_structural_or_whitespace((byte)(*p)) != 0);
        }
Esempio n. 4
0
        // called by parse_number when we know that the output is an integer,
        // but where there might be some integer overflow.
        // we want to catch overflows!
        // Do not call this function directly as it skips some of the checks from
        // parse_number
        //
        // This function will almost never be called!!!
        //
        static bool parse_large_integer(uint8_t *buf, ParsedJson pj, uint32_t offset, bool found_minus)
        {
            char1 *p = (char1 *)(buf + offset);

            bool negative = false;

            if (found_minus)
            {
                ++p;
                negative = true;
            }

            uint64_t i;

            if (*p == (uchar1)'0')
            {
                // 0 cannot be followed by an integer
                ++p;
                i = 0;
            }
            else
            {
                uchar1 digit = (uchar1)(*p - (uchar1)'0');
                i = digit;
                p++;
                // the is_made_of_eight_digits_fast routine is unlikely to help here because
                // we rarely see large integer parts like 123456789
                while (is_integer(*p))
                {
                    digit = (uchar1)(*p - (uchar1)'0');
                    if (mul_overflow(i, 10, &i))
                    {
                        return(false); // overflow
                    }

                    if (add_overflow(i, digit, &i))
                    {
                        return(false); // overflow
                    }
                    ++p;
                }
            }

            if (negative)
            {
                if (i > 0x8000000000000000)
                {
                    return(false); // overflow
                }
            }
            else
            {
                if (i >= 0x8000000000000000)
                {
                    return(false); // overflow
                }
            }

            int64_t signed_answer = negative ? -(int64_t)i : (int64_t)i;

            pj.WriteTapeInt64(signed_answer);
            return(is_structural_or_whitespace((uchar1)(*p)) != 0);
        }