// Parses the JSON number that starts at buf + offset and appends it to pj.
//
// buf         - input bytes
// pj          - receives the parsed value through WriteTapeInt64 / WriteTapeDouble
// offset      - position in buf of the first byte of the number (the '-' sign
//               when found_minus is true)
// found_minus - true when the number starts with a minus sign, which is skipped
//
// Returns false when the text is rejected as a number. After a successful write
// it returns whether the byte following the number is structural or whitespace.
// Inputs with too many digits for the fast path are delegated to parse_float or
// parse_large_integer, whose result is returned unchanged.
public static bool parse_number(uint8_t *buf, ParsedJson pj, uint32_t offset, bool found_minus)
{
    bytechar *p = (bytechar *)(buf + offset);
    bool negative = false;
    if (found_minus)
    {
        ++p;
        negative = true;
        if (!is_integer(*p))
        {
            // a negative sign must be followed by an integer
            return false;
        }
    }
    bytechar *startdigits = p;

    int64_t i;
    if (*p == '0')
    {
        // 0 cannot be followed by an integer
        ++p;
        if (is_not_structural_or_whitespace_or_exponent_or_decimal((uint8_t)(*p)))
        {
#if JSON_TEST_NUMBERS // for unit testing
            foundInvalidNumber(buf + offset);
#endif
            return false;
        }
        i = 0;
    }
    else
    {
        if (!(is_integer(*p)))
        {
            // must start with an integer
#if JSON_TEST_NUMBERS // for unit testing
            foundInvalidNumber(buf + offset);
#endif
            return false;
        }
        unsigned_bytechar digit = (unsigned_bytechar)(*p - '0');
        i = digit;
        p++;
        // the is_made_of_eight_digits_fast routine is unlikely to help here because
        // we rarely see large integer parts like 123456789
        while (is_integer(*p))
        {
            digit = (unsigned_bytechar)(*p - '0');
            i = 10 * i + digit; // might overflow; the digitcount checks below reroute such inputs
            ++p;
        }
    }

    // Fractional digits are folded into the integer i; exponent records how many
    // were consumed (as a negative count) so the value can be rescaled later.
    int64_t exponent = 0;
    if ('.' == *p)
    {
        ++p;
        bytechar *firstafterperiod = p;
        if (is_integer(*p))
        {
            unsigned_bytechar digit = (unsigned_bytechar)(*p - '0');
            ++p;
            i = i * 10 + digit;
        }
        else
        {
            // a decimal point must be followed by at least one digit
#if JSON_TEST_NUMBERS // for unit testing
            foundInvalidNumber(buf + offset);
#endif
            return false;
        }
#if SWAR_NUMBER_PARSING
        // this helps if we have lots of decimals!
        // this turns out to be frequent enough.
        if (is_made_of_eight_digits_fast(p))
        {
            i = i * 100000000 + parse_eight_digits_unrolled(p);
            p += 8;
        }
#endif
        while (is_integer(*p))
        {
            unsigned_bytechar digit = (unsigned_bytechar)(*p - '0');
            ++p;
            i = i * 10 + digit; // in rare cases this overflows; the digitcount check below falls back to parse_float
        }
        exponent = firstafterperiod - p;
    }

    // Bytes consumed since the first digit, minus one; used to detect possible overflow of i.
    int digitcount = (int)(p - startdigits - 1);

    int64_t expnumber = 0; // exponential part
    if (('e' == *p) || ('E' == *p))
    {
        ++p;
        bool negexp = false;
        if ('-' == *p)
        {
            negexp = true;
            ++p;
        }
        else if ('+' == *p)
        {
            ++p;
        }
        if (!is_integer(*p))
        {
#if JSON_TEST_NUMBERS // for unit testing
            foundInvalidNumber(buf + offset);
#endif
            return false;
        }
        unsigned_bytechar digit = (unsigned_bytechar)(*p - '0');
        expnumber = digit;
        p++;
        // At most three exponent digits are accepted. Reading them with an
        // unbounded loop would let expnumber wrap around silently and would make
        // the rejection of a fourth digit below unreachable.
        if (is_integer(*p))
        {
            digit = (unsigned_bytechar)(*p - '0');
            expnumber = 10 * expnumber + digit;
            ++p;
        }
        if (is_integer(*p))
        {
            digit = (unsigned_bytechar)(*p - '0');
            expnumber = 10 * expnumber + digit;
            ++p;
        }
        if (is_integer(*p))
        {
            // we refuse to parse this
#if JSON_TEST_NUMBERS // for unit testing
            foundInvalidNumber(buf + offset);
#endif
            return false;
        }
        exponent += (negexp ? -expnumber : expnumber);
    }

    i = negative ? -i : i;
    if ((exponent != 0) || (expnumber != 0))
    {
        if (digitcount >= 19)
        {
            // this is uncommon!!!
            // this is almost never going to get called!!!
            // we start anew, going slowly!!!
            return parse_float(buf, pj, offset, found_minus);
        }
        ///////////
        // We want 0.1e1 to be a float.
        //////////
        if (i == 0)
        {
            pj.WriteTapeDouble(0.0);
#if JSON_TEST_NUMBERS // for unit testing
            foundFloat(0.0, buf + offset);
#endif
        }
        else
        {
            if ((exponent > 308) || (exponent < -308))
            {
                // we refuse to parse this
#if JSON_TEST_NUMBERS // for unit testing
                foundInvalidNumber(buf + offset);
#endif
                return false;
            }
            double d = i;
            // power_of_ten is indexed with a bias of 308 so that negative exponents map to valid slots
            d *= power_of_ten[308 + exponent];
            pj.WriteTapeDouble(d);
#if JSON_TEST_NUMBERS // for unit testing
            foundFloat(d, buf + offset);
#endif
        }
    }
    else
    {
        if (digitcount >= 18)
        {
            // this is uncommon!!!
            return parse_large_integer(buf, pj, offset, found_minus);
        }
        pj.WriteTapeInt64(i);
#if JSON_TEST_NUMBERS // for unit testing
        foundInteger(i, buf + offset);
#endif
    }
    return is_structural_or_whitespace((uint8_t)(*p)) != 0;
}
// Reads the JSON number located at buf + offset and records it on pj, either
// as a 64-bit integer or as a double.
//
// found_minus tells the parser that the number starts with a '-' which must be
// skipped. The result is false when the text is rejected; otherwise it reports
// whether the byte right after the number is structural or whitespace. Inputs
// whose digit count suggests an overflow of the fast path are handed over to
// parse_float / parse_large_integer and their result is returned as is.
internal static bool parse_number(uint8_t *buf, ParsedJson pj, uint32_t offset, bool found_minus)
{
    char1 *cursor = (char1 *)(buf + offset);
    bool negative = found_minus;
    if (negative)
    {
        ++cursor;
        if (!is_integer(*cursor))
        {
            // the sign has to be followed by a digit
            return false;
        }
    }

    char1 *digits_begin = cursor;
    // unsigned accumulator: wrap-around is harmless here and is detected later
    uint64_t mantissa;
    if (*cursor != (char1)'0')
    {
        if (!is_integer(*cursor))
        {
            // a number has to begin with a digit
            return false;
        }
        uchar1 lead = (uchar1)(*cursor - (uchar1)'0');
        mantissa = lead;
        cursor++;
        // integer parts are usually short, so digits are taken one at a time
        while (is_integer(*cursor))
        {
            uchar1 next = (uchar1)(*cursor - (uchar1)'0');
            mantissa = 10 * mantissa + next; // may wrap; handled through the digit count below
            ++cursor;
        }
    }
    else
    {
        // a leading zero may not be followed by another digit
        ++cursor;
        if (is_not_structural_or_whitespace_or_exponent_or_decimal((uint8_t)(*cursor)))
        {
            return false;
        }
        mantissa = 0;
    }

    int64_t exponent = 0;
    bool is_float = false;
    if (*cursor == '.')
    {
        // From here on the value is a float, but the fractional digits keep being
        // accumulated into the integer mantissa; exponent remembers how many
        // of them were read so the value can be scaled back afterwards.
        is_float = true;
        ++cursor;
        char1 *fraction_begin = cursor;
        if (!is_integer(*cursor))
        {
            // the decimal point requires at least one digit after it
            return false;
        }
        uchar1 first_fraction_digit = (uchar1)(*cursor - (uchar1)'0');
        ++cursor;
        mantissa = mantissa * 10 + first_fraction_digit;
#if SWAR_NUMBER_PARSING
        // long fractions are frequent enough to take eight digits in one step
        if (is_made_of_eight_digits_fast(cursor))
        {
            mantissa = mantissa * 100000000 + parse_eight_digits_unrolled(cursor);
            cursor += 8;
        }
#endif
        while (is_integer(*cursor))
        {
            uchar1 fraction_digit = (uchar1)(*cursor - (uchar1)'0');
            ++cursor;
            mantissa = mantissa * 10 + fraction_digit; // a rare wrap here is caught by the digit count below
        }
        exponent = fraction_begin - cursor;
    }

    // bytes consumed since the first digit, minus one; guards against overflow later
    int digit_count = (int)(cursor - digits_begin - 1);

    int64_t exp_value = 0;
    if ((*cursor == (char1)'e') || (*cursor == (char1)'E'))
    {
        is_float = true;
        ++cursor;
        bool exp_is_negative = false;
        if (*cursor == '-')
        {
            exp_is_negative = true;
            ++cursor;
        }
        else if (*cursor == '+')
        {
            ++cursor;
        }
        if (!is_integer(*cursor))
        {
            return false;
        }
        uchar1 exp_digit = (uchar1)(*cursor - (uchar1)'0');
        exp_value = exp_digit;
        cursor++;
        // up to two further exponent digits are accepted
        for (int extra = 0; (extra < 2) && is_integer(*cursor); extra++)
        {
            exp_digit = (uchar1)(*cursor - (uchar1)'0');
            exp_value = 10 * exp_value + exp_digit;
            ++cursor;
        }
        if (is_integer(*cursor))
        {
            // a fourth exponent digit: we refuse to parse this
            return false;
        }
        if (exp_is_negative)
        {
            exponent -= exp_value;
        }
        else
        {
            exponent += exp_value;
        }
    }

    if (!is_float)
    {
        if (/*unlikely*/ (digit_count >= 18))
        {
            // the accumulator may have wrapped, so the text is parsed again
            // by the overflow-checking routine
            return parse_large_integer(buf, pj, offset, found_minus);
        }
        if (negative)
        {
            mantissa = 0 - mantissa;
        }
        pj.WriteTapeInt64((int64_t)mantissa);
        return is_structural_or_whitespace((uint8_t)(*cursor)) != 0;
    }

    if (/*unlikely*/ (digit_count >= 19))
    {
        // The accumulator may have wrapped, unless the count is inflated by
        // leading zeros as in 0.0000somenumber: discount those first.
        char1 *significant = digits_begin;
        while ((*significant == (char1)'0') || (*significant == (char1)'.'))
        {
            significant++;
        }
        digit_count -= (int)(significant - digits_begin);
        if (digit_count >= 19)
        {
            // most likely a real overflow: start over on the slow path
            return parse_float(buf, pj, offset, found_minus);
        }
    }

    // the table index is biased by 308; an out-of-range exponent (including a
    // negative sum, which becomes a huge unsigned value) goes to the slow path
    uint64_t power_index = (uint64_t)(308 + exponent);
    if (/*unlikely*/ (power_index > 2 * 308))
    {
        return parse_float(buf, pj, offset, found_minus);
    }
    double factor = power_of_ten[power_index];
    if (negative)
    {
        factor = -factor;
    }
    pj.WriteTapeDouble(mantissa * factor);
    return is_structural_or_whitespace((uint8_t)(*cursor)) != 0;
}
// Slow path used by parse_number for input already known to be an integer
// whose digit count makes an overflow possible; every accumulation step is
// overflow-checked here.
//
// Not meant to be called directly: several validity checks performed by
// parse_number are skipped. It is expected to run very rarely.
//
// Returns false when the value does not fit in a signed 64-bit integer;
// otherwise the value is written to pj and the result tells whether the byte
// following the number is structural or whitespace.
static bool parse_large_integer(uint8_t *buf, ParsedJson pj, uint32_t offset, bool found_minus)
{
    bytechar *cursor = (bytechar *)(buf + offset);
    bool negative = false;
    if (found_minus)
    {
        // step over the sign
        ++cursor;
        negative = true;
    }

    uint64_t magnitude;
    if (*cursor == '0')
    {
        // a leading zero stands alone
        ++cursor;
        magnitude = 0;
    }
    else
    {
        unsigned_bytechar digit = (unsigned_bytechar)(*cursor - '0');
        magnitude = digit;
        cursor++;
        // digits are consumed one at a time; long integer parts are rare
        while (is_integer(*cursor))
        {
            digit = (unsigned_bytechar)(*cursor - '0');
            // multiply first, then add; stop as soon as either step overflows
            if (mul_overflow(magnitude, 10, &magnitude) || add_overflow(magnitude, digit, &magnitude))
            {
#if JSON_TEST_NUMBERS // for unit testing
                foundInvalidNumber(buf + offset);
#endif
                return false; // overflow
            }
            ++cursor;
        }
    }

    // a negative value may reach 0x8000000000000000 in magnitude, a positive one may not
    bool out_of_range = negative
        ? (magnitude > 0x8000000000000000)
        : (magnitude >= 0x8000000000000000);
    if (out_of_range)
    {
#if JSON_TEST_NUMBERS // for unit testing
        foundInvalidNumber(buf + offset);
#endif
        return false; // overflow
    }

    int64_t signed_answer;
    if (negative)
    {
        signed_answer = -(int64_t)magnitude;
    }
    else
    {
        signed_answer = (int64_t)magnitude;
    }
    pj.WriteTapeInt64(signed_answer);
#if JSON_TEST_NUMBERS // for unit testing
    foundInteger(signed_answer, buf + offset);
#endif
    return is_structural_or_whitespace((byte)(*cursor)) != 0;
}
// Fallback of parse_number for integers that may not fit in 64 bits.
// The digits are read again with overflow-checked arithmetic so that an
// overflow is reported instead of silently wrapping.
//
// Do not call this directly: it relies on checks already done by
// parse_number. In practice it is almost never reached.
//
// Returns false on overflow; otherwise writes the value to pj and returns
// whether the byte after the number is structural or whitespace.
static bool parse_large_integer(uint8_t *buf, ParsedJson pj, uint32_t offset, bool found_minus)
{
    char1 *pos = (char1 *)(buf + offset);
    bool negative = found_minus;
    if (negative)
    {
        // skip the minus sign
        ++pos;
    }

    uint64_t value;
    if (*pos == (uchar1)'0')
    {
        // a leading zero stands alone
        ++pos;
        value = 0;
    }
    else
    {
        uchar1 current = (uchar1)(*pos - (uchar1)'0');
        value = current;
        pos++;
        // one digit per iteration; very long integer parts are rare
        while (is_integer(*pos))
        {
            current = (uchar1)(*pos - (uchar1)'0');
            if (mul_overflow(value, 10, &value))
            {
                return false; // overflow while scaling
            }
            if (add_overflow(value, current, &value))
            {
                return false; // overflow while adding the digit
            }
            ++pos;
        }
    }

    // the magnitude 0x8000000000000000 is only acceptable for a negative number
    if (negative && (value > 0x8000000000000000))
    {
        return false; // overflow
    }
    if (!negative && (value >= 0x8000000000000000))
    {
        return false; // overflow
    }

    int64_t signed_answer = (int64_t)value;
    if (negative)
    {
        signed_answer = -(int64_t)value;
    }
    pj.WriteTapeInt64(signed_answer);
    return is_structural_or_whitespace((uchar1)(*pos)) != 0;
}