/// <summary>
/// Makes sure the internal buffers can hold a JSON document of up to
/// <paramref name="len"/> bytes nested up to <paramref name="maxdepth"/> levels,
/// reallocating them when the current capacity does not cover the request.
/// </summary>
/// <param name="len">Maximum document size in bytes. Must be non-zero and at most SIMDJSON_MAXSIZE_BYTES.</param>
/// <param name="maxdepth">Maximum nesting depth. Must be non-zero.</param>
/// <returns>
/// <c>true</c> when the requested capacity is available (either already present or freshly allocated);
/// <c>false</c> when an argument is rejected or any allocation comes back null.
/// </returns>
public bool AllocateCapacity(size_t len, size_t maxdepth = DEFAULTMAXDEPTH)
{
    if ((maxdepth == 0) || (len == 0))
    {
        return false;
    }
    if (len > SIMDJSON_MAXSIZE_BYTES)
    {
        return false;
    }
    // The existing buffers can be reused only when BOTH the byte capacity and the
    // depth capacity already cover the request. (The previous test,
    // "depthcapacity < maxdepth", returned early precisely when the depth buffers
    // were too small.)
    if ((len <= bytecapacity) && (maxdepth <= depthcapacity))
    {
        return true;
    }

    Deallocate();
    isvalid = false;
    bytecapacity = 0; // only set to len once every allocation has succeeded
    n_structural_indexes = 0;

    uint32_t max_structures = (uint32_t)(ROUNDUP_N(len, 64) + 2 + 7);
    structural_indexes = allocate<uint32_t>(max_structures);

    // a pathological input like "[[[[..." would generate len tape elements,
    // so the tape needs a capacity of len + 1
    size_t localtapecapacity = ROUNDUP_N(len + 1, 64);

    // a document made only of zero-length strings could hold len/3 strings,
    // which would need len/3 * 5 bytes in the string buffer
    size_t localstringcapacity = ROUNDUP_N(5 * len / 3 + 32, 64);

    string_buf = allocate<uint8_t>(localstringcapacity);
    tape = allocate<uint64_t>(localtapecapacity);
    containing_scope_offset = allocate<uint32_t>(maxdepth);
    ret_address = allocate<char1>(maxdepth);

    if ((string_buf == null) || (tape == null) || (containing_scope_offset == null) ||
        (ret_address == null) || (structural_indexes == null))
    {
        // Release whatever was obtained. At least one of these pointers is null here,
        // so delete() is already being relied upon to accept null.
        delete(ret_address);
        delete(containing_scope_offset);
        delete(tape);
        delete(string_buf);
        delete(structural_indexes);

        // Do not leave freed addresses in the fields: a later release of these
        // buffers must not see pointers that were already deleted.
        ret_address = null;
        containing_scope_offset = null;
        tape = null;
        string_buf = null;
        structural_indexes = null;
        return false;
    }

    // The buffers are not cleared here: per the original note, parsing does not need
    // them initialised, although zeroing them could be added for safety.

    bytecapacity = len;
    depthcapacity = maxdepth;
    tapecapacity = localtapecapacity;
    stringcapacity = localstringcapacity;
    return true;
}
/// <summary>
/// Converts the eight ASCII decimal digits starting at <paramref name="chars"/> into
/// their numeric value using SSE instructions.
/// </summary>
/// <remarks>
/// A full 128-bit vector (16 bytes) is loaded from <paramref name="chars"/>, so twice as
/// many lanes as needed are processed; only the first result lane is returned.
/// </remarks>
/// <param name="chars">Pointer to the first of the digits to convert.</param>
/// <returns>The value extracted from the first 32-bit lane of the final vector.</returns>
private static uint32_t parse_eight_digits_unrolled(char1 *chars)
{
    // Turn ASCII codes into digit values by subtracting '0' from every lane.
    Vector128<sbyte> zeroChar = Vector128.Create((char1)'0');
    Vector128<sbyte> digitValues = Sse2.Subtract(Sse2.LoadVector128(chars), zeroChar);

    // Each multiply-add merges adjacent lanes; presumably the mul_1_10 / mul_1_100 /
    // mul_1_10000 constants weight them so that 1 -> 2 -> 4 -> 8 digits are combined
    // (the constants are defined elsewhere -- confirm).
    Vector128<short> pairSums = Ssse3.MultiplyAddAdjacent(digitValues.AsByte(), mul_1_10);
    Vector128<int> quadSums = Sse2.MultiplyAddAdjacent(pairSums, mul_1_100);
    Vector128<ushort> packedQuads = Sse41.PackUnsignedSaturate(quadSums, quadSums);
    Vector128<int> octetSums = Sse2.MultiplyAddAdjacent(packedQuads.AsInt16(), mul_1_10000);

    // Only the first lane (built from the first eight digits) is kept; the rest is dropped.
    return Sse2.ConvertToUInt32(octetSums.AsUInt32());
}
/// <summary>
/// Branch-free test of whether the eight bytes at <paramref name="chars"/> are all
/// ASCII decimal digits ('0'..'9', i.e. 0x30..0x39).
/// </summary>
/// <param name="chars">Pointer to at least eight readable bytes.</param>
/// <returns><c>true</c> when the combined nibble check equals 0x33 in every byte.</returns>
private static bool is_made_of_eight_digits_fast(char1 *chars)
{
    uint64_t val;
    memcpy(&val, chars, 8);

    // A digit byte has a high nibble of 3 ...
    uint64_t highNibbles = val & 0xF0F0F0F0F0F0F0F0;

    // ... and still has a high nibble of 3 after adding 6 (a low nibble above 9 would
    // carry into the high nibble). Shift that second view down into the low nibble.
    uint64_t highNibblesAfterAdd = ((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4;

    // Both views are 3 in every byte exactly when the OR is 0x33 in every byte.
    // A branchy alternative (two separate comparisons joined with &&) might be faster.
    return (highNibbles | highNibblesAfterAdd) == 0x3333333333333333;
}
/// <summary>
/// Steps into the current scope and scans its keys for one whose UTF-8 bytes equal
/// the <paramref name="length"/> bytes at <paramref name="key"/>.
/// </summary>
/// <param name="key">Pointer to the key bytes to look for.</param>
/// <param name="length">Number of bytes in <paramref name="key"/>.</param>
/// <returns>
/// <c>true</c> when a matching key was found (MoveToValue() has then been called for it);
/// <c>false</c> when Down() fails or no key matches, in which case Up() is called to undo Down().
/// </returns>
public bool MoveToKey(char1 *key, uint32_t length)
{
    if (Down())
    {
        do
        {
            Debug.Assert(IsString);
            bool rightkey = ((GetUtf8StringLength() == length) && (!memcmp(GetUtf8String(), key, length)));
            MoveToValue();
            if (rightkey)
            {
                return true;
            }
        } while (Next());

        // Not found. Up() must run in every build configuration: Debug.Assert calls,
        // including their argument expressions, are removed from non-DEBUG builds, so
        // the call cannot live inside the assertion.
        bool movedUp = Up();
        Debug.Assert(movedUp);
    }
    return false;
}
/// <summary>
/// Steps into the current scope and scans its keys for one that strcmp() reports as
/// equal to the string at <paramref name="key"/>.
/// </summary>
/// <remarks>
/// The comparison uses strcmp, so keys containing embedded null characters would fool it;
/// use the overload taking an explicit length for such keys.
/// </remarks>
/// <param name="key">Pointer to the key to look for (compared with strcmp).</param>
/// <returns>
/// <c>true</c> when a matching key was found (MoveToValue() has then been called for it);
/// <c>false</c> when Down() fails or no key matches, in which case Up() is called to undo Down().
/// </returns>
public bool MoveToKey(char1 *key)
{
    if (Down())
    {
        do
        {
            Debug.Assert(IsString);
            bool rightkey = (strcmp(GetUtf8String(), key) == 0); // null chars would fool this
            MoveToValue();
            if (rightkey)
            {
                return true;
            }
        } while (Next());

        // Not found. Up() must run in every build configuration: Debug.Assert calls,
        // including their argument expressions, are removed from non-DEBUG builds, so
        // the call cannot live inside the assertion.
        bool movedUp = Up();
        Debug.Assert(movedUp);
    }
    return false;
}
/// <summary>
/// Convenience overload for a <c>char1*</c> buffer: reinterprets the pointer as
/// <c>uint8_t*</c> and forwards to the byte-pointer overload of find_structural_bits.
/// </summary>
/// <param name="buf">Input buffer.</param>
/// <param name="len">Passed through unchanged.</param>
/// <param name="pj">Passed through unchanged.</param>
/// <returns>Whatever the byte-pointer overload returns.</returns>
internal static JsonParseError find_structural_bits(char1 *buf, size_t len, ParsedJson pj)
{
    uint8_t *bytes = (uint8_t *)(buf);
    return find_structural_bits(bytes, len, pj);
}
/// <summary>
/// Builds a new CudaRegisteredHostMemory_char1 around an already existing IntPtr.
/// The address must be aligned to the page size (4KBytes)!
/// </summary>
/// <param name="hostPointer">Host address; must be page size aligned (4KBytes)</param>
/// <param name="size">Size in elements (not bytes)</param>
public CudaRegisteredHostMemory_char1(IntPtr hostPointer, SizeT size)
{
    // Element size of char1 as reported by the marshaller.
    _typeSize = (SizeT)Marshal.SizeOf(typeof(char1));
    _size = size;

    // Keep the address both as an IntPtr and as a typed pointer.
    _intPtr = hostPointer;
    _ptr = (char1*)_intPtr;
}
// Fast-path parser for the JSON number that starts at buf + offset.
//
// The digits of the integer part and of the fractional part are accumulated into a
// single unsigned 64-bit integer "i" while "exponent" tracks the power of ten to apply.
// Depending on what was seen, the result is written to pj either with
// WriteTapeInt64 (no '.', 'e' or 'E') or with WriteTapeDouble (i * power_of_ten[308 + exponent]).
// When the digit count suggests "i" may have overflowed, or the power-of-ten index is
// out of range, the work is redone from scratch by parse_large_integer / parse_float.
//
// Parameters:
//   buf         - input buffer
//   pj          - receives the parsed value on its tape
//   offset      - position in buf of the first character of the number (the '-' if any)
//   found_minus - true when the character at offset is a minus sign; it is skipped and
//                 the result is negated
//
// Returns false when the text is rejected: a sign or '.' not followed by a digit, a
// leading 0 followed by a character that is_not_structural_or_whitespace_or_exponent_or_decimal
// flags, an exponent without digits or with more than three digits. Otherwise returns
// whether the character following the number is structural or whitespace (or the
// result of the fallback parser).
// Note that on the fast path the value is written to the tape BEFORE that trailing
// character is checked, so a tape entry may exist even when false is returned.
internal static bool parse_number(uint8_t *buf, ParsedJson pj, uint32_t offset, bool found_minus)
{
    char1 *p = (char1 *)(buf + offset);
    bool negative = false;
    if (found_minus)
    {
        ++p;
        negative = true;
        if (!is_integer(*p))
        { // a negative sign must be followed by an integer
            return(false);
        }
    }
    // first character after the optional sign; used below to count consumed characters
    char1 * startdigits = p;
    uint64_t i; // an unsigned int avoids signed overflows (which are bad)
    if (*p == (char1)'0')
    { // 0 cannot be followed by an integer
        ++p;
        if (is_not_structural_or_whitespace_or_exponent_or_decimal((uint8_t)(*p)))
        {
            return(false);
        }
        i = 0;
    }
    else
    {
        if (!(is_integer(*p)))
        { // must start with an integer
            return(false);
        }
        uchar1 digit = (uchar1)(*p - (uchar1)'0');
        i = digit;
        p++;
        // the is_made_of_eight_digits_fast routine is unlikely to help here because
        // we rarely see large integer parts like 123456789
        while (is_integer(*p))
        {
            digit = (uchar1)(*p - (uchar1)'0');
            // a multiplication by 10 is cheaper than an arbitrary integer multiplication
            i = 10 * i + digit; // might overflow, we will handle the overflow later
            ++p;
        }
    }
    int64_t exponent = 0;
    bool is_float = false;
    if ('.' == *p)
    {
        is_float = true; // At this point we know that we have a float
        // we continue with the fiction that we have an integer. If the
        // floating point number is representable as x * 10^z for some integer
        // z that fits in 53 bits, then we will be able to convert back the
        // the integer into a float in a lossless manner.
        ++p;
        char1 *firstafterperiod = p;
        // at least one digit is mandatory after the decimal point
        if (is_integer(*p))
        {
            uchar1 digit = (uchar1)(*p - (uchar1)'0');
            ++p;
            i = i * 10 + digit; // might overflow + multiplication by 10 is likely cheaper than arbitrary mult.
            // we will handle the overflow later
        }
        else
        {
            return(false);
        }
#if SWAR_NUMBER_PARSING
        // this helps if we have lots of decimals!
        // this turns out to be frequent enough.
        // Only one group of eight digits is consumed this way; the loop below takes the rest.
        // NOTE(review): this reads 8 (and, inside parse_eight_digits_unrolled, presumably 16)
        // bytes starting at p, so the input is assumed to be padded accordingly -- confirm.
        if (is_made_of_eight_digits_fast(p))
        {
            i = i * 100000000 + parse_eight_digits_unrolled(p);
            p += 8;
        }
#endif
        while (is_integer(*p))
        {
            uchar1 digit = (uchar1)(*p - (uchar1)'0');
            ++p;
            i = i * 10 + digit; // in rare cases, this will overflow, but that's ok because we fall back to parse_float later.
        }
        // minus the number of fractional digits consumed (p is past firstafterperiod, so this is <= 0)
        exponent = firstafterperiod - p;
    }
    // Characters consumed so far (including the '.' if present), minus one.
    int digitcount = (int)(p - startdigits - 1); // used later to guard against overflows
    int64_t expnumber = 0; // exponential part
    if (((char1)'e' == *p) || ((char1)'E' == *p))
    {
        is_float = true;
        ++p;
        bool negexp = false;
        if ('-' == *p)
        {
            negexp = true;
            ++p;
        }
        else if ('+' == *p)
        {
            ++p;
        }
        if (!is_integer(*p))
        {
            return(false);
        }
        // up to three exponent digits are accepted: one mandatory, two optional
        uchar1 digit = (uchar1)(*p - (uchar1)'0');
        expnumber = digit;
        p++;
        if (is_integer(*p))
        {
            digit = (uchar1)(*p - (uchar1)'0');
            expnumber = 10 * expnumber + digit;
            ++p;
        }
        if (is_integer(*p))
        {
            digit = (uchar1)(*p - (uchar1)'0');
            expnumber = 10 * expnumber + digit;
            ++p;
        }
        if (is_integer(*p))
        {
            // a fourth exponent digit: we refuse to parse this
            return(false);
        }
        exponent += (negexp ? -expnumber : expnumber);
    }
    if (is_float)
    {
        // Index into power_of_ten, where entry 308 is used for exponent 0.
        // A negative 308 + exponent becomes a very large unsigned value (assuming the
        // default unchecked arithmetic), so the single "> 2 * 308" test below catches
        // both too-small and too-large exponents.
        uint64_t powerindex = (uint64_t)(308 + exponent);
        if (/*unlikely*/ ((digitcount >= 19)))
        { // this is uncommon
            // It is possible that the integer had an overflow.
            // We have to handle the case where we have 0.0000somenumber.
            char1 *start = startdigits;
            while ((*start == (char1)'0') || (*start == (char1)'.'))
            {
                start++;
            }
            // leading zeros (and the '.') do not contribute to the magnitude of i
            digitcount -= (int)(start - startdigits);
            if (digitcount >= 19)
            {
                // Ok, chances are good that we had an overflow!
                // this is almost never going to get called!!!
                // we start anew, going slowly!!!
                return(parse_float(buf, pj, offset, found_minus));
            }
        }
        if (/*unlikely*/ ((powerindex > 2 * 308)))
        { // this is uncommon!!!
            // this is almost never going to get called!!!
            // we start anew, going slowly!!!
            return(parse_float(buf, pj, offset, found_minus));
        }
        double factor = power_of_ten[powerindex];
        // the sign is folded into the scaling factor
        factor = negative ? -factor : factor;
        double d = i * factor;
        pj.WriteTapeDouble(d);
    }
    else
    {
        if (/*unlikely*/ (digitcount >= 18))
        { // this is uncommon!!!
            // there is a good chance that we had an overflow, so we need
            // need to recover: we parse the whole thing again.
            return(parse_large_integer(buf, pj, offset, found_minus));
        }
        // negate in unsigned arithmetic, then reinterpret as a signed 64-bit value
        i = negative ? 0 - i : i;
        pj.WriteTapeInt64((int64_t)i);
    }
    // the number is only valid if it is followed by a structural character or whitespace
    return(is_structural_or_whitespace((uint8_t)(*p)) != 0);
}
/// <summary>
/// Slow-path integer parser used by parse_number when the value is known to be an
/// integer but the fast path may have overflowed. Every accumulation step is checked
/// for overflow here.
/// </summary>
/// <remarks>
/// Do not call this directly: it skips some of the validation parse_number performs.
/// It is expected to run very rarely.
/// </remarks>
/// <param name="buf">Input buffer.</param>
/// <param name="pj">Receives the parsed value through WriteTapeInt64.</param>
/// <param name="offset">Position in <paramref name="buf"/> where the number (including any sign) starts.</param>
/// <param name="found_minus">True when the number starts with a minus sign, which is then skipped.</param>
/// <returns>
/// <c>false</c> when the value does not fit in a signed 64-bit integer; otherwise whether the
/// character following the digits is structural or whitespace.
/// </returns>
static bool parse_large_integer(uint8_t *buf, ParsedJson pj, uint32_t offset, bool found_minus)
{
    char1 *cursor = (char1 *)(buf + offset);
    bool isNegative = found_minus;
    if (found_minus)
    {
        ++cursor; // step over the sign
    }

    uint64_t magnitude;
    if (*cursor == (uchar1)'0')
    {
        // 0 cannot be followed by an integer, so the magnitude is simply zero
        ++cursor;
        magnitude = 0;
    }
    else
    {
        uchar1 digit = (uchar1)(*cursor - (uchar1)'0');
        magnitude = digit;
        cursor++;
        // the is_made_of_eight_digits_fast routine is unlikely to help here because
        // large integer parts like 123456789 are rare
        while (is_integer(*cursor))
        {
            digit = (uchar1)(*cursor - (uchar1)'0');
            if (mul_overflow(magnitude, 10, &magnitude))
            {
                return false; // overflow while scaling by ten
            }
            if (add_overflow(magnitude, digit, &magnitude))
            {
                return false; // overflow while adding the digit
            }
            ++cursor;
        }
    }

    // A negative value may have a magnitude of up to 2^63; a non-negative one must stay below it.
    bool outOfRange = isNegative
        ? (magnitude > 0x8000000000000000)
        : (magnitude >= 0x8000000000000000);
    if (outOfRange)
    {
        return false; // overflow
    }

    int64_t signedValue;
    if (isNegative)
    {
        signedValue = -(int64_t)magnitude;
    }
    else
    {
        signedValue = (int64_t)magnitude;
    }
    pj.WriteTapeInt64(signedValue);

    return is_structural_or_whitespace((uchar1)(*cursor)) != 0;
}
/// <summary>
/// Slow-path float parser used by parse_number when the value is known to be a float
/// but the integer accumulator of the fast path may have overflowed. The trick is to
/// accumulate in floating point from the very first digit.
/// </summary>
/// <remarks>
/// Do not call this directly: it skips some of the validation parse_number performs.
/// It is expected to run very rarely, and a redesign could remove it entirely.
/// </remarks>
/// <param name="buf">Input buffer.</param>
/// <param name="pj">Receives the parsed value through WriteTapeDouble.</param>
/// <param name="offset">Position in <paramref name="buf"/> where the number (including any sign) starts.</param>
/// <param name="found_minus">True when the number starts with a minus sign, which is then skipped.</param>
/// <returns>
/// <c>false</c> when the text is rejected ('.' or exponent marker without digits, more than
/// four exponent digits, a positive exponent above 308, or a trailing character that is
/// neither structural nor whitespace); otherwise the result of the final
/// is_structural_or_whitespace check.
/// </returns>
static bool parse_float(uint8_t *buf, ParsedJson pj, uint32_t offset, bool found_minus)
{
    char1 *cursor = (char1 *)(buf + offset);
    bool isNegative = found_minus;
    if (found_minus)
    {
        ++cursor; // step over the sign
    }

    // ---- integer part ----
    /*long*/ double value;
    if (*cursor == '0')
    {
        // 0 cannot be followed by an integer
        ++cursor;
        value = 0;
    }
    else
    {
        uchar1 digit = (uchar1)(*cursor - (uchar1)'0');
        value = digit;
        cursor++;
        while (is_integer(*cursor))
        {
            digit = (uchar1)(*cursor - (uchar1)'0');
            value = 10 * value + digit;
            ++cursor;
        }
    }

    // ---- fractional part ----
    if ('.' == *cursor)
    {
        ++cursor;
        if (!is_integer(*cursor))
        {
            return false; // at least one digit must follow the decimal point
        }
        // Index into power_of_ten; it is decremented before each use, and digits whose
        // index would drop below zero contribute nothing.
        int fractionalweight = 308;
        do
        {
            uchar1 digit = (uchar1)(*cursor - (uchar1)'0');
            ++cursor;
            fractionalweight--;
            value += digit * (fractionalweight >= 0 ? power_of_ten[fractionalweight] : 0);
        } while (is_integer(*cursor));
    }

    // ---- exponent part ----
    if (('e' == *cursor) || ('E' == *cursor))
    {
        ++cursor;
        bool negexp = false;
        if ('-' == *cursor)
        {
            negexp = true;
            ++cursor;
        }
        else if ('+' == *cursor)
        {
            ++cursor;
        }
        if (!is_integer(*cursor))
        {
            return false; // the exponent needs at least one digit
        }

        // One mandatory digit followed by at most three more.
        int64_t expnumber = (uchar1)(*cursor - (uchar1)'0');
        cursor++;
        for (int extra = 0; extra < 3 && is_integer(*cursor); extra++)
        {
            uchar1 digit = (uchar1)(*cursor - (uchar1)'0');
            expnumber = 10 * expnumber + digit;
            ++cursor;
        }
        if (is_integer(*cursor))
        {
            return false; // a fifth exponent digit is refused
        }

        if (/*unlikely*/ (expnumber > 308))
        {
            // this path is unlikely
            if (!negexp)
            {
                // The number is certainly too large, so it is refused.
                return false;
            }
            // Either zero or a subnormal: uncommon, so take the slow path.
            value = subnormal_power10(value, (int)-expnumber);
        }
        else
        {
            // expnumber is in [0,308], hence exponent is in [-308,308]
            // and 308 + exponent is in [0, 2 * 308]
            int exponent = (int)(negexp ? -expnumber : expnumber);
            value *= power_of_ten[308 + exponent];
        }
    }

    if (is_not_structural_or_whitespace((uint8_t)(*cursor)) != 0)
    {
        return false;
    }

    double d = isNegative ? -value : value;
    pj.WriteTapeDouble(d);
    return is_structural_or_whitespace((uint8_t)(*cursor)) != 0;
}