Exemple #1
0
        /// <summary>
        /// If needed, (re)allocates the internal buffers so that the object is able to
        /// process JSON documents of up to <paramref name="len"/> bytes nested up to
        /// <paramref name="maxdepth"/> levels. Existing buffers are kept when they already
        /// cover both requirements.
        /// </summary>
        /// <param name="len">Maximum document size in bytes; must be non-zero and not exceed SIMDJSON_MAXSIZE_BYTES.</param>
        /// <param name="maxdepth">Maximum nesting depth; must be non-zero.</param>
        /// <returns>
        /// true when the current or newly allocated buffers satisfy the request;
        /// false when an argument is rejected or any allocation comes back null.
        /// </returns>
        public bool AllocateCapacity(size_t len, size_t maxdepth = DEFAULTMAXDEPTH)
        {
            if ((maxdepth == 0) || (len == 0))
            {
                return(false);
            }
            if (len > SIMDJSON_MAXSIZE_BYTES)
            {
                return(false);
            }
            // The existing buffers can be reused only when BOTH capacities already cover
            // the request: enough bytes AND enough depth. (Testing depthcapacity < maxdepth
            // here would report success precisely when the depth buffers are too small.)
            if ((len <= bytecapacity) && (maxdepth <= depthcapacity))
            {
                return(true);
            }
            Deallocate();
            isvalid              = false;
            bytecapacity         = 0; // will only set it to len after allocations are a success
            n_structural_indexes = 0;
            uint32_t max_structures = (uint32_t)(ROUNDUP_N(len, 64) + 2 + 7);

            structural_indexes = allocate <uint32_t>(max_structures);
            // a pathological input like "[[[[..." would generate len tape elements, so need a capacity of len + 1
            size_t localtapecapacity = ROUNDUP_N(len + 1, 64);
            // a document with only zero-length strings... could have len/3 string
            // and we would need len/3 * 5 bytes on the string buffer
            size_t localstringcapacity = ROUNDUP_N(5 * len / 3 + 32, 64);

            string_buf = allocate <uint8_t>(localstringcapacity);
            tape       = allocate <uint64_t>(localtapecapacity);
            containing_scope_offset = allocate <uint32_t>(maxdepth);
            ret_address             = allocate <char1>(maxdepth);
            // All allocations are attempted first and checked together, so a single
            // failure releases every buffer in one place.
            if ((string_buf == null) || (tape == null) ||
                (containing_scope_offset == null) || (ret_address == null) || (structural_indexes == null))
            {
                delete(ret_address);
                delete(containing_scope_offset);
                delete(tape);
                delete(string_buf);
                delete(structural_indexes);
                return(false);
            }

            /*
             * // We do not need to initialize this content for parsing, though we could
             * // need to initialize it for safety.
             * memset(string_buf, 0 , localstringcapacity);
             * memset(structural_indexes, 0, max_structures * sizeof(uint32_t));
             * memset(tape, 0, localtapecapacity * sizeof(uint64_t));
             */
            // Capacities are published only after every allocation succeeded.
            bytecapacity   = len;
            depthcapacity  = maxdepth;
            tapecapacity   = localtapecapacity;
            stringcapacity = localstringcapacity;
            return(true);
        }
        // Converts ASCII digits starting at chars into their numeric value using SSE
        // multiply-add steps. A full 16-byte vector is loaded and processed even though
        // only the value derived from the first 8 characters is returned.
        // NOTE(review): mul_1_10, mul_1_100 and mul_1_10000 are defined elsewhere;
        // presumably they hold the alternating positional weights their names suggest.
        private static uint32_t parse_eight_digits_unrolled(char1 *chars)
        {
            // Turn each ASCII character into its digit value by subtracting '0'.
            Vector128 <sbyte> zeroChar    = Vector128.Create((char1)'0');
            Vector128 <sbyte> digitValues = Sse2.Subtract(Sse2.LoadVector128(chars), zeroChar);

            // Combine neighbouring lanes step by step: 1 -> 2 -> 4 -> 8 digits per lane.
            Vector128 <short>  twoDigits   = Ssse3.MultiplyAddAdjacent(digitValues.AsByte(), mul_1_10);
            Vector128 <int>    fourDigits  = Sse2.MultiplyAddAdjacent(twoDigits, mul_1_100);
            Vector128 <ushort> narrowed    = Sse41.PackUnsignedSaturate(fourDigits, fourDigits);
            Vector128 <int>    eightDigits = Sse2.MultiplyAddAdjacent(narrowed.AsInt16(), mul_1_10000);

            // Only the lowest 32-bit lane (the first 8 digits) is kept; the rest is dropped.
            return Sse2.ConvertToUInt32(eightDigits.AsUInt32());
        }
        // Branch-free test of whether the 8 bytes at chars are all ASCII digits
        // ('0'..'9', i.e. 0x30..0x39), evaluated on a single 64-bit word.
        private static bool is_made_of_eight_digits_fast(char1 *chars)
        {
            uint64_t word;

            memcpy(&word, chars, 8);

            // A byte is a digit when its high nibble is 3 both as-is and after adding 6
            // (adding 6 pushes 0x3A..0x3F into the 0x40 range).
            // A branchy alternative that might be faster:
            // return (( word & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030)
            //  && (( (word + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) ==
            //  0x3030303030303030);
            uint64_t highNibbles        = word & 0xF0F0F0F0F0F0F0F0;
            uint64_t shiftedAfterAdding = ((word + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4;

            // Both nibble sets merged must read 0x33 in every byte.
            return (highNibbles | shiftedAfterAdding) == 0x3333333333333333;
        }
 /// <summary>
 /// Descends with Down() and walks the entries with Next(), comparing each string
 /// against <paramref name="key"/> (length-checked, then compared with memcmp).
 /// </summary>
 /// <param name="key">Pointer to the key bytes to look for.</param>
 /// <param name="length">Number of bytes in <paramref name="key"/>.</param>
 /// <returns>
 /// true when a matching key was found; the iterator has then been advanced with
 /// MoveToValue(). false when Down() fails or no key matches; in the latter case
 /// Up() is called to leave the scope again.
 /// </returns>
 public bool MoveToKey(char1 *key, uint32_t length)
 {
     if (Down())
     {
         do
         {
             Debug.Assert(IsString);
             bool rightkey = ((GetUtf8StringLength() == length) && (!memcmp(GetUtf8String(), key, length)));
             MoveToValue();
             if (rightkey)
             {
                 return(true);
             }
         } while (Next());
         // not found: go back up. Debug.Assert is compiled out of non-DEBUG builds
         // together with its argument, so Up() must be called outside the assertion
         // or the iterator would stay inside the scope in release builds.
         bool movedUp = Up();
         Debug.Assert(movedUp);
     }
     return(false);
 }
 /// <summary>
 /// Descends with Down() and walks the entries with Next(), comparing each string
 /// against the null-terminated <paramref name="key"/> using strcmp.
 /// </summary>
 /// <param name="key">Null-terminated key to look for; keys containing null characters cannot be matched reliably.</param>
 /// <returns>
 /// true when a matching key was found; the iterator has then been advanced with
 /// MoveToValue(). false when Down() fails or no key matches; in the latter case
 /// Up() is called to leave the scope again.
 /// </returns>
 public bool MoveToKey(char1 *key)
 {
     if (Down())
     {
         do
         {
             Debug.Assert(IsString);
             bool rightkey = (strcmp(GetUtf8String(), key) == 0);// null chars would fool this
             MoveToValue();
             if (rightkey)
             {
                 return(true);
             }
         } while(Next());
         // not found: go back up. Debug.Assert is compiled out of non-DEBUG builds
         // together with its argument, so Up() must be called outside the assertion
         // or the iterator would stay inside the scope in release builds.
         bool movedUp = Up();
         Debug.Assert(movedUp);
     }
     return(false);
 }
Exemple #6
0
 // Convenience overload: reinterprets the char1 buffer as uint8_t and forwards
 // to the uint8_t* overload of find_structural_bits.
 internal static JsonParseError find_structural_bits(char1 *buf, size_t len, ParsedJson pj)
 {
     uint8_t *bytes = (uint8_t *)(buf);

     return find_structural_bits(bytes, len, pj);
 }
		/// <summary>
		/// Creates a new CudaRegisteredHostMemory_char1 that wraps an existing IntPtr.
		/// The pointer must be aligned to the page size (4KBytes)!
		/// </summary>
		/// <param name="hostPointer">Existing host pointer; must be page size aligned (4KBytes)</param>
		/// <param name="size">Size in elements (not bytes)</param>
		public CudaRegisteredHostMemory_char1(IntPtr hostPointer, SizeT size)
		{
			// Element count and per-element byte size.
			this._size = size;
			this._typeSize = (SizeT)Marshal.SizeOf(typeof(char1));

			// Keep the pointer both as IntPtr and as a typed pointer.
			this._intPtr = hostPointer;
			this._ptr = (char1*)this._intPtr;
		}
        // Fast-path parser for the JSON number located at buf + offset.
        //
        // The digits are accumulated into a 64-bit unsigned integer without overflow
        // checks; the number of characters consumed is then used to decide whether an
        // overflow was possible, in which case the work is redone by parse_float or
        // parse_large_integer. Otherwise the value is written to pj with
        // WriteTapeDouble (when a '.' or exponent was seen) or WriteTapeInt64.
        //
        // buf         : input buffer
        // pj          : receives the parsed value on its tape
        // offset      : position of the first character of the number in buf
        // found_minus : when true, the first character is skipped and the value negated
        //
        // Returns false when the text is rejected as a number. On the fast path the
        // return value is whether the character following the number is structural or
        // whitespace; note the value has already been written to pj at that point.
        internal static bool parse_number(uint8_t *buf, ParsedJson pj, uint32_t offset, bool found_minus)
        {
            char1 *p        = (char1 *)(buf + offset);
            bool   negative = false;

            if (found_minus)
            {
                ++p;
                negative = true;
                if (!is_integer(*p))
                {
                    // a negative sign must be followed by an integer
                    return(false);
                }
            }

            // remembered so that the number of consumed characters can be computed later
            char1 *  startdigits = p;
            uint64_t i; // an unsigned int avoids signed overflows (which are bad)

            if (*p == (char1)'0')
            {
                // 0 cannot be followed by an integer
                ++p;
                if (is_not_structural_or_whitespace_or_exponent_or_decimal((uint8_t)(*p)))
                {
                    return(false);
                }
                i = 0;
            }
            else
            {
                if (!(is_integer(*p)))
                {
                    // must start with an integer
                    return(false);
                }

                uchar1 digit = (uchar1)(*p - (uchar1)'0');
                i = digit;
                p++;
                // the is_made_of_eight_digits_fast routine is unlikely to help here because
                // we rarely see large integer parts like 123456789
                while (is_integer(*p))
                {
                    digit = (uchar1)(*p - (uchar1)'0');
                    // a multiplication by 10 is cheaper than an arbitrary integer multiplication
                    i = 10 * i + digit; // might overflow, we will handle the overflow later
                    ++p;
                }
            }

            // decimal exponent to apply to i; becomes negative for fractional digits
            int64_t exponent = 0;
            bool    is_float = false;

            if ('.' == *p)
            {
                is_float = true; // At this point we know that we have a float
                // we continue with the fiction that we have an integer. If the
                // floating point number is representable as x * 10^z for some integer
                // z that fits in 53 bits, then we will be able to convert back the
                // the integer into a float in a lossless manner.
                ++p;
                char1 *firstafterperiod = p;
                // at least one digit is required after the decimal point
                if (is_integer(*p))
                {
                    uchar1 digit = (uchar1)(*p - (uchar1)'0');
                    ++p;
                    i = i * 10 + digit; // might overflow + multiplication by 10 is likely cheaper than arbitrary mult.
                    // we will handle the overflow later
                }
                else
                {
                    return(false);
                }
#if SWAR_NUMBER_PARSING
                // this helps if we have lots of decimals!
                // this turns out to be frequent enough.
                if (is_made_of_eight_digits_fast(p))
                {
                    i  = i * 100000000 + parse_eight_digits_unrolled(p);
                    p += 8;
                }
#endif
                while (is_integer(*p))
                {
                    uchar1 digit = (uchar1)(*p - (uchar1)'0');
                    ++p;
                    i = i * 10 + digit; // in rare cases, this will overflow, but that's ok because we have parse_highprecision_float later.
                }

                // minus the number of fractional digits that were folded into i
                exponent = firstafterperiod - p;
            }

            // characters consumed since startdigits, minus one (this span includes the '.' when present)
            int     digitcount = (int)(p - startdigits - 1); // used later to guard against overflows
            int64_t expnumber  = 0;                          // exponential part
            if (((char1)'e' == *p) || ((char1)'E' == *p))
            {
                is_float = true;
                ++p;
                bool negexp = false;
                if ('-' == *p)
                {
                    negexp = true;
                    ++p;
                }
                else if ('+' == *p)
                {
                    ++p;
                }

                // at least one exponent digit is required
                if (!is_integer(*p))
                {
                    return(false);
                }

                uchar1 digit = (uchar1)(*p - (uchar1)'0');
                expnumber = digit;
                p++;
                // accept at most two more exponent digits (three in total)
                if (is_integer(*p))
                {
                    digit     = (uchar1)(*p - (uchar1)'0');
                    expnumber = 10 * expnumber + digit;
                    ++p;
                }

                if (is_integer(*p))
                {
                    digit     = (uchar1)(*p - (uchar1)'0');
                    expnumber = 10 * expnumber + digit;
                    ++p;
                }

                if (is_integer(*p))
                {
                    // we refuse to parse this
                    return(false);
                }

                exponent += (negexp ? -expnumber : expnumber);
            }

            if (is_float)
            {
                // index into power_of_ten; a negative 308 + exponent wraps to a huge
                // unsigned value and is caught by the range check below
                uint64_t powerindex = (uint64_t)(308 + exponent);
                if (/*unlikely*/ ((digitcount >= 19)))
                {
                    // this is uncommon
                    // It is possible that the integer had an overflow.
                    // We have to handle the case where we have 0.0000somenumber.
                    char1 *start = startdigits;
                    while ((*start == (char1)'0') || (*start == (char1)'.'))
                    {
                        start++;
                    }

                    // leading zeros (and the '.') do not contribute to the magnitude of i
                    digitcount -= (int)(start - startdigits);
                    if (digitcount >= 19)
                    {
                        // Ok, chances are good that we had an overflow!
                        // this is almost never going to get called!!!
                        // we start anew, going slowly!!!
                        return(parse_float(buf, pj, offset,
                                           found_minus));
                    }
                }

                if (/*unlikely*/ ((powerindex > 2 * 308)))
                {
                    // this is uncommon!!!
                    // this is almost never going to get called!!!
                    // we start anew, going slowly!!!
                    return(parse_float(buf, pj, offset,
                                       found_minus));
                }

                // scale the accumulated integer by the power of ten and apply the sign
                double factor = power_of_ten[powerindex];
                factor = negative ? -factor : factor;
                double d = i * factor;
                pj.WriteTapeDouble(d);
            }
            else
            {
                if (/*unlikely*/ (digitcount >= 18))
                {
                    // this is uncommon!!!
                    // there is a good chance that we had an overflow, so we need
                    // need to recover: we parse the whole thing again.
                    return(parse_large_integer(buf, pj, offset,
                                               found_minus));
                }

                // negate in unsigned arithmetic, then reinterpret as signed
                i = negative ? 0 - i : i;
                pj.WriteTapeInt64((int64_t)i);
            }

            // the number must be followed by a structural character or whitespace
            return(is_structural_or_whitespace((uint8_t)(*p)) != 0);
        }
        // Slow-path integer parser used by parse_number once the value is known to be
        // an integer that may not fit in 64 bits. Every multiplication and addition is
        // overflow-checked, and the result is range-checked before being written to pj.
        //
        // Not meant to be called directly: some of the syntax checks performed by
        // parse_number are skipped here.
        //
        // This function will almost never be called!!!
        //
        static bool parse_large_integer(uint8_t *buf, ParsedJson pj, uint32_t offset, bool found_minus)
        {
            char1 *cursor     = (char1 *)(buf + offset);
            bool   isNegative = false;

            if (found_minus)
            {
                isNegative = true;
                ++cursor; // step over the sign
            }

            uint64_t magnitude;

            if (*cursor == (uchar1)'0')
            {
                // a leading 0 stands alone: it cannot be followed by further digits
                magnitude = 0;
                ++cursor;
            }
            else
            {
                uchar1 digit = (uchar1)(*cursor - (uchar1)'0');
                magnitude = digit;
                cursor++;
                // the is_made_of_eight_digits_fast routine is unlikely to help here because
                // large integer parts like 123456789 are rare
                for (; is_integer(*cursor); ++cursor)
                {
                    digit = (uchar1)(*cursor - (uchar1)'0');
                    // magnitude = magnitude * 10 + digit, failing on overflow of either step
                    if (mul_overflow(magnitude, 10, &magnitude) || add_overflow(magnitude, digit, &magnitude))
                    {
                        return false; // overflow
                    }
                }
            }

            // A negative value may reach 2^63 in magnitude, a positive one must stay below it.
            bool outOfRange = isNegative
                ? (magnitude > 0x8000000000000000)
                : (magnitude >= 0x8000000000000000);
            if (outOfRange)
            {
                return false; // overflow
            }

            int64_t signed_answer = (int64_t)magnitude;
            if (isNegative)
            {
                signed_answer = -signed_answer;
            }

            pj.WriteTapeInt64(signed_answer);
            return is_structural_or_whitespace((uchar1)(*cursor)) != 0;
        }
        // Slow-path float parser used by parse_number once the value is known to be a
        // float whose integer accumulation may have overflowed. The value is built in
        // floating point from the very first digit instead.
        //
        // Not meant to be called directly: some of the syntax checks performed by
        // parse_number are skipped here.
        //
        // This function will almost never be called!!!
        //
        // Note: a redesign could avoid this function entirely.
        //
        static bool parse_float(uint8_t *buf, ParsedJson pj, uint32_t offset, bool found_minus)
        {
            char1 *cursor     = (char1 *)(buf + offset);
            bool   isNegative = false;

            if (found_minus)
            {
                isNegative = true;
                ++cursor; // step over the sign
            }

            /*long*/
            double value;

            // ---- integer part ----
            if (*cursor == '0')
            {
                // a leading 0 stands alone: it cannot be followed by further digits
                value = 0;
                ++cursor;
            }
            else
            {
                uchar1 digit = (uchar1)(*cursor - (uchar1)'0');
                value = digit;
                cursor++;
                for (; is_integer(*cursor); ++cursor)
                {
                    digit = (uchar1)(*cursor - (uchar1)'0');
                    value = 10 * value + digit;
                }
            }

            // ---- fractional part ----
            if (*cursor == '.')
            {
                ++cursor;
                // at least one digit must follow the decimal point
                if (!is_integer(*cursor))
                {
                    return false;
                }

                // index into power_of_ten, decremented once per fractional digit;
                // digits whose index drops below zero contribute nothing
                int weightIndex = 308;
                do
                {
                    uchar1 digit = (uchar1)(*cursor - (uchar1)'0');
                    ++cursor;
                    weightIndex--;
                    value = value + digit * (weightIndex >= 0 ? power_of_ten[weightIndex] : 0);
                } while (is_integer(*cursor));
            }

            // ---- exponent part ----
            if ((*cursor == 'e') || (*cursor == 'E'))
            {
                ++cursor;
                bool negativeExponent = false;
                if (*cursor == '-')
                {
                    negativeExponent = true;
                    ++cursor;
                }
                else if (*cursor == '+')
                {
                    ++cursor;
                }

                // at least one exponent digit is required
                if (!is_integer(*cursor))
                {
                    return false;
                }

                uchar1  digit     = (uchar1)(*cursor - (uchar1)'0');
                int64_t expnumber = digit; // exponential part
                cursor++;

                // up to three more exponent digits are accepted (four in total)
                for (int extra = 0; (extra < 3) && is_integer(*cursor); ++extra)
                {
                    digit     = (uchar1)(*cursor - (uchar1)'0');
                    expnumber = 10 * expnumber + digit;
                    ++cursor;
                }

                // a fifth exponent digit is refused
                if (is_integer(*cursor))
                {
                    return false;
                }

                if (/*unlikely*/ (expnumber > 308))
                {
                    // this path is unlikely
                    if (!negativeExponent)
                    {
                        // the number is for sure too large, refuse to parse it
                        return false;
                    }

                    // Either zero or a subnormal: uncommon, so go through a slow path.
                    value = subnormal_power10(value, (int)-expnumber);
                }
                else
                {
                    // expnumber is in [0,308], hence exponent is in [-308,308]
                    // and 308 + exponent is in [0, 2 * 308]
                    int exponent = (int)(negativeExponent ? -expnumber : expnumber);
                    value *= power_of_ten[308 + exponent];
                }
            }

            // the number must be followed by a structural character or whitespace
            if (is_not_structural_or_whitespace((uint8_t)(*cursor)) != 0)
            {
                return false;
            }

            double d = isNegative ? -value : value;

            pj.WriteTapeDouble(d);
            return is_structural_or_whitespace((uint8_t)(*cursor)) != 0;
        }