// Copies one SIMD register of bytes from src to dst and reports, as bit masks
// (one bit per byte of the chunk), where backslashes and double quotes occur.
//   src - start of the chunk to scan; the full register width is always read.
//   dst - destination that receives an unconditional copy of the chunk.
// Returns a parse_string_helper holding bs_bits and quote_bits.
// The AVX2 path handles 32 bytes per call, the SSE2 fallback 16 bytes.
internal static parse_string_helper find_bs_bits_and_quote_bits(uint8_t *src, uint8_t *dst)
{
    if (!Avx2.IsSupported)
    {
        // 128-bit fallback: this can read up to 15 bytes beyond the logical
        // end of the input, which the required SIMDJSON_PADDING covers.
        var chunk128 = Sse2.LoadVector128(src);

        // Store unconditionally; the caller overwrites whatever it does not want.
        Sse2.Store(dst, chunk128);

        var backslashes128 = Sse2.CompareEqual(chunk128, Vector128.Create((uint8_t)'\\'));
        var quotes128 = Sse2.CompareEqual(chunk128, Vector128.Create((uint8_t)'"'));
        return(new parse_string_helper
        {
            bs_bits = (uint32_t)Sse2.MoveMask(backslashes128),
            quote_bits = (uint32_t)Sse2.MoveMask(quotes128)
        });
    }

    // 256-bit path: this can read up to 31 bytes beyond the logical end of the
    // input, which the required SIMDJSON_PADDING covers.
    var chunk256 = Avx.LoadVector256(src);

    // Store unconditionally; the caller overwrites whatever it does not want.
    Avx.Store(dst, chunk256);

    var backslashes256 = Avx2.CompareEqual(chunk256, Vector256.Create((uint8_t)'\\'));
    var quotes256 = Avx2.CompareEqual(chunk256, Vector256.Create((uint8_t)'"'));
    return(new parse_string_helper
    {
        bs_bits = (uint32_t)Avx2.MoveMask(backslashes256),
        quote_bits = (uint32_t)Avx2.MoveMask(quotes256)
    });
}
// Encodes the code point cp as UTF-8 into the buffer c.
//   cp - code point to encode.
//   c  - output buffer; up to 4 bytes are written.
// Returns the number of bytes written (1 to 4), or 0 when cp is larger than
// 0x10FFFF. Surrogates (0xD800-0xDFFF) are not rejected here; they are encoded
// as ordinary 3-byte sequences and must be dealt with by the caller.
public static size_t codepoint_to_utf8(uint32_t cp, uint8_t *c)
{
    // will return 0 when the code point was too large.
    if (cp > 0x10FFFF)
    {
        return(0);
    }

    // One byte: plain ASCII.
    if (cp <= 0x7F)
    {
        c[0] = (uint8_t)cp;
        return(1);
    }

    // Two bytes: 110xxxxx 10xxxxxx
    if (cp <= 0x7FF)
    {
        c[0] = (uint8_t)(0xC0 | (cp >> 6));
        c[1] = (uint8_t)(0x80 | (cp & 0x3F));
        return(2);
    }

    // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
    if (cp <= 0xFFFF)
    {
        c[0] = (uint8_t)(0xE0 | (cp >> 12));
        c[1] = (uint8_t)(0x80 | ((cp >> 6) & 0x3F));
        c[2] = (uint8_t)(0x80 | (cp & 0x3F));
        return(3);
    }

    // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    c[0] = (uint8_t)(0xF0 | (cp >> 18));
    c[1] = (uint8_t)(0x80 | ((cp >> 12) & 0x3F));
    c[2] = (uint8_t)(0x80 | ((cp >> 6) & 0x3F));
    c[3] = (uint8_t)(0x80 | (cp & 0x3F));
    return(4);
}
// Parses the JSON document at jsonData into pj by running stage 1
// (find_structural_bits) and then stage 2 (unified_machine).
//   jsonData        - start of the document bytes.
//   length          - number of document bytes.
//   pj              - destination; its bytecapacity must be at least length.
//   reallocIfNeeded - when true, the input is copied into a padded buffer if
//                     its last byte lies within SIMDJSON_PADDING bytes of the
//                     start of a page, so that SIMD over-reads stay safe.
// Returns JsonParseError.Capacity when pj is too small, JsonParseError.Memalloc
// when the padded copy cannot be allocated, otherwise the stage 2 result.
internal static JsonParseError JsonParse(uint8_t *jsonData, size_t length, ParsedJson pj, bool reallocIfNeeded = true)
{
    if (pj.bytecapacity < length)
    {
        return(JsonParseError.Capacity);
    }

    bool reallocated = false;
    if (reallocIfNeeded)
    {
        // realloc is needed if the end of the memory crosses a page
        if ((size_t)(jsonData + length - 1) % (size_t)pagesize < SIMDJSON_PADDING)
        {
            uint8_t *tmpbuf = jsonData;
            jsonData = (uint8_t *)allocate_padded_buffer(length);
            if (jsonData == null)
            {
                return(JsonParseError.Memalloc);
            }
            memcpy(jsonData, tmpbuf, length);
            reallocated = true;
        }
    }

    // NOTE(review): when stage 1 fails, result stays at Success because no
    // dedicated error value is assigned here - confirm callers also check
    // the validity flag on pj.
    var result = JsonParseError.Success;
    if (stage1_find_marks.find_structural_bits(jsonData, length, pj))
    {
        result = stage2_build_tape.unified_machine(jsonData, length, pj);
    }

    // Only free the buffer we allocated ourselves, never the caller's.
    if (reallocated)
    {
        aligned_free(jsonData);
    }

    // The original fell off the end of the method without returning.
    return(result);
}
// Combines the four characters at src[0..3], interpreted through the
// digittoval lookup table, into a single value: src[0] supplies the most
// significant nibble and src[3] the least significant one.
// No validation is done here ("nocheck"); what digittoval yields for
// non-hex characters is defined by that table, not by this function.
public static uint32_t hex_to_u32_nocheck(uint8_t *src)
{
    return((uint32_t)(digittoval[src[0]] << 12 |
                      digittoval[src[1]] << 8 |
                      digittoval[src[2]] << 4 |
                      digittoval[src[3]]));
}
// Combines the four characters at src[0..3] into one value using the
// digittoval32 table. The table is indexed in four regions (offsets 630, 420,
// 210 and 0, one per character position), and the four looked-up values are
// OR-ed together, so any shifting is presumably already baked into the table.
// No validation is done here ("nocheck").
internal static uint32_t hex_to_u32_nocheck(uint8_t *src)
{
    uint32_t combined = digittoval32[630 + src[0]];
    combined |= digittoval32[420 + src[1]];
    combined |= digittoval32[210 + src[2]];
    combined |= digittoval32[0 + src[3]];
    return(combined);
}
// Dispatches string parsing to the implementation matching the CPU.
// Only the AVX2 implementation is wired up; an SSE4.1 variant is not enabled.
// On hardware without AVX2 this calls ThrowHelper.ThrowPNSE() (presumably
// raising a platform-not-supported exception - confirm in ThrowHelper); the
// trailing "return false" only exists to satisfy the compiler.
public static bool parse_string(uint8_t* buf, size_t len, ParsedJson pj, uint32_t depth, uint32_t offset)
{
    if (!Avx2.IsSupported)
    {
        ThrowHelper.ThrowPNSE();
        return false;
    }

    return parse_string_avx2(buf, len, pj, depth, offset);
}
// If needed, allocates memory so that the object is able to process JSON
// documents having up to len bytes and a nesting depth of up to maxdepth.
//   len      - maximum document size in bytes; must be non-zero and at most
//              SIMDJSON_MAXSIZE_BYTES.
//   maxdepth - maximum nesting depth; must be non-zero.
// Returns true when the existing buffers already suffice or when fresh buffers
// were allocated; false on invalid arguments or allocation failure. On failure
// the object is left deallocated.
public bool AllocateCapacity(size_t len, size_t maxdepth = DEFAULTMAXDEPTH)
{
    if ((maxdepth == 0) || (len == 0))
    {
        return(false);
    }
    if (len > SIMDJSON_MAXSIZE_BYTES)
    {
        return(false);
    }

    // Existing buffers can be reused only when BOTH capacities are large
    // enough. (The previous test, depthcapacity < maxdepth, was inverted: it
    // reused buffers that were too shallow and reallocated sufficient ones.)
    if ((len <= bytecapacity) && (maxdepth <= depthcapacity))
    {
        return(true);
    }

    Deallocate();
    isvalid = false;
    bytecapacity = 0; // will only set it to len after allocations are a success
    n_structural_indexes = 0;

    uint32_t max_structures = (uint32_t)(ROUNDUP_N(len, 64) + 2 + 7);
    structural_indexes = allocate <uint32_t>(max_structures);

    // a pathological input like "[[[[..." would generate len tape elements, so need a capacity of len + 1
    size_t localtapecapacity = ROUNDUP_N(len + 1, 64);

    // a document with only zero-length strings... could have len/3 string
    // and we would need len/3 * 5 bytes on the string buffer
    size_t localstringcapacity = ROUNDUP_N(5 * len / 3 + 32, 64);

    string_buf = allocate <uint8_t>(localstringcapacity);
    tape = allocate <uint64_t>(localtapecapacity);
    containing_scope_offset = allocate <uint32_t>(maxdepth);
    ret_address = allocate <char1>(maxdepth);

    if ((string_buf == null) || (tape == null) ||
        (containing_scope_offset == null) || (ret_address == null) ||
        (structural_indexes == null))
    {
        // Release whatever was obtained through the shared cleanup routine
        // rather than freeing the buffers here and leaving the fields
        // pointing at freed memory (which a later Deallocate would free again).
        Deallocate();
        return(false);
    }

    // The buffers are deliberately left uninitialized: parsing does not
    // require zeroed memory.

    bytecapacity = len;
    depthcapacity = maxdepth;
    tapecapacity = localtapecapacity;
    stringcapacity = localstringcapacity;
    return(true);
}
// Combines the four hex characters at src[0..3] into one value, src[0] being
// the most significant nibble.
//
// The looked-up digit values are kept in signed 32-bit locals on purpose: an
// invalid character's (negative) table entry is sign-extended, so after the
// shifts and ORs the high 16 bits of the result are all 1's whenever any of the
// four characters is invalid. The caller checks for this. Narrowing the values
// to an unsigned byte, as was done before, throws the sign bits away and makes
// invalid input indistinguishable from valid input.
// NOTE(review): this relies on digittoval holding signed entries (negative for
// non-hex characters) - confirm against the table definition.
public static uint32_t hex_to_u32_nocheck(uint8_t *src)
{
    int v1 = digittoval[src[0]];
    int v2 = digittoval[src[1]];
    int v3 = digittoval[src[2]];
    int v4 = digittoval[src[3]];

    // unchecked: the negative "invalid" marker must survive the conversion.
    return(unchecked((uint32_t)(v1 << 12 | v2 << 8 | v3 << 4 | v4)));
}
// Checks that the bytes at loc spell "null" and that the byte right after it
// (loc[4]) is accepted by is_not_structural_or_whitespace (i.e. that call
// returns 0). Eight bytes are read from loc regardless of the atom's length.
internal static bool is_valid_null_atom(uint8_t *loc)
{
    // "null" followed by four spaces, read as a little-endian 64-bit word;
    // only the low 32 bits take part in the comparison below.
    const uint64_t expected = 2314885532098524526;
    const uint64_t lowFourBytes = 0x00000000ffffffff;

    // Copy instead of dereferencing to avoid an unaligned 64-bit load.
    uint64_t word;
    memcpy(&word, loc, sizeof(uint64_t));

    uint32_t mismatch = (uint32_t)((word & lowFourBytes) ^ expected);
    mismatch |= is_not_structural_or_whitespace(loc[4]);
    return(mismatch == 0);
}
// Checks that the bytes at loc spell "false" and that the byte right after it
// (loc[5]) is accepted by is_not_structural_or_whitespace (i.e. that call
// returns 0). Eight bytes are read from loc regardless of the atom's length,
// so the input must be padded accordingly.
internal static bool is_valid_false_atom(uint8_t *loc)
{
    // "false" as a little-endian 64-bit word with the three unused upper bytes
    // zeroed, so it can be compared directly against the masked input.
    const uint64_t fv = 0x00000065736c6166;
    const uint64_t mask5 = 0x000000ffffffffff;

    // Copy instead of dereferencing to avoid an unaligned 64-bit load.
    uint64_t locval;
    memcpy(&locval, loc, sizeof(uint64_t));

    // The comparison must stay 64 bits wide: "false" is five bytes long, and
    // truncating the difference to 32 bits (as was done before) drops the
    // fifth character, so e.g. "falsX" would have been accepted.
    uint64_t error = (locval & mask5) ^ fv;
    error |= is_not_structural_or_whitespace(loc[5]);
    return(error == 0);
}
// Checks that the bytes at loc spell "true" and that the byte right after it
// (loc[4]) is accepted by is_not_structural_or_whitespace (i.e. that call
// returns 0). Eight bytes are read from loc regardless of the atom's length.
internal static bool is_valid_true_atom(uint8_t *loc)
{
    // "true" followed by four spaces, read as a little-endian 64-bit word;
    // only the low 32 bits take part in the comparison below.
    uint64_t expected = 2314885531981673076;
    uint64_t lowFourBytes = 0x00000000ffffffff;

    // Copy instead of dereferencing to avoid an unaligned 64-bit load.
    uint64_t word;
    memcpy(&word, loc, sizeof(uint64_t));

    uint32_t mismatch = (uint32_t)((word & lowFourBytes) ^ expected);
    mismatch |= is_not_structural_or_whitespace(loc[4]);
    return(mismatch == 0);
}
// Checks that the bytes at loc spell "null" and that the byte right after it
// (loc[4]) is accepted by is_not_structural_or_whitespace (i.e. that call
// returns 0).
internal static bool is_valid_null_atom(uint8_t *loc)
{
    // "null" followed by four spaces, read as a little-endian 64-bit word;
    // only the low 32 bits take part in the comparison below.
    uint64_t expected = 2314885532098524526;
    uint64_t lowFourBytes = 0x00000000ffffffff;

    // Copy instead of dereferencing to avoid an unaligned 64-bit load.
    // This reads 8 bytes, i.e. up to 7 bytes beyond the buffer size, which is
    // fine because SIMDJSON_PADDING bytes of padding are required.
    uint64_t word;
    memcpy(&word, loc, sizeof(uint64_t));

    uint32_t mismatch = (uint32_t)((word & lowFourBytes) ^ expected);
    mismatch |= is_not_structural_or_whitespace(loc[4]);
    return(mismatch == 0);
}
// Convenience wrapper: creates a ParsedJson, sizes it for a document of len
// bytes and parses buf into it.
// Throws InvalidOperationException when the capacity cannot be allocated.
// The outcome of the parse itself is NOT reported through this method's
// return value; the ParsedJson is returned either way, so callers must
// inspect it to find out whether parsing succeeded.
public static ParsedJson build_parsed_json(uint8_t *buf, size_t len, bool reallocifneeded = true)
{
    ParsedJson pj = new ParsedJson();

    if (!pj.allocateCapacity(len))
    {
        throw new InvalidOperationException("failure during memory allocation");
    }

    // The boolean result is intentionally dropped (see the header comment).
    json_parse(buf, len, &pj, reallocifneeded);
    return(pj);
}
// Parses the JSON document at buf into *pj by running stage 1
// (find_structural_bits) followed by stage 2 (unified_machine).
//   buf             - start of the document bytes.
//   len             - number of document bytes.
//   pj              - destination; its bytecapacity must be at least len.
//   reallocifneeded - when true, the input is copied into a padded buffer if
//                     its last byte lies within SIMDJSON_PADDING bytes of the
//                     start of a page.
// Returns true only when both stages succeed. Returns false when pj is too
// small (a debug message is written), when the padded copy cannot be
// allocated, or when either stage fails.
public static bool json_parse(uint8_t *buf, size_t len, ParsedJson *pj, bool reallocifneeded = true)
{
    if (pj->bytecapacity < len)
    {
        Debug.WriteLine("Your ParsedJson cannot support documents that big: " + len);
        return(false);
    }

    // Non-null only when we had to make our own padded copy of the input.
    uint8_t *paddedCopy = null;
    if (reallocifneeded)
    {
        // realloc is needed if the end of the memory crosses a page
        long pagesize = System.Environment.SystemPageSize;
        if (((size_t)(buf + len - 1) % (size_t)pagesize) < SIMDJSON_PADDING)
        {
            paddedCopy = (uint8_t *)Utils.allocate_padded_buffer(len);
            if (paddedCopy == null)
            {
                return(false);
            }
            memcpy((void *)paddedCopy, buf, len);
            buf = paddedCopy;
        }
    }

    // Stage 2 only runs when stage 1 succeeded (short-circuit).
    bool isok = stage1_find_marks.find_structural_bits(buf, len, pj) &&
                stage2_build_tape.unified_machine(buf, len, pj);

    if (paddedCopy != null)
    {
        Utils.free((void *)paddedCopy);
    }
    return(isok);
}
// Loads the 64 bytes starting at ptr into a simd_input.
// With AVX2 the data is held as two 256-bit halves (lo, hi); otherwise as four
// 128-bit quarters (v0..v3). All 64 bytes are read in either case.
internal static simd_input fill_input(uint8_t *ptr)
{
    if (!Avx2.IsSupported)
    {
        return(new simd_input
        {
            v0 = Sse2.LoadVector128(ptr + 0),
            v1 = Sse2.LoadVector128(ptr + 16),
            v2 = Sse2.LoadVector128(ptr + 32),
            v3 = Sse2.LoadVector128(ptr + 48)
        });
    }

    return(new simd_input
    {
        lo = Avx.LoadVector256(ptr + 0),
        hi = Avx.LoadVector256(ptr + 32)
    });
}
// If needed, allocates memory so that the object is able to process JSON
// documents having up to len bytes and a nesting depth of up to maxdepth.
//   len      - maximum document size in bytes; must be non-zero.
//   maxdepth - maximum nesting depth; must be non-zero.
// Returns true when the existing buffers already suffice or when fresh buffers
// were allocated; false on invalid arguments (a debug message is written) or
// on allocation failure, in which case everything is deallocated again.
public bool AllocateCapacity(size_t len, size_t maxdepth = DEFAULTMAXDEPTH)
{
    if ((maxdepth == 0) || (len == 0))
    {
        Debug.WriteLine("capacities must be non-zero ");
        return(false);
    }

    // Existing buffers can be reused only when BOTH capacities are large
    // enough. (The previous test, depthcapacity < maxdepth, was inverted: it
    // reused buffers that were too shallow and reallocated sufficient ones.)
    if ((len <= bytecapacity) && (maxdepth <= depthcapacity))
    {
        return(true);
    }

    // len is known to be non-zero here, so the old buffers are always dropped.
    Deallocate();

    isvalid = false;
    bytecapacity = 0; // will only set it to len after allocations are a success
    n_structural_indexes = 0;

    uint32_t max_structures = (uint32_t)ROUNDUP_N(len, 64) + 2 + 7;
    structural_indexes = Utils.allocate <uint32_t>(max_structures);

    size_t localtapecapacity = ROUNDUP_N(len, 64);
    size_t localstringcapacity = ROUNDUP_N(len, 64);

    string_buf = Utils.allocate <uint8_t>(localstringcapacity);
    tape = Utils.allocate <uint64_t>(localtapecapacity);
    containing_scope_offset = Utils.allocate <uint32_t>(maxdepth);
    ret_address = Utils.allocate <bytechar>(maxdepth);

    if ((string_buf == null) || (tape == null) ||
        (containing_scope_offset == null) || (ret_address == null) ||
        (structural_indexes == null))
    {
        Deallocate();
        return(false);
    }

    bytecapacity = len;
    depthcapacity = maxdepth;
    tapecapacity = localtapecapacity;
    stringcapacity = localstringcapacity;
    return(true);
}
// Parses the JSON document at jsonData into pj by running stage 1
// (find_structural_bits) followed by stage 2 (unified_machine).
//   jsonData        - start of the document bytes.
//   length          - number of document bytes.
//   pj              - destination; its bytecapacity must be at least length.
//   reallocIfNeeded - when true, the input is copied into a padded buffer if
//                     its last byte lies within SIMDJSON_PADDING bytes of the
//                     start of a page.
// Returns true only when both stages succeed; false when the padded copy
// cannot be allocated or when either stage fails.
// Throws InvalidOperationException when pj is too small for the document.
internal static bool JsonParse(uint8_t *jsonData, size_t length, ParsedJson pj, bool reallocIfNeeded = true)
{
    if (pj.bytecapacity < length)
    {
        throw new InvalidOperationException("Your ParsedJson cannot support documents that big: " + length);
    }

    bool reallocated = false;
    if (reallocIfNeeded)
    {
        // realloc is needed if the end of the memory crosses a page
        if ((size_t)(jsonData + length - 1) % (size_t)pagesize < SIMDJSON_PADDING)
        {
            uint8_t *tmpbuf = jsonData;
            jsonData = (uint8_t *)allocate_padded_buffer(length);
            if (jsonData == null)
            {
                return(false);
            }
            memcpy(jsonData, tmpbuf, length);
            reallocated = true;
        }
    }

    bool isok = stage1_find_marks.find_structural_bits(jsonData, length, pj);
    if (isok)
    {
        isok = stage2_build_tape.unified_machine(jsonData, length, pj);
    }

    // Only free the buffer we allocated ourselves, never the caller's.
    if (reallocated)
    {
        free(jsonData);
    }

    // The original fell off the end of the method on the success path
    // without returning a value.
    return(isok);
}
// Checks that the bytes at loc spell "false" and that the byte right after it
// (loc[5]) is accepted by is_not_structural_or_whitespace (i.e. that call
// returns 0).
internal static bool is_valid_false_atom(uint8_t *loc)
{
    // "false" as a little-endian 64-bit word. An integer constant with the
    // three unused upper bytes zeroed is used (rather than reinterpreting a
    // padded string) so that it compares cleanly against the masked input.
    uint64_t expected = 0x00000065736c6166;
    uint64_t lowFiveBytes = 0x000000ffffffffff;

    // Copy instead of dereferencing to avoid an unaligned 64-bit load.
    // This reads 8 bytes, i.e. up to 7 bytes beyond the buffer size, which is
    // fine because SIMDJSON_PADDING bytes of padding are required.
    uint64_t word;
    memcpy(&word, loc, sizeof(uint64_t));

    // The mismatch is kept 64 bits wide: "false" is five bytes long, so a
    // 32-bit comparison would ignore its last character.
    uint64_t mismatch = (word & lowFiveBytes) ^ expected;
    mismatch |= is_not_structural_or_whitespace(loc[5]);
    return(mismatch == 0);
}
// Releases every buffer owned by this object and resets it to the empty,
// invalid state. Each pointer is cleared after being freed, so calling this
// more than once is harmless.
private void Deallocate()
{
    if (ret_address != null)
    {
        delete(ret_address);
        ret_address = null;
    }

    if (containing_scope_offset != null)
    {
        delete(containing_scope_offset);
        containing_scope_offset = null;
    }

    if (tape != null)
    {
        delete(tape);
        tape = null;
    }

    if (string_buf != null)
    {
        delete(string_buf);
        string_buf = null;
    }

    if (structural_indexes != null)
    {
        delete(structural_indexes);
        structural_indexes = null;
    }

    // Nothing is allocated any more, so every capacity is zero.
    stringcapacity = 0;
    tapecapacity = 0;
    depthcapacity = 0;
    bytecapacity = 0;
    isvalid = false;
}
/*************************************
 *
 *  Decryption helpers
 *
 *************************************/
#if false
// NOTE: everything from the "#if false" above is excluded from compilation;
// the fragment below is C++ and is kept for reference only.
//
// Decodes `length` bytes of the "maincpu" memory region into `dest`.
// Each byte is transformed independently: two of its bits conditionally flip
// two other bits, and bytes at even offsets additionally have their bits
// rearranged by BITSWAP8.
void galaxian_state::decode_mooncrst(int length, uint8_t *dest)
{
    uint8_t *rom = memregion("maincpu")->base();
    int offs;

    for (offs = 0; offs < length; offs++)
    {
        uint8_t data = rom[offs];
        uint8_t res = data;

        // The tests look at the ORIGINAL byte (data), the flips go into res.
        if (BIT(data, 1))
        {
            res ^= 0x40;
        }
        if (BIT(data, 5))
        {
            res ^= 0x04;
        }

        // Even offsets only: reorder the bits of the result.
        // (presumably this swaps bits 2 and 6 - confirm against the
        // argument convention of BITSWAP8)
        if ((offs & 1) == 0)
        {
            res = BITSWAP8(res, 7, 2, 5, 4, 3, 6, 1, 0);
        }
        dest[offs] = res;
    }
}
// Externally implemented entry point (declared extern; the body lives outside
// this file).
//   inputHeader, inputData - pointers to the input structures consumed by the
//                            external implementation.
//   visualizeLightCount    - integer option passed through unchanged.
//   framebuffer_r/_g/_b    - three separate byte buffers; presumably one output
//                            plane per color channel - TODO confirm layout and
//                            required size with the external implementation.
public static extern void RenderStatic(InputHeader *inputHeader, InputDataArrays *inputData, int32_t visualizeLightCount, uint8_t *framebuffer_r, uint8_t *framebuffer_g, uint8_t *framebuffer_b) /*x75*/;
// Takes input from buf and removes useless whitespace (whitespace located
// outside of quoted strings). Input and output can be the same buffer. The
// result is null terminated; the return value is the string length (minus the
// null termination).
//   buf  - input bytes.
//   len  - number of input bytes.
//   @out - output buffer; receives the minified bytes plus a trailing '\0'.
// Throws NotSupportedException when AVX2 is not available.
// The input is processed in 64-byte blocks; a final partial block is copied
// into a zero-filled 64-byte stack buffer and processed there.
public static size_t Minify(uint8_t *buf, size_t len, uint8_t * @out)
{
    if (!Avx2.IsSupported)
    {
        throw new NotSupportedException("AVX2 is required form SimdJson");
    }

    //C#: load const vectors once (there is no `const _m256` in C#)
    Vector256 <byte> lut_cntrl = s_lut_cntrl;
    Vector256 <byte> low_nibble_mask = s_low_nibble_mask;
    Vector256 <byte> high_nibble_mask = s_high_nibble_mask;

    // Pin the shuffle-mask table so it can be addressed through raw pointers.
    fixed(byte *mask128_epi8 = s_mask128_epi8)
    {
        // Useful constant masks
        const uint64_t even_bits = 0x5555555555555555UL;
        const uint64_t odd_bits = ~even_bits;
        uint8_t * initout = @out;
        // State carried from one 64-byte block to the next.
        uint64_t prev_iter_ends_odd_backslash = 0UL; // either 0 or 1, but a 64-bit value
        uint64_t prev_iter_inside_quote = 0UL;       // either all zeros or all ones
        size_t idx = 0;
        if (len >= 64)
        {
            size_t avxlen = len - 63;
            // Full 64-byte blocks.
            for (; idx < avxlen; idx += 64)
            {
                Vector256 <byte> input_lo = Avx.LoadVector256((buf + idx + 0));
                Vector256 <byte> input_hi = Avx.LoadVector256((buf + idx + 32));

                // --- Step 1: find the ends of odd-length backslash runs ---
                // (a quote right after such a run is escaped)
                uint64_t bs_bits = cmp_mask_against_input_mini(input_lo, input_hi, Vector256.Create((byte)'\\'));
                uint64_t start_edges = bs_bits & ~(bs_bits << 1);
                // Flip the even/odd sense if the previous block ended inside
                // an odd-length backslash run.
                uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
                uint64_t even_starts = start_edges & even_start_mask;
                uint64_t odd_starts = start_edges & ~even_start_mask;
                uint64_t even_carries = bs_bits + even_starts;
                uint64_t odd_carries;
                // The carry out of bit 63 tells the next block that this one
                // ended in an odd-length run.
                bool iter_ends_odd_backslash = add_overflow( bs_bits, odd_starts, &odd_carries);
                odd_carries |= prev_iter_ends_odd_backslash;
                prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1UL : 0x0UL;
                uint64_t even_carry_ends = even_carries & ~bs_bits;
                uint64_t odd_carry_ends = odd_carries & ~bs_bits;
                uint64_t even_start_odd_end = even_carry_ends & odd_bits;
                uint64_t odd_start_even_end = odd_carry_ends & even_bits;
                uint64_t odd_ends = even_start_odd_end | odd_start_even_end;

                // --- Step 2: compute the "inside a quoted string" mask ---
                uint64_t quote_bits = cmp_mask_against_input_mini(input_lo, input_hi, Vector256.Create((byte)'"'));
                quote_bits = quote_bits & ~odd_ends; // drop escaped quotes
                // Carry-less multiply by all-ones acts as a prefix XOR over
                // the quote bits.
                uint64_t quote_mask = Sse2.X64.ConvertToUInt64(Pclmulqdq.CarrylessMultiply(
                    Vector128.Create(quote_bits, 0UL).AsUInt64(), Vector128.Create((byte)0xFF).AsUInt64(), 0));
                quote_mask ^= prev_iter_inside_quote;
                // Arithmetic shift replicates bit 63 into every bit.
                prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63); // might be undefined behavior, should be fully defined in C++20, ok according to John Regher from Utah University

                // --- Step 3: classify whitespace via nibble table lookups ---
                Vector256 <byte> whitespace_shufti_mask = Vector256.Create((byte)0x18);
                Vector256 <byte> v_lo = Avx2.And(
                    Avx2.Shuffle(low_nibble_mask, input_lo),
                    Avx2.Shuffle(high_nibble_mask, Avx2.And(Avx2.ShiftRightLogical(input_lo.AsUInt32(), 4).AsByte(), Vector256.Create((byte)0x7f))));
                Vector256 <byte> v_hi = Avx2.And(
                    Avx2.Shuffle(low_nibble_mask, input_hi),
                    Avx2.Shuffle(high_nibble_mask, Avx2.And(Avx2.ShiftRightLogical(input_hi.AsUInt32(), 4).AsByte(), Vector256.Create((byte)0x7f))));
                // These compare against zero, i.e. they mark NON-whitespace
                // bytes; the combined mask is inverted below.
                Vector256 <byte> tmp_ws_lo = Avx2.CompareEqual(
                    Avx2.And(v_lo, whitespace_shufti_mask), Vector256.Create((byte)0));
                Vector256 <byte> tmp_ws_hi = Avx2.CompareEqual(
                    Avx2.And(v_hi, whitespace_shufti_mask), Vector256.Create((byte)0));
                uint64_t ws_res_0 = (uint32_t)Avx2.MoveMask(tmp_ws_lo);
                uint64_t ws_res_1 = (uint64_t)Avx2.MoveMask(tmp_ws_hi);
                uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));
                whitespace &= ~quote_mask; // whitespace inside strings is kept

                // --- Step 4: compact the kept bytes, 16 input bytes at a time ---
                // maskN: whitespace bits of the N-th 16-byte quarter.
                int mask1 = (int)(whitespace & 0xFFFF);
                int mask2 = (int)((whitespace >> 16) & 0xFFFF);
                int mask3 = (int)((whitespace >> 32) & 0xFFFF);
                int mask4 = (int)((whitespace >> 48) & 0xFFFF);
                // popN: number of kept bytes in the first N quarters, i.e. the
                // output offset at which the next quarter is stored.
                int pop1 = hamming((~whitespace) & 0xFFFF);
                int pop2 = hamming((~whitespace) & (ulong)(0xFFFFFFFF));
                int pop3 = hamming((~whitespace) & (ulong)(0xFFFFFFFFFFFF));
                int pop4 = hamming((~whitespace));
                // Look up one shuffle mask per quarter (2 ulongs = 16 bytes per
                // table entry).
                var vmask1 = _mm256_loadu2_m128i((ulong *)mask128_epi8 + (mask2 & 0x7FFF) * 2,
                                                 (ulong *)mask128_epi8 + (mask1 & 0x7FFF) * 2);
                var vmask2 = _mm256_loadu2_m128i((ulong *)mask128_epi8 + (mask4 & 0x7FFF) * 2,
                                                 (ulong *)mask128_epi8 + (mask3 & 0x7FFF) * 2);
                var result1 = Avx2.Shuffle(input_lo, vmask1.AsByte());
                var result2 = Avx2.Shuffle(input_hi, vmask2.AsByte());
                _mm256_storeu2_m128i((@out + pop1), @out, result1);
                _mm256_storeu2_m128i((@out + pop3), (@out + pop2), result2);
                @out += pop4;
            }
        }
        // we finish off the job... copying and pasting the code is not ideal here,
        // but it gets the job done.
        if (idx < len)
        {
            // Work on a zero-filled copy so that full 64-byte loads and stores
            // stay inside memory we own.
            uint8_t *buffer = stackalloc uint8_t[64];
            memset(buffer, 0, 64);
            memcpy(buffer, buf + idx, len - idx);
            var input_lo = Avx.LoadVector256((buffer));
            var input_hi = Avx.LoadVector256((buffer + 32));
            uint64_t bs_bits = cmp_mask_against_input_mini(input_lo, input_hi, Vector256.Create((byte)'\\'));
            uint64_t start_edges = bs_bits & ~(bs_bits << 1);
            uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
            uint64_t even_starts = start_edges & even_start_mask;
            uint64_t odd_starts = start_edges & ~even_start_mask;
            uint64_t even_carries = bs_bits + even_starts;
            uint64_t odd_carries;
            // The overflow result is not needed for the last block; the call is
            // made only to fill odd_carries.
            //bool iter_ends_odd_backslash =
            add_overflow(bs_bits, odd_starts, &odd_carries);
            odd_carries |= prev_iter_ends_odd_backslash;
            //prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL; // we never use it
            uint64_t even_carry_ends = even_carries & ~bs_bits;
            uint64_t odd_carry_ends = odd_carries & ~bs_bits;
            uint64_t even_start_odd_end = even_carry_ends & odd_bits;
            uint64_t odd_start_even_end = odd_carry_ends & even_bits;
            uint64_t odd_ends = even_start_odd_end | odd_start_even_end;
            uint64_t quote_bits = cmp_mask_against_input_mini(input_lo, input_hi, Vector256.Create((byte)'"'));
            quote_bits = quote_bits & ~odd_ends;
            uint64_t quote_mask = Sse2.X64.ConvertToUInt64(Pclmulqdq.CarrylessMultiply(
                Vector128.Create(quote_bits, 0UL), Vector128.Create((byte)0xFF).AsUInt64(), 0));
            quote_mask ^= prev_iter_inside_quote;
            // prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);// we don't need this anymore

            // NOTE: unlike the main loop, the tail classifies whitespace with a
            // space comparison plus the lut_cntrl lookup, and these masks mark
            // whitespace directly (no inversion below).
            Vector256 <byte> mask_20 = Vector256.Create((byte)0x20); // c==32
            Vector256 <byte> mask_70 = Vector256.Create((byte)0x70); // adding 0x70 does not check low 4-bits
                                                                      // but moves any value >= 16 above 128
            Vector256 <byte> tmp_ws_lo = Avx2.Or(
                Avx2.CompareEqual(mask_20, input_lo),
                Avx2.Shuffle(lut_cntrl, Avx2.AddSaturate(mask_70, input_lo)));
            Vector256 <byte> tmp_ws_hi = Avx2.Or(
                Avx2.CompareEqual(mask_20, input_hi),
                Avx2.Shuffle(lut_cntrl, Avx2.AddSaturate(mask_70, input_hi)));
            uint64_t ws_res_0 = (uint32_t)Avx2.MoveMask(tmp_ws_lo);
            uint64_t ws_res_1 = (uint64_t)Avx2.MoveMask(tmp_ws_hi);
            uint64_t whitespace = (ws_res_0 | (ws_res_1 << 32));
            whitespace &= ~quote_mask;
            // Treat the zero padding past the real input as whitespace so that
            // it is dropped from the output.
            if (len - idx < 64)
            {
                whitespace |= ((0xFFFFFFFFFFFFFFFF) << (int)(len - idx));
            }
            int mask1 = (int)(whitespace & 0xFFFF);
            int mask2 = (int)((whitespace >> 16) & 0xFFFF);
            int mask3 = (int)((whitespace >> 32) & 0xFFFF);
            int mask4 = (int)((whitespace >> 48) & 0xFFFF);
            int pop1 = hamming((~whitespace) & 0xFFFF);
            int pop2 = hamming((~whitespace) & 0xFFFFFFFF);
            int pop3 = hamming((~whitespace) & 0xFFFFFFFFFFFF);
            int pop4 = hamming((~whitespace));
            var vmask1 = _mm256_loadu2_m128i((ulong *)mask128_epi8 + (mask2 & 0x7FFF) * 2,
                                             (ulong *)mask128_epi8 + (mask1 & 0x7FFF) * 2);
            var vmask2 = _mm256_loadu2_m128i((ulong *)mask128_epi8 + (mask4 & 0x7FFF) * 2,
                                             (ulong *)mask128_epi8 + (mask3 & 0x7FFF) * 2);
            var result1 = Avx2.Shuffle(input_lo, vmask1.AsByte());
            var result2 = Avx2.Shuffle(input_hi, vmask2.AsByte());
            // Compact inside the scratch buffer, then copy only the kept bytes
            // to the real output.
            _mm256_storeu2_m128i((buffer + pop1), buffer, result1);
            _mm256_storeu2_m128i((buffer + pop3), (buffer + pop2), result2);
            memcpy(@out, buffer, (size_t)pop4);
            @out += pop4;
        }
        *@out = (byte)'\0'; // NULL termination
        return((size_t)@out - (size_t)initout);
    }
}
// Resets the write positions before a parse. This should be called when
// parsing, right before writing the tapes: the string cursor goes back to the
// start of string_buf, the tape location back to 0, and the document is marked
// as not (yet) valid.
public void Init()
{
    isvalid = false;
    current_loc = 0;
    current_string_buf_loc = string_buf;
}
// Parses the JSON number starting at buf + offset and writes it to the tape of
// *pj, either as a signed 64-bit integer or as a double.
//   buf         - document bytes.
//   pj          - destination; receives write_tape_s64 / write_tape_double.
//   offset      - position of the first character of the number (the '-' sign
//                 when found_minus is true).
//   found_minus - true when the number starts with '-'.
// Returns true when a well-formed number was written AND the character that
// follows it is structural or whitespace; false otherwise.
//
// Fast path only: inputs with many digits are handed over to
// parse_large_integer (integers) or parse_float (decimals/exponents), which
// deal with overflow and precision.
public static bool parse_number(uint8_t *buf, ParsedJson *pj, uint32_t offset, bool found_minus)
{
    bytechar *p = (bytechar *)(buf + offset);
    bool negative = false;
    if (found_minus)
    {
        ++p;
        negative = true;
        if (!is_integer(*p))
        { // a negative sign must be followed by an integer
            return(false);
        }
    }
    bytechar *startdigits = p;

    // ---- integer part ----
    int64_t i;
    if (*p == '0')
    { // 0 cannot be followed by an integer
        ++p;
        if (is_not_structural_or_whitespace_or_exponent_or_decimal((uint8_t)(*p)))
        {
#if JSON_TEST_NUMBERS // for unit testing
            foundInvalidNumber(buf + offset);
#endif
            return(false);
        }
        i = 0;
    }
    else
    {
        if (!(is_integer(*p)))
        { // must start with an integer
#if JSON_TEST_NUMBERS // for unit testing
            foundInvalidNumber(buf + offset);
#endif
            return(false);
        }
        unsigned_bytechar digit = (unsigned_bytechar)(*p - '0');
        i = digit;
        p++;
        // the is_made_of_eight_digits_fast routine is unlikely to help here because
        // we rarely see large integer parts like 123456789
        while (is_integer(*p))
        {
            digit = (unsigned_bytechar)(*p - '0');
            i = 10 * i + digit; // might overflow; caught by the digitcount checks below
            ++p;
        }
    }

    // ---- fractional part ----
    // The digits are appended to i; exponent records how many were appended
    // (as a negative power of ten).
    int64_t exponent = 0;
    if ('.' == *p)
    {
        ++p;
        bytechar *firstafterperiod = p;
        if (is_integer(*p))
        {
            unsigned_bytechar digit = (unsigned_bytechar)(*p - '0');
            ++p;
            i = i * 10 + digit;
        }
        else
        { // a decimal point must be followed by at least one digit
#if JSON_TEST_NUMBERS // for unit testing
            foundInvalidNumber(buf + offset);
#endif
            return(false);
        }
#if SWAR_NUMBER_PARSING
        // this helps if we have lots of decimals!
        // this turns out to be frequent enough.
        if (is_made_of_eight_digits_fast(p))
        {
            i = i * 100000000 + parse_eight_digits_unrolled(p);
            p += 8;
            // exponent -= 8;
        }
#endif
        while (is_integer(*p))
        {
            unsigned_bytechar digit = (unsigned_bytechar)(*p - '0');
            ++p;
            i = i * 10 + digit; // in rare cases, this will overflow, but that's ok because we have parse_highprecision_float later.
        }
        exponent = firstafterperiod - p;
    }

    // Number of characters consumed so far minus one (this counts the '.').
    int digitcount = (int)(p - startdigits - 1);

    // ---- exponent part ----
    int64_t expnumber = 0; // exponential part
    if (('e' == *p) || ('E' == *p))
    {
        ++p;
        bool negexp = false;
        if ('-' == *p)
        {
            negexp = true;
            ++p;
        }
        else if ('+' == *p)
        {
            ++p;
        }
        if (!is_integer(*p))
        { // at least one exponent digit is required
#if JSON_TEST_NUMBERS // for unit testing
            foundInvalidNumber(buf + offset);
#endif
            return(false);
        }
        unsigned_bytechar digit = (unsigned_bytechar)(*p - '0');
        expnumber = digit;
        p++;
        // Consume every remaining exponent digit. Once the accumulated value
        // is beyond any exponent that could be accepted, stop accumulating so
        // that an absurdly long digit string cannot overflow the 64-bit
        // counter and wrap back into the accepted range. (The digit checks
        // that used to follow this loop could never trigger, because the loop
        // only ends on a non-digit; they have been removed.)
        while (is_integer(*p))
        {
            digit = (unsigned_bytechar)(*p - '0');
            if (expnumber < 0x100000000)
            {
                expnumber = 10 * expnumber + digit;
            }
            ++p;
        }
        exponent += (negexp ? -expnumber : expnumber);
    }

    i = negative ? -i : i;
    if ((exponent != 0) || (expnumber != 0))
    {
        // ---- floating-point result ----
        if ((digitcount >= 19))
        { // this is uncommon!!!
            // this is almost never going to get called!!!
            // we start anew, going slowly!!!
            return(parse_float(buf, pj, offset, found_minus));
        }
        ///////////
        // We want 0.1e1 to be a float.
        //////////
        if (i == 0)
        {
            pj->write_tape_double(0.0);
#if JSON_TEST_NUMBERS // for unit testing
            foundFloat(0.0, buf + offset);
#endif
        }
        else
        {
            if ((exponent > 308) || (exponent < -308))
            { // we refuse to parse this
#if JSON_TEST_NUMBERS // for unit testing
                foundInvalidNumber(buf + offset);
#endif
                return(false);
            }
            // power_of_ten is indexed with a bias of 308.
            double d = i;
            d *= power_of_ten[308 + exponent];
            // the sign is already part of i, so d needs no further negation
            pj->write_tape_double(d);
#if JSON_TEST_NUMBERS // for unit testing
            foundFloat(d, buf + offset);
#endif
        }
    }
    else
    {
        // ---- integer result ----
        if ((digitcount >= 18))
        { // this is uncommon!!!
            return(parse_large_integer(buf, pj, offset, found_minus));
        }
        pj->write_tape_s64(i);
#if JSON_TEST_NUMBERS // for unit testing
        foundInteger(i, buf + offset);
#endif
    }
    return(is_structural_or_whitespace((uint8_t)(*p)) != 0);
}
// Slow path of parse_number for inputs known to be integers but with so many
// digits that a plain 64-bit accumulation might overflow. Every step is
// overflow-checked and out-of-range values are rejected.
// Do not call this function directly: it skips some of the checks that
// parse_number has already performed.
//
// This function will almost never be called!!!
//
// Returns true when the value fits in a signed 64-bit integer, was written to
// the tape, and is followed by a structural or whitespace character.
static bool parse_large_integer(uint8_t *buf, ParsedJson *pj, uint32_t offset, bool found_minus)
{
    bytechar *cursor = (bytechar *)(buf + offset);
    bool isNegative = found_minus;
    if (found_minus)
    {
        ++cursor;
    }

    // Accumulate the magnitude as an unsigned value.
    uint64_t magnitude;
    if (*cursor == '0')
    { // 0 cannot be followed by an integer
        ++cursor;
        magnitude = 0;
    }
    else
    {
        unsigned_bytechar digit = (unsigned_bytechar)(*cursor - '0');
        magnitude = digit;
        cursor++;
        // the is_made_of_eight_digits_fast routine is unlikely to help here because
        // we rarely see large integer parts like 123456789
        for (; is_integer(*cursor); ++cursor)
        {
            digit = (unsigned_bytechar)(*cursor - '0');
            // magnitude = magnitude * 10 + digit, failing on overflow
            if (mul_overflow(magnitude, 10, &magnitude))
            {
#if JSON_TEST_NUMBERS // for unit testing
                foundInvalidNumber(buf + offset);
#endif
                return(false); // overflow
            }
            if (add_overflow(magnitude, digit, &magnitude))
            {
#if JSON_TEST_NUMBERS // for unit testing
                foundInvalidNumber(buf + offset);
#endif
                return(false); // overflow
            }
        }
    }

    // Largest magnitude that still fits: 2^63 for negative values,
    // 2^63 - 1 for non-negative ones.
    uint64_t largestAllowed = isNegative ? 0x8000000000000000UL : 0x7FFFFFFFFFFFFFFFUL;
    if (magnitude > largestAllowed)
    { // overflows!
#if JSON_TEST_NUMBERS // for unit testing
        foundInvalidNumber(buf + offset);
#endif
        return(false); // overflow
    }

    int64_t signed_answer = isNegative ? -(int64_t)magnitude : (int64_t)magnitude;
    pj->write_tape_s64(signed_answer);
#if JSON_TEST_NUMBERS // for unit testing
    foundInteger(signed_answer, buf + offset);
#endif
    return(is_structural_or_whitespace((byte)(*cursor)) != 0);
}
// Slow path of parse_number for inputs known to be floating-point but with so
// many digits that integer accumulation might overflow. The trick here is to
// accumulate in a double from the very first digit.
// Do not call this function directly: it skips some of the checks that
// parse_number has already performed.
//
// This function will almost never be called!!!
//
// Note: a redesign could avoid this function entirely.
//
// Returns true when the number is well formed, its exponent has at most four
// digits and lies within [-308, 308], and it is followed by a structural or
// whitespace character; the value is then written to the tape.
private static bool parse_float(uint8_t *buf, ParsedJson *pj, uint32_t offset, bool found_minus)
{
    bytechar *cursor = (bytechar *)(buf + offset);
    bool isNegative = found_minus;
    if (found_minus)
    {
        ++cursor;
    }

    // ---- integer part ----
    double magnitude;
    if (*cursor == '0')
    { // 0 cannot be followed by an integer
        ++cursor;
        magnitude = 0;
    }
    else
    {
        unsigned_bytechar lead = (unsigned_bytechar)(*cursor - (bytechar)'0');
        magnitude = lead;
        cursor++;
        for (; is_integer(*cursor); ++cursor)
        {
            unsigned_bytechar digit = (unsigned_bytechar)(*cursor - '0');
            magnitude = 10 * magnitude + digit;
        }
    }

    // ---- fractional part ----
    if ('.' == *cursor)
    {
        ++cursor;
        if (!is_integer(*cursor))
        { // a decimal point must be followed by at least one digit
#if JSON_TEST_NUMBERS // for unit testing
            foundInvalidNumber(buf + offset);
#endif
            return(false);
        }
        // Each further digit counts for a tenth of the previous one.
        double weight = 1;
        do
        {
            unsigned_bytechar digit = (unsigned_bytechar)(*cursor - '0');
            ++cursor;
            weight *= 0.1;
            magnitude = magnitude + digit * weight;
        }
        while (is_integer(*cursor));
    }

    // ---- exponent part ----
    if (('e' == *cursor) || ('E' == *cursor))
    {
        ++cursor;
        bool negexp = false;
        if ('-' == *cursor)
        {
            negexp = true;
            ++cursor;
        }
        else if ('+' == *cursor)
        {
            ++cursor;
        }
        if (!is_integer(*cursor))
        { // at least one exponent digit is required
#if JSON_TEST_NUMBERS // for unit testing
            foundInvalidNumber(buf + offset);
#endif
            return(false);
        }
        unsigned_bytechar expDigit = (unsigned_bytechar)(*cursor - '0');
        int64_t expnumber = expDigit; // exponential part
        cursor++;
        // Up to three more exponent digits are accepted.
        for (int extra = 0; (extra < 3) && is_integer(*cursor); extra++)
        {
            expDigit = (unsigned_bytechar)(*cursor - '0');
            expnumber = 10 * expnumber + expDigit;
            ++cursor;
        }
        if (is_integer(*cursor))
        { // a fifth exponent digit: we refuse to parse this
#if JSON_TEST_NUMBERS // for unit testing
            foundInvalidNumber(buf + offset);
#endif
            return(false);
        }
        int exponent = (int)(negexp ? -expnumber : expnumber);
        if ((exponent > 308) || (exponent < -308))
        { // we refuse to parse this
#if JSON_TEST_NUMBERS // for unit testing
            foundInvalidNumber(buf + offset);
#endif
            return(false);
        }
        // power_of_ten is indexed with a bias of 308.
        magnitude *= power_of_ten[308 + exponent];
    }

    // Nothing is written to the tape unless the number is properly terminated.
    if (is_not_structural_or_whitespace((byte)*cursor) != 0)
    {
        return(false);
    }

    double d = isNegative ? -magnitude : magnitude;
    pj->write_tape_double(d);
#if JSON_TEST_NUMBERS // for unit testing
    foundFloat(d, buf + offset);
#endif
    return(is_structural_or_whitespace((byte)(*cursor)) != 0);
}
/*
 * Find all devices in the piconet.
 *
 * Prototype only; the implementation is not part of this file.
 *   ii       - inquiry_info storage; presumably filled with one entry per
 *              discovered device (TODO confirm ownership and required size).
 *   dev_num  - presumably the maximum number of entries to report (TODO confirm).
 *   duration - inquiry duration; units are not visible here (TODO confirm).
 *   found    - presumably receives the number of devices found (TODO confirm).
 * The meaning of the int return value is not visible here.
 */
int sdp_general_inquiry(inquiry_info *ii, int dev_num, int duration, uint8_t *found);
internal static bool find_structural_bits(uint8_t* buf, size_t len, ParsedJson pj) { if (len > pj.bytecapacity) { Console.WriteLine("Your ParsedJson object only supports documents up to " + pj.bytecapacity + " bytes but you are trying to process " + len + " bytes\n"); return false; } uint32_t* base_ptr = pj.structural_indexes; uint32_t @base = 0; #if SIMDJSON_UTF8VALIDATE // NOT TESTED YET! var has_error = Vector256<byte>.Zero; var previous = new avx_processed_utf_bytes(); previous.rawbytes = Vector256<byte>.Zero; previous.high_nibbles = Vector256<byte>.Zero; previous.carried_continuations = Vector256<byte>.Zero; var highbit = Vector256.Create((byte)0x80); #endif const uint64_t even_bits = 0x5555555555555555UL; const uint64_t odd_bits = ~even_bits; // for now, just work in 64-byte chunks // we have padded the input out to 64 byte multiple with the remainder being // zeros // persistent state across loop uint64_t prev_iter_ends_odd_backslash = 0UL; // either 0 or 1, but a 64-bit value uint64_t prev_iter_inside_quote = 0UL; // either all zeros or all ones // effectively the very first char is considered to follow "whitespace" for the // purposes of psuedo-structural character detection uint64_t prev_iter_ends_pseudo_pred = 1UL; size_t lenminus64 = len < 64 ? 
0 : len - 64; size_t idx = 0; uint64_t structurals = 0; // C#: assign static readonly fields to locals before the loop Vector256<byte> low_nibble_mask = s_low_nibble_mask; Vector256<byte> high_nibble_mask = s_high_nibble_mask; Vector256<byte> utf8ValidVec = s_utf8ValidVec; var structural_shufti_mask = Vector256.Create((byte)0x7); var whitespace_shufti_mask = Vector256.Create((byte)0x18); var slashVec = Vector256.Create((bytechar) '\\').AsByte(); var ffVec = Vector128.Create((byte) 0xFF).AsUInt64(); var doubleQuoteVec = Vector256.Create((byte)'"'); var zeroBVec = Vector256.Create((byte) 0); var vec7f = Vector256.Create((byte) 0x7f); for (; idx < lenminus64; idx += 64) { var input_lo = Avx.LoadVector256(buf + idx + 0); var input_hi = Avx.LoadVector256(buf + idx + 32); #if SIMDJSON_UTF8VALIDATE // NOT TESTED YET! if ((Avx.TestZ(Avx2.Or(input_lo, input_hi), highbit)) == true) { // it is ascii, we just check continuation has_error = Avx2.Or( Avx2.CompareGreaterThan(previous.carried_continuations.AsSByte(), utf8ValidVec, has_error); } else { // it is not ascii so we have to do heavy work previous = Utf8Validation.avxcheckUTF8Bytes(input_lo, ref previous, ref has_error); previous = Utf8Validation.avxcheckUTF8Bytes(input_hi, ref previous, ref has_error); } #endif //////////////////////////////////////////////////////////////////////////////////////////// // Step 1: detect odd sequences of backslashes //////////////////////////////////////////////////////////////////////////////////////////// /// uint64_t bs_bits = cmp_mask_against_input(input_lo, input_hi, slashVec); uint64_t start_edges = bs_bits & ~(bs_bits << 1); // flip lowest if we have an odd-length run at the end of the prior // iteration uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash; uint64_t even_starts = start_edges & even_start_mask; uint64_t odd_starts = start_edges & ~even_start_mask; uint64_t even_carries = bs_bits + even_starts; uint64_t odd_carries; // must record the carry-out of our 
odd-carries out of bit 63; this // indicates whether the sense of any edge going to the next iteration // should be flipped bool iter_ends_odd_backslash = add_overflow(bs_bits, odd_starts, &odd_carries); odd_carries |= prev_iter_ends_odd_backslash; // push in bit zero as a potential end // if we had an odd-numbered run at the // end of the previous iteration prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1UL : 0x0UL; uint64_t even_carry_ends = even_carries & ~bs_bits; uint64_t odd_carry_ends = odd_carries & ~bs_bits; uint64_t even_start_odd_end = even_carry_ends & odd_bits; uint64_t odd_start_even_end = odd_carry_ends & even_bits; uint64_t odd_ends = even_start_odd_end | odd_start_even_end; //////////////////////////////////////////////////////////////////////////////////////////// // Step 2: detect insides of quote pairs //////////////////////////////////////////////////////////////////////////////////////////// uint64_t quote_bits = cmp_mask_against_input(input_lo, input_hi, doubleQuoteVec); quote_bits = quote_bits & ~odd_ends; uint64_t quote_mask = Sse2.X64.ConvertToUInt64(Pclmulqdq.CarrylessMultiply( Vector128.Create(quote_bits, 0UL /*C# reversed*/), ffVec, 0)); uint32_t cnt = (uint32_t) hamming(structurals); uint32_t next_base = @base + cnt; while (structurals != 0) { base_ptr[@base + 0] = (uint32_t) idx - 64 + (uint32_t) trailingzeroes(structurals); structurals = structurals & (structurals - 1); base_ptr[@base + 1] = (uint32_t) idx - 64 + (uint32_t) trailingzeroes(structurals); structurals = structurals & (structurals - 1); base_ptr[@base + 2] = (uint32_t) idx - 64 + (uint32_t) trailingzeroes(structurals); structurals = structurals & (structurals - 1); base_ptr[@base + 3] = (uint32_t) idx - 64 + (uint32_t) trailingzeroes(structurals); structurals = structurals & (structurals - 1); base_ptr[@base + 4] = (uint32_t) idx - 64 + (uint32_t) trailingzeroes(structurals); structurals = structurals & (structurals - 1); base_ptr[@base + 5] = (uint32_t) idx - 
64 + (uint32_t) trailingzeroes(structurals); structurals = structurals & (structurals - 1); base_ptr[@base + 6] = (uint32_t) idx - 64 + (uint32_t) trailingzeroes(structurals); structurals = structurals & (structurals - 1); base_ptr[@base + 7] = (uint32_t) idx - 64 + (uint32_t) trailingzeroes(structurals); structurals = structurals & (structurals - 1); @base += 8; } @base = next_base; quote_mask ^= prev_iter_inside_quote; prev_iter_inside_quote = (uint64_t) ((int64_t) quote_mask >> 63); // right shift of a signed value expected to be well-defined and standard compliant as of C++20, John Regher from Utah U. says this is fine code var v_lo = Avx2.And( Avx2.Shuffle(low_nibble_mask, input_lo), Avx2.Shuffle(high_nibble_mask, Avx2.And(Avx2.ShiftRightLogical(input_lo.AsUInt32(), 4).AsByte(), vec7f))); var v_hi = Avx2.And( Avx2.Shuffle(low_nibble_mask, input_hi), Avx2.Shuffle(high_nibble_mask, Avx2.And(Avx2.ShiftRightLogical(input_hi.AsUInt32(), 4).AsByte(), vec7f))); var tmp_lo = Avx2.CompareEqual( Avx2.And(v_lo, structural_shufti_mask), zeroBVec); var tmp_hi = Avx2.CompareEqual( Avx2.And(v_hi, structural_shufti_mask), zeroBVec); uint64_t structural_res_0 = (uint32_t) Avx2.MoveMask(tmp_lo); uint64_t structural_res_1 = (uint64_t) Avx2.MoveMask(tmp_hi); structurals = ~(structural_res_0 | (structural_res_1 << 32)); var tmp_ws_lo = Avx2.CompareEqual( Avx2.And(v_lo, whitespace_shufti_mask), zeroBVec); var tmp_ws_hi = Avx2.CompareEqual( Avx2.And(v_hi, whitespace_shufti_mask), zeroBVec); uint64_t ws_res_0 = (uint32_t) Avx2.MoveMask(tmp_ws_lo); uint64_t ws_res_1 = (uint64_t) Avx2.MoveMask(tmp_ws_hi); uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32)); // mask off anything inside quotes structurals &= ~quote_mask; // add the real quote bits back into our bitmask as well, so we can // quickly traverse the strings we've spent all this trouble gathering structurals |= quote_bits; // Now, establish "pseudo-structural characters". 
These are non-whitespace // characters that are (a) outside quotes and (b) have a predecessor that's // either whitespace or a structural character. This means that subsequent // passes will get a chance to encounter the first character of every string // of non-whitespace and, if we're parsing an atom like true/false/null or a // number we can stop at the first whitespace or structural character // following it. // a qualified predecessor is something that can happen 1 position before an // psuedo-structural character uint64_t pseudo_pred = structurals | whitespace; uint64_t shifted_pseudo_pred = (pseudo_pred << 1) | prev_iter_ends_pseudo_pred; prev_iter_ends_pseudo_pred = pseudo_pred >> 63; uint64_t pseudo_structurals = shifted_pseudo_pred & (~whitespace) & (~quote_mask); structurals |= pseudo_structurals; // now, we've used our close quotes all we need to. So let's switch them off // they will be off in the quote mask and on in quote bits. structurals &= ~(quote_bits & ~quote_mask); //Console.WriteLine($"Iter: {idx}, satur: {structurals}"); //*(uint64_t *)(pj.structurals + idx / 8) = structurals; } //////////////// /// we use a giant copy-paste which is ugly. /// but otherwise the string needs to be properly padded or else we /// risk invalidating the UTF-8 checks. //////////// if (idx < len) { uint8_t* tmpbuf = stackalloc uint8_t[64]; memset(tmpbuf, 0x20, 64); memcpy(tmpbuf, buf + idx, len - idx); Vector256<byte> input_lo = Avx.LoadVector256(tmpbuf + 0); Vector256<byte> input_hi = Avx.LoadVector256(tmpbuf + 32); #if SIMDJSON_UTF8VALIDATE // NOT TESTED YET! 
var highbit = Vector256.Create((byte)0x80); if ((Avx.TestZ(Avx2.Or(input_lo, input_hi), highbit)) == true) { // it is ascii, we just check continuation has_error = Avx2.Or( Avx2.CompareGreaterThan(previous.carried_continuations.AsSByte(), utf8ValidVec).AsByte(), has_error); } else { // it is not ascii so we have to do heavy work previous = Utf8Validation.avxcheckUTF8Bytes(input_lo, ref previous, ref has_error); previous = Utf8Validation.avxcheckUTF8Bytes(input_hi, ref previous, ref has_error); } #endif //////////////////////////////////////////////////////////////////////////////////////////// // Step 1: detect odd sequences of backslashes //////////////////////////////////////////////////////////////////////////////////////////// uint64_t bs_bits = cmp_mask_against_input(input_lo, input_hi, slashVec); uint64_t start_edges = bs_bits & ~(bs_bits << 1); // flip lowest if we have an odd-length run at the end of the prior // iteration uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash; uint64_t even_starts = start_edges & even_start_mask; uint64_t odd_starts = start_edges & ~even_start_mask; uint64_t even_carries = bs_bits + even_starts; uint64_t odd_carries; // must record the carry-out of our odd-carries out of bit 63; this // indicates whether the sense of any edge going to the next iteration // should be flipped //bool iter_ends_odd_backslash = add_overflow(bs_bits, odd_starts, &odd_carries); odd_carries |= prev_iter_ends_odd_backslash; // push in bit zero as a potential end // if we had an odd-numbered run at the // end of the previous iteration //prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 
0x1ULL : 0x0ULL; uint64_t even_carry_ends = even_carries & ~bs_bits; uint64_t odd_carry_ends = odd_carries & ~bs_bits; uint64_t even_start_odd_end = even_carry_ends & odd_bits; uint64_t odd_start_even_end = odd_carry_ends & even_bits; uint64_t odd_ends = even_start_odd_end | odd_start_even_end; //////////////////////////////////////////////////////////////////////////////////////////// // Step 2: detect insides of quote pairs //////////////////////////////////////////////////////////////////////////////////////////// uint64_t quote_bits = cmp_mask_against_input(input_lo, input_hi, doubleQuoteVec); quote_bits = quote_bits & ~odd_ends; uint64_t quote_mask = (uint64_t)Sse2.X64.ConvertToInt64(Pclmulqdq.CarrylessMultiply( Vector128.Create(quote_bits, 0UL /*C# reversed*/), ffVec, 0).AsInt64()); quote_mask ^= prev_iter_inside_quote; //BUG? https://github.com/dotnet/coreclr/issues/22813 //quote_mask = 60; //prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63); // right shift of a signed value expected to be well-defined and standard compliant as of C++20 uint32_t cnt = (uint32_t)hamming(structurals); uint32_t next_base = @base + cnt; while (structurals != 0) { base_ptr[@base + 0] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals); structurals = structurals & (structurals - 1); base_ptr[@base + 1] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals); structurals = structurals & (structurals - 1); base_ptr[@base + 2] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals); structurals = structurals & (structurals - 1); base_ptr[@base + 3] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals); structurals = structurals & (structurals - 1); base_ptr[@base + 4] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals); structurals = structurals & (structurals - 1); base_ptr[@base + 5] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals); structurals = structurals & (structurals - 1); base_ptr[@base + 6] = (uint32_t)idx - 64 + 
(uint32_t)trailingzeroes(structurals); structurals = structurals & (structurals - 1); base_ptr[@base + 7] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals); structurals = structurals & (structurals - 1); @base += 8; } @base = next_base; // How do we build up a user traversable data structure // first, do a 'shufti' to detect structural JSON characters // they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c // these go into the first 3 buckets of the comparison (1/2/4) // we are also interested in the four whitespace characters // space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d // these go into the next 2 buckets of the comparison (8/16) var v_lo = Avx2.And( Avx2.Shuffle(low_nibble_mask, input_lo), Avx2.Shuffle(high_nibble_mask, Avx2.And(Avx2.ShiftRightLogical(input_lo.AsUInt32(), 4).AsByte(), vec7f))); var v_hi = Avx2.And( Avx2.Shuffle(low_nibble_mask, input_hi), Avx2.Shuffle(high_nibble_mask, Avx2.And(Avx2.ShiftRightLogical(input_hi.AsUInt32(), 4).AsByte(), vec7f))); var tmp_lo = Avx2.CompareEqual( Avx2.And(v_lo, structural_shufti_mask), zeroBVec); var tmp_hi = Avx2.CompareEqual( Avx2.And(v_hi, structural_shufti_mask), zeroBVec); uint64_t structural_res_0 = (uint32_t)Avx2.MoveMask(tmp_lo); uint64_t structural_res_1 = (uint64_t)Avx2.MoveMask(tmp_hi); structurals = ~(structural_res_0 | (structural_res_1 << 32)); // this additional mask and transfer is non-trivially expensive, // unfortunately var tmp_ws_lo = Avx2.CompareEqual( Avx2.And(v_lo, whitespace_shufti_mask), zeroBVec); var tmp_ws_hi = Avx2.CompareEqual( Avx2.And(v_hi, whitespace_shufti_mask), zeroBVec); uint64_t ws_res_0 = (uint32_t)Avx2.MoveMask(tmp_ws_lo); uint64_t ws_res_1 = (uint64_t)Avx2.MoveMask(tmp_ws_hi); uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32)); // mask off anything inside quotes structurals &= ~quote_mask; // add the real quote bits back into our bitmask as well, so we can // quickly traverse the strings we've spent all this trouble gathering structurals 
|= quote_bits; // Now, establish "pseudo-structural characters". These are non-whitespace // characters that are (a) outside quotes and (b) have a predecessor that's // either whitespace or a structural character. This means that subsequent // passes will get a chance to encounter the first character of every string // of non-whitespace and, if we're parsing an atom like true/false/null or a // number we can stop at the first whitespace or structural character // following it. // a qualified predecessor is something that can happen 1 position before an // psuedo-structural character uint64_t pseudo_pred = structurals | whitespace; uint64_t shifted_pseudo_pred = (pseudo_pred << 1) | prev_iter_ends_pseudo_pred; prev_iter_ends_pseudo_pred = pseudo_pred >> 63; uint64_t pseudo_structurals = shifted_pseudo_pred & (~whitespace) & (~quote_mask); structurals |= pseudo_structurals; // now, we've used our close quotes all we need to. So let's switch them off // they will be off in the quote mask and on in quote bits. 
structurals &= ~(quote_bits & ~quote_mask); //*(uint64_t *)(pj.structurals + idx / 8) = structurals; idx += 64; } uint32_t cnt2 = (uint32_t)hamming(structurals); uint32_t next_base2 = @base + cnt2; while (structurals != 0) { base_ptr[@base + 0] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals); structurals = structurals & (structurals - 1); base_ptr[@base + 1] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals); structurals = structurals & (structurals - 1); base_ptr[@base + 2] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals); structurals = structurals & (structurals - 1); base_ptr[@base + 3] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals); structurals = structurals & (structurals - 1); base_ptr[@base + 4] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals); structurals = structurals & (structurals - 1); base_ptr[@base + 5] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals); structurals = structurals & (structurals - 1); base_ptr[@base + 6] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals); structurals = structurals & (structurals - 1); base_ptr[@base + 7] = (uint32_t)idx - 64 + (uint32_t)trailingzeroes(structurals); structurals = structurals & (structurals - 1); @base += 8; } @base = next_base2; pj.n_structural_indexes = @base; if (base_ptr[pj.n_structural_indexes - 1] > len) { throw new InvalidOperationException("Internal bug"); } if (len != base_ptr[pj.n_structural_indexes - 1]) { // the string might not be NULL terminated, but we add a virtual NULL ending character. base_ptr[pj.n_structural_indexes++] = (uint32_t)len; } base_ptr[pj.n_structural_indexes] = 0; // make it safe to dereference one beyond this array #if SIMDJSON_UTF8VALIDATE // NOT TESTED YET! return Avx.TestZ(has_error, has_error); #else return true; #endif }
/// <summary>
/// Copies the JSON string that starts at <c>buf[offset]</c> into the parsed document's string
/// buffer, resolving backslash escapes along the way, and records it on the tape.
/// </summary>
/// <param name="buf">Input bytes. <c>buf[offset]</c> is expected to be the opening quote.
/// The input is consumed in 32-byte vector loads, so reads can run past the closing quote;
/// NOTE(review): this presumably relies on the caller padding the buffer - confirm.</param>
/// <param name="len">Not used by this method.</param>
/// <param name="pj">Parsed document; its <c>current_string_buf_loc</c> is read and advanced,
/// and <c>write_tape</c> is called once for the string.</param>
/// <param name="depth">Not used by this method.</param>
/// <param name="offset">Index in <paramref name="buf"/> of the opening quote.</param>
/// <returns>
/// <c>true</c> when the closing quote was found and every escape was valid; <c>false</c> when an
/// escape character has no entry in <c>escape_map</c>, when <c>handle_unicode_codepoint</c>
/// fails, or (only when built with CHECKUNESCAPED) when a byte &lt;= 0x1F appears in the string.
/// </returns>
public static bool parse_string(uint8_t* buf, size_t len, ParsedJson* pj, uint32_t depth, uint32_t offset)
{
#if SIMDJSON_SKIPSTRINGPARSING
    // For performance analysis it is sometimes useful to skip string parsing altogether.
    // The cast matches the write_tape call on the regular path below.
    pj->write_tape(0, (uint8_t)'"');
    return true; // always succeeds
#else
    uint8_t* src = &buf[offset + 1]; // we know that buf at offset is a "
    uint8_t* dst = pj->current_string_buf_loc;
#if JSON_TEST_STRINGS // for unit testing
    // C# has no "T* const" pointer declarations; a plain local pointer is used instead.
    uint8_t* start_of_string = dst;
#endif
    // Loop-invariant comparison vectors, built once.
    var slashVec = Vector256.Create((byte)'\\');
    var quoteVec = Vector256.Create((byte)'"');
#if CHECKUNESCAPED
    var unitsep = Vector256.Create((byte)0x1F);
#endif

    // The loop has no break: every exit is a return from inside the body.
    while (true)
    {
        Vector256<byte> v = Avx.LoadVector256(src);
        uint32_t bs_bits = (uint32_t)Avx2.MoveMask(Avx2.CompareEqual(v, slashVec));
        uint32_t quote_bits = (uint32_t)Avx2.MoveMask(Avx2.CompareEqual(v, quoteVec));

        // All Unicode characters may be placed within the quotation marks, except for the
        // characters that MUST be escaped: quotation mark, reverse solidus, and the control
        // characters (U+0000 through U+001F).
        // https://tools.ietf.org/html/rfc8259
#if CHECKUNESCAPED
        // A lane is all-ones where max(0x1F, byte) == 0x1F, i.e. where the byte is <= 0x1F.
        // (Could also be done with a saturated subtraction.)
        var unescaped_vec = Avx2.CompareEqual(Avx2.Max(unitsep, v), unitsep);
#endif // CHECKUNESCAPED

        uint32_t quote_dist = (uint32_t)trailingzeroes(quote_bits);
        uint32_t bs_dist = (uint32_t)trailingzeroes(bs_bits);

        // Store to dest unconditionally - we can overwrite the bits we don't like later.
        Avx.Store(dst, v);

        if (quote_dist < bs_dist)
        {
            // We encountered the quote first: terminate the copied string and finish.
            dst[quote_dist] = 0; // null terminate and get out
            pj->write_tape((size_t)pj->current_string_buf_loc - (size_t)pj->string_buf, (uint8_t)'"');
            pj->current_string_buf_loc = dst + quote_dist + 1; // the +1 is due to the 0 value
#if CHECKUNESCAPED
            // Check that there is no unescaped char before the quote:
            // (quote_bits - 1) & ~quote_bits selects the bits below the first quote.
            uint32_t unescaped_bits = (uint32_t)Avx2.MoveMask(unescaped_vec);
            bool is_ok = ((quote_bits - 1) & (~quote_bits) & unescaped_bits) == 0;
#if JSON_TEST_STRINGS // for unit testing
            if (is_ok)
            {
                foundString(buf + offset, start_of_string, pj->current_string_buf_loc - 1);
            }
            else
            {
                foundBadString(buf + offset);
            }
#endif // JSON_TEST_STRINGS
            return is_ok;
#else // CHECKUNESCAPED
#if JSON_TEST_STRINGS // for unit testing
            foundString(buf + offset, start_of_string, pj->current_string_buf_loc - 1);
#endif // JSON_TEST_STRINGS
            return true;
#endif // CHECKUNESCAPED
        }
        else if (quote_dist > bs_dist)
        {
            // We encountered a backslash first.
            // Note: for bs_dist == 31 this reads one byte past the vector loaded above.
            uint8_t escape_char = src[bs_dist + 1];
#if CHECKUNESCAPED
            // Reject control characters that appear before the backslash.
            uint32_t unescaped_bits = (uint32_t)Avx2.MoveMask(unescaped_vec);
            if (((bs_bits - 1) & (~bs_bits) & unescaped_bits) != 0)
            {
#if JSON_TEST_STRINGS // for unit testing
                foundBadString(buf + offset);
#endif // JSON_TEST_STRINGS
                return false;
            }
#endif // CHECKUNESCAPED
            if (escape_char == 'u')
            {
                // Move src/dst up to the backslash; they will be further adjusted
                // within the unicode codepoint handling code.
                src += bs_dist;
                dst += bs_dist;
                if (!handle_unicode_codepoint(&src, &dst))
                {
#if JSON_TEST_STRINGS // for unit testing
                    foundBadString(buf + offset);
#endif // JSON_TEST_STRINGS
                    return false;
                }
            }
            else
            {
                // Simple 1:1 conversion. Will eat bs_dist+2 characters in input and
                // write bs_dist+1 characters to output.
                // Note this may reach beyond the part of the buffer we've actually seen.
                uint8_t escape_result = escape_map[escape_char];
                if (escape_result == 0)
                {
#if JSON_TEST_STRINGS // for unit testing
                    foundBadString(buf + offset);
#endif // JSON_TEST_STRINGS
                    return false; // bogus escape value is an error
                }
                dst[bs_dist] = escape_result;
                src += bs_dist + 2;
                dst += bs_dist + 1;
            }
        }
        else
        {
            // The distances are the same. Since a quote and a backslash can't co-occur at
            // one position, it means we encountered neither in this 32-byte chunk.
            src += 32;
            dst += 32;
#if CHECKUNESCAPED
            // Check for unescaped chars anywhere in the chunk.
            if (Avx.TestZ(unescaped_vec, unescaped_vec) != true)
            {
#if JSON_TEST_STRINGS // for unit testing
                foundBadString(buf + offset);
#endif // JSON_TEST_STRINGS
                return false;
            }
#endif // CHECKUNESCAPED
        }
    }
#endif // SIMDJSON_SKIPSTRINGPARSING
}
// Declaration only: the method is marked `extern` and has no body here, so its
// implementation is provided outside this file.
// NOTE(review): no interop attribute (e.g. DllImport) is visible next to this declaration
// in this excerpt - confirm how the implementation is bound.
// The parameter roles below are inferred from the names alone and must be confirmed
// against the actual implementation:
//   tileStartX/tileEndX/tileStartY/tileEndY - presumably the bounds of the tile to shade
//   gBufferWidth/gBufferHeight              - presumably the G-buffer dimensions
//   inputData                               - pointer to an InputDataArrays value
//   cameraProj_11/_22/_33/_43               - presumably camera projection matrix entries
//   tileLightIndices/tileNumLights          - presumably the light index list for this tile and its length
//   visualizeLightCount                     - presumably toggles a light-count debug output
//   framebuffer_r/_g/_b                     - pointers to three separate byte buffers, presumably one per colour channel
public static extern void ShadeTile(int32_t tileStartX, int32_t tileEndX, int32_t tileStartY, int32_t tileEndY, int32_t gBufferWidth, int32_t gBufferHeight, InputDataArrays *inputData, float cameraProj_11, float cameraProj_22, float cameraProj_33, float cameraProj_43, int32_t *tileLightIndices, int32_t tileNumLights, bool visualizeLightCount, uint8_t *framebuffer_r, uint8_t *framebuffer_g, uint8_t *framebuffer_b) /*x77*/;