/**
 * Fills a caller-supplied buffer with 32-bit pseudorandom integers in a
 * single bulk generation pass.
 *
 * The asserts require that idx equals SFMT_N32 on entry, that size is a
 * multiple of 4 and that size is at least SFMT_N32.
 *
 * @param array destination buffer, reinterpreted as 128-bit blocks
 * @param size  number of 32-bit integers to generate
 */
public void sfmt_fill_array32(uint32_t* array, int size)
{
    assert(idx == SFMT_N32);
    assert(size % 4 == 0);
    assert(size >= SFMT_N32);

    // Four 32-bit words make up one 128-bit block.
    w128_t* blocks = (w128_t*)array;
    int block_count = size / 4;
    gen_rand_array(blocks, block_count);

    idx = SFMT_N32;
}
// If needed, allocate memory so that the object is able to process JSON
// documents having up to len bytes and a nesting depth of up to maxdepth.
//
// Returns true when buffers of sufficient size are available after the call,
// and false when len or maxdepth is zero, when len exceeds
// SIMDJSON_MAXSIZE_BYTES, or when any allocation fails.
public bool AllocateCapacity(size_t len, size_t maxdepth = DEFAULTMAXDEPTH)
{
    if ((maxdepth == 0) || (len == 0))
    {
        return(false);
    }
    if (len > SIMDJSON_MAXSIZE_BYTES)
    {
        return(false);
    }
    // The existing buffers can be reused only when BOTH the byte capacity and
    // the depth capacity already cover the request.
    if ((len <= bytecapacity) && (maxdepth <= depthcapacity))
    {
        return(true);
    }

    Deallocate();
    isvalid = false;
    bytecapacity = 0; // will only set it to len after allocations are a success
    n_structural_indexes = 0;

    uint32_t max_structures = (uint32_t)(ROUNDUP_N(len, 64) + 2 + 7);
    structural_indexes = allocate <uint32_t>(max_structures);
    // a pathological input like "[[[[..." would generate len tape elements,
    // so need a capacity of len + 1
    size_t localtapecapacity = ROUNDUP_N(len + 1, 64);
    // a document with only zero-length strings... could have len/3 string
    // and we would need len/3 * 5 bytes on the string buffer
    size_t localstringcapacity = ROUNDUP_N(5 * len / 3 + 32, 64);
    string_buf = allocate <uint8_t>(localstringcapacity);
    tape = allocate <uint64_t>(localtapecapacity);
    containing_scope_offset = allocate <uint32_t>(maxdepth);
    ret_address = allocate <char1>(maxdepth);

    if ((string_buf == null) || (tape == null) ||
        (containing_scope_offset == null) || (ret_address == null) ||
        (structural_indexes == null))
    {
        // Release whatever was obtained and clear the pointers so that a later
        // Deallocate() / AllocateCapacity() cannot free the same buffer twice.
        delete(ret_address);
        ret_address = null;
        delete(containing_scope_offset);
        containing_scope_offset = null;
        delete(tape);
        tape = null;
        delete(string_buf);
        string_buf = null;
        delete(structural_indexes);
        structural_indexes = null;
        return(false);
    }

    /*
     * // We do not need to initialize this content for parsing, though we could
     * // need to initialize it for safety.
     * memset(string_buf, 0 , localstringcapacity);
     * memset(structural_indexes, 0, max_structures * sizeof(uint32_t));
     * memset(tape, 0, localtapecapacity * sizeof(uint64_t));
     */
    bytecapacity = len;
    depthcapacity = maxdepth;
    tapecapacity = localtapecapacity;
    stringcapacity = localstringcapacity;
    return(true);
}
/**
 * Returns the next 32-bit pseudorandom integer.
 *
 * When every word of the state has been consumed (idx >= SFMT_N32) the whole
 * state is regenerated first and the read position restarts at 0.
 */
public uint32_t sfmt_genrand_uint32()
{
    bool exhausted = idx >= SFMT_N32;
    if (exhausted)
    {
        sfmt_gen_rand_all();
        idx = 0;
    }

    fixed (w128_t* blocks = this.state)
    {
        // View the 128-bit state blocks as a flat array of 32-bit words.
        uint32_t* words = &blocks[0].u32_0;
        uint32_t value = words[idx];
        idx++;
        return value;
    }
}
/**
 * Initializes the internal state array from a single 32-bit integer seed.
 *
 * Word 0 receives the seed; every following word is derived from its
 * predecessor. Afterwards idx is set to SFMT_N32 and the period
 * certification step is run.
 *
 * @param seed a 32-bit integer used as the seed.
 */
public void sfmt_init_gen_rand(uint32_t seed)
{
    fixed (w128_t* blocks = this.state)
    {
        uint32_t* words = &blocks[0].u32_0;

        uint32_t previous = seed;
        words[0] = previous;
        for (uint i = 1; i < SFMT_N32; i++)
        {
            previous = 1812433253U * (previous ^ (previous >> 30)) + i;
            words[i] = previous;
        }
    }

    idx = SFMT_N32;
    period_certification();
}
/*************************************************************************
 * In-place block cipher in the XXTEA ("corrected block TEA") style.
 *
 * v   is the n word data vector; it is transformed in place
 * n   is the word count: n > 1 encodes n words, n < -1 decodes -n words,
 *     and -1, 0 or 1 leave v untouched
 * key is the 4 word key, 128 bits
 *
 * Nothing is returned. Assumes the same endianness for coding and decoding.
 **************************************************************************/
void btea(uint32_t *v, int16_t n, uint32_t *key)
{
    const uint32_t DELTA = 0x9e3779b9;
    uint32_t y, z, sum;
    uint32_t p, rounds, e;

    // The cipher relies on 32-bit wrap-around arithmetic.
    unchecked
    {
        if (n > 1)
        {
            // encrypt
            int words = n;
            rounds = (uint32_t)(6 + 52 / words);
            sum = 0;
            z = v[words - 1];
            do
            {
                sum += DELTA;
                e = (sum >> 2) & 3;
                for (p = 0; p < words - 1; p++)
                {
                    y = v[p + 1];
                    z = v[p] += (((z >> 5 ^ y << 2) + (y >> 3 ^ z << 4)) ^ ((sum ^ y) + (key[(p & 3) ^ e] ^ z)));
                }
                // Last word wraps around and mixes with the first one.
                y = v[0];
                z = v[words - 1] += (((z >> 5 ^ y << 2) + (y >> 3 ^ z << 4)) ^ ((sum ^ y) + (key[(p & 3) ^ e] ^ z)));
            } while (--rounds != 0);
        }
        else if (n < -1)
        {
            // decrypt; negate in int so that n == -32768 does not overflow
            int words = -(int)n;
            rounds = (uint32_t)(6 + 52 / words);
            sum = rounds * DELTA;
            y = v[0];
            do
            {
                e = (sum >> 2) & 3;
                for (p = (uint32_t)(words - 1); p > 0; p--)
                {
                    z = v[p - 1];
                    y = v[p] -= (((z >> 5 ^ y << 2) + (y >> 3 ^ z << 4)) ^ ((sum ^ y) + (key[(p & 3) ^ e] ^ z)));
                }
                // First word wraps around and mixes with the last one (p == 0 here).
                z = v[words - 1];
                y = v[0] -= (((z >> 5 ^ y << 2) + (y >> 3 ^ z << 4)) ^ ((sum ^ y) + (key[(p & 3) ^ e] ^ z)));
                sum -= DELTA;
            } while (--rounds != 0);
        }
    }
}
/**
 * Returns the next 64-bit pseudorandom integer, read as two consecutive
 * 32-bit state words.
 *
 * The assert requires the read position idx to be even. When the state is
 * exhausted (idx >= SFMT_N32) it is regenerated and idx restarts at 0.
 */
public uint64_t sfmt_genrand_uint64()
{
    assert(idx % 2 == 0);

    bool exhausted = idx >= SFMT_N32;
    if (exhausted)
    {
        sfmt_gen_rand_all();
        idx = 0;
    }

    fixed (w128_t* blocks = this.state)
    {
        uint32_t* words = &blocks[0].u32_0;
        // Reinterpret the two 32-bit words at idx as one 64-bit value.
        uint64_t* pair = (uint64_t*)(words + idx);
        var value = *pair;
        idx += 2;
        return value;
    }
}
// If needed, allocate memory so that the object is able to process JSON
// documents having up to len bytes and a nesting depth of up to maxdepth.
//
// Returns true when buffers of sufficient size are available after the call,
// and false when len or maxdepth is zero or when any allocation fails.
public bool AllocateCapacity(size_t len, size_t maxdepth = DEFAULTMAXDEPTH)
{
    if ((maxdepth == 0) || (len == 0))
    {
        Debug.WriteLine("capacities must be non-zero ");
        return(false);
    }

    // len is known to be non-zero from here on.
    // The existing buffers can be reused only when BOTH the byte capacity and
    // the depth capacity already cover the request.
    if ((len <= bytecapacity) && (maxdepth <= depthcapacity))
    {
        return(true);
    }
    Deallocate();

    isvalid = false;
    bytecapacity = 0; // will only set it to len after allocations are a success
    n_structural_indexes = 0;

    uint32_t max_structures = (uint32_t)ROUNDUP_N(len, 64) + 2 + 7;
    structural_indexes = Utils.allocate <uint32_t>(max_structures);
    // An input made only of opening brackets ("[[[[...") can produce one tape
    // element per input byte, so reserve room for len + 1 entries; without the
    // + 1 the tape is one entry short whenever len is a multiple of 64.
    // NOTE(review): assumes the parser writes one extra (root) tape entry - confirm.
    size_t localtapecapacity = ROUNDUP_N(len + 1, 64);
    size_t localstringcapacity = ROUNDUP_N(len, 64);
    string_buf = Utils.allocate <uint8_t>(localstringcapacity);
    tape = Utils.allocate <uint64_t>(localtapecapacity);
    containing_scope_offset = Utils.allocate <uint32_t>(maxdepth);
    ret_address = Utils.allocate <bytechar>(maxdepth);

    if ((string_buf == null) || (tape == null) ||
        (containing_scope_offset == null) || (ret_address == null) ||
        (structural_indexes == null))
    {
        Deallocate();
        return(false);
    }

    bytecapacity = len;
    depthcapacity = maxdepth;
    tapecapacity = localtapecapacity;
    stringcapacity = localstringcapacity;
    return(true);
}
// Marks the object invalid, resets every recorded capacity to zero and
// releases each buffer that is currently allocated, clearing its pointer
// afterwards.
private void Deallocate()
{
    // Bookkeeping first: nothing is usable once the buffers are gone.
    isvalid = false;
    bytecapacity = 0;
    depthcapacity = 0;
    tapecapacity = 0;
    stringcapacity = 0;

    // Buffers are released in a fixed order; each pointer is nulled right
    // after its delete so it is never freed twice.
    if (ret_address != null)
    {
        delete(ret_address);
        ret_address = null;
    }

    if (containing_scope_offset != null)
    {
        delete(containing_scope_offset);
        containing_scope_offset = null;
    }

    if (tape != null)
    {
        delete(tape);
        tape = null;
    }

    if (string_buf != null)
    {
        delete(string_buf);
        string_buf = null;
    }

    if (structural_indexes != null)
    {
        delete(structural_indexes);
        structural_indexes = null;
    }
}
/**
 * Certifies the period of 2^{MEXP}.
 *
 * Computes the parity of (state word AND parity word) over the first four
 * 32-bit words. When that parity is 1 nothing is changed; otherwise the
 * lowest bit set in the first non-zero parity word is flipped in the
 * corresponding state word.
 */
void period_certification()
{
    fixed (w128_t* blocks = this.state)
    {
        uint32_t* words = &blocks[0].u32_0;

        uint folded = 0;
        for (int w = 0; w < 4; w++)
        {
            folded ^= words[w] & parity[w];
        }

        // Fold the 32 bits down to a single parity bit (shifts 16, 8, 4, 2, 1).
        folded ^= folded >> 16;
        folded ^= folded >> 8;
        folded ^= folded >> 4;
        folded ^= folded >> 2;
        folded ^= folded >> 1;

        /* check OK */
        if ((folded & 1) == 1)
        {
            return;
        }

        /* check NG, and modification */
        for (int w = 0; w < 4; w++)
        {
            // Walk a single-bit mask from bit 0 to bit 31; it becomes 0 after
            // the 32nd shift, which ends the scan of this word.
            for (uint32_t bit = 1; bit != 0; bit <<= 1)
            {
                if ((bit & parity[w]) != 0)
                {
                    words[w] ^= bit;
                    return;
                }
            }
        }
    }
}
/**
 * Scans buf in 64-byte chunks and records, in pj.structural_indexes, the byte
 * offset of every structural and pseudo-structural character found outside
 * of strings. A final partial chunk is copied into a space-padded scratch
 * buffer before being scanned.
 *
 * @param buf input bytes
 * @param len number of bytes in buf
 * @param pj  destination; its structural_indexes buffer and
 *            n_structural_indexes counter are written
 * @return false when len exceeds pj.bytecapacity, when no structural
 *         character was found, or (with SIMDJSON_UTF8VALIDATE) when the
 *         UTF-8 check flagged an error; true otherwise
 * @throws InvalidOperationException when the last recorded offset lies beyond len
 */
internal static bool find_structural_bits(uint8_t* buf, size_t len, ParsedJson pj)
{
    if (len > pj.bytecapacity)
    {
        Console.WriteLine("Your ParsedJson object only supports documents up to " + pj.bytecapacity + " bytes but you are trying to process " + len + " bytes\n");
        return false;
    }

    uint32_t* base_ptr = pj.structural_indexes;
    uint32_t @base = 0;
#if SIMDJSON_UTF8VALIDATE // NOT TESTED YET!
    var has_error = Vector256<byte>.Zero;
    var previous = new avx_processed_utf_bytes();
    previous.rawbytes = Vector256<byte>.Zero;
    previous.high_nibbles = Vector256<byte>.Zero;
    previous.carried_continuations = Vector256<byte>.Zero;
    var highbit = Vector256.Create((byte)0x80);
#endif

    const uint64_t even_bits = 0x5555555555555555UL;
    const uint64_t odd_bits = ~even_bits;

    // for now, just work in 64-byte chunks
    // we have padded the input out to 64 byte multiple with the remainder being
    // zeros

    // persistent state across loop
    uint64_t prev_iter_ends_odd_backslash = 0UL; // either 0 or 1, but a 64-bit value
    uint64_t prev_iter_inside_quote = 0UL;       // either all zeros or all ones

    // effectively the very first char is considered to follow "whitespace" for the
    // purposes of pseudo-structural character detection
    uint64_t prev_iter_ends_pseudo_pred = 1UL;
    size_t lenminus64 = len < 64 ? 0 : len - 64;
    size_t idx = 0;
    // Structurals of the PREVIOUS chunk; they are flattened one iteration late.
    uint64_t structurals = 0;

    // C#: assign static readonly fields to locals before the loop
    Vector256<byte> low_nibble_mask = s_low_nibble_mask;
    Vector256<byte> high_nibble_mask = s_high_nibble_mask;
    Vector256<byte> utf8ValidVec = s_utf8ValidVec;

    var structural_shufti_mask = Vector256.Create((byte)0x7);
    var whitespace_shufti_mask = Vector256.Create((byte)0x18);
    var slashVec = Vector256.Create((bytechar) '\\').AsByte();
    var ffVec = Vector128.Create((byte) 0xFF).AsUInt64();
    var doubleQuoteVec = Vector256.Create((byte)'"');
    var zeroBVec = Vector256.Create((byte) 0);
    var vec7f = Vector256.Create((byte) 0x7f);

    for (; idx < lenminus64; idx += 64)
    {
        var input_lo = Avx.LoadVector256(buf + idx + 0);
        var input_hi = Avx.LoadVector256(buf + idx + 32);
#if SIMDJSON_UTF8VALIDATE // NOT TESTED YET!
        if ((Avx.TestZ(Avx2.Or(input_lo, input_hi), highbit)) == true)
        {
            // it is ascii, we just check continuation
            // NOTE(review): this path is untested; the signed compare needs
            // both operands as sbyte vectors.
            has_error = Avx2.Or(
                Avx2.CompareGreaterThan(previous.carried_continuations.AsSByte(), utf8ValidVec.AsSByte()).AsByte(),
                has_error);
        }
        else
        {
            // it is not ascii so we have to do heavy work
            previous = Utf8Validation.avxcheckUTF8Bytes(input_lo, ref previous, ref has_error);
            previous = Utf8Validation.avxcheckUTF8Bytes(input_hi, ref previous, ref has_error);
        }
#endif

        ////////////////////////////////////////////////////////////////////////
        //     Step 1: detect odd sequences of backslashes
        ////////////////////////////////////////////////////////////////////////
        uint64_t bs_bits = cmp_mask_against_input(input_lo, input_hi, slashVec);
        uint64_t start_edges = bs_bits & ~(bs_bits << 1);
        // flip lowest if we have an odd-length run at the end of the prior
        // iteration
        uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
        uint64_t even_starts = start_edges & even_start_mask;
        uint64_t odd_starts = start_edges & ~even_start_mask;
        uint64_t even_carries = bs_bits + even_starts;

        uint64_t odd_carries;
        // must record the carry-out of our odd-carries out of bit 63; this
        // indicates whether the sense of any edge going to the next iteration
        // should be flipped
        bool iter_ends_odd_backslash = add_overflow(bs_bits, odd_starts, &odd_carries);

        // push in bit zero as a potential end if we had an odd-numbered run at
        // the end of the previous iteration
        odd_carries |= prev_iter_ends_odd_backslash;
        prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1UL : 0x0UL;
        uint64_t even_carry_ends = even_carries & ~bs_bits;
        uint64_t odd_carry_ends = odd_carries & ~bs_bits;
        uint64_t even_start_odd_end = even_carry_ends & odd_bits;
        uint64_t odd_start_even_end = odd_carry_ends & even_bits;
        uint64_t odd_ends = even_start_odd_end | odd_start_even_end;

        ////////////////////////////////////////////////////////////////////////
        //     Step 2: detect insides of quote pairs
        ////////////////////////////////////////////////////////////////////////
        uint64_t quote_bits = cmp_mask_against_input(input_lo, input_hi, doubleQuoteVec);
        quote_bits = quote_bits & ~odd_ends;
        uint64_t quote_mask = Sse2.X64.ConvertToUInt64(Pclmulqdq.CarrylessMultiply(
            Vector128.Create(quote_bits, 0UL /*C# reversed*/), ffVec, 0));

        // Flatten the previous chunk's structurals while the carry-less
        // multiply above is still in flight.
        @base = flatten_structurals(base_ptr, @base, (uint32_t)idx, structurals);

        quote_mask ^= prev_iter_inside_quote;
        // right shift of a signed value expected to be well-defined and standard
        // compliant as of C++20, John Regher from Utah U. says this is fine code
        prev_iter_inside_quote = (uint64_t) ((int64_t) quote_mask >> 63);

        var v_lo = Avx2.And(
            Avx2.Shuffle(low_nibble_mask, input_lo),
            Avx2.Shuffle(high_nibble_mask,
                Avx2.And(Avx2.ShiftRightLogical(input_lo.AsUInt32(), 4).AsByte(), vec7f)));
        var v_hi = Avx2.And(
            Avx2.Shuffle(low_nibble_mask, input_hi),
            Avx2.Shuffle(high_nibble_mask,
                Avx2.And(Avx2.ShiftRightLogical(input_hi.AsUInt32(), 4).AsByte(), vec7f)));

        var tmp_lo = Avx2.CompareEqual(Avx2.And(v_lo, structural_shufti_mask), zeroBVec);
        var tmp_hi = Avx2.CompareEqual(Avx2.And(v_hi, structural_shufti_mask), zeroBVec);
        uint64_t structural_res_0 = (uint32_t) Avx2.MoveMask(tmp_lo);
        uint64_t structural_res_1 = (uint64_t) Avx2.MoveMask(tmp_hi);
        structurals = ~(structural_res_0 | (structural_res_1 << 32));

        var tmp_ws_lo = Avx2.CompareEqual(Avx2.And(v_lo, whitespace_shufti_mask), zeroBVec);
        var tmp_ws_hi = Avx2.CompareEqual(Avx2.And(v_hi, whitespace_shufti_mask), zeroBVec);
        uint64_t ws_res_0 = (uint32_t) Avx2.MoveMask(tmp_ws_lo);
        uint64_t ws_res_1 = (uint64_t) Avx2.MoveMask(tmp_ws_hi);
        uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));

        // mask off anything inside quotes
        structurals &= ~quote_mask;
        // add the real quote bits back into our bitmask as well, so we can
        // quickly traverse the strings we've spent all this trouble gathering
        structurals |= quote_bits;

        // Now, establish "pseudo-structural characters". These are non-whitespace
        // characters that are (a) outside quotes and (b) have a predecessor that's
        // either whitespace or a structural character. This means that subsequent
        // passes will get a chance to encounter the first character of every string
        // of non-whitespace and, if we're parsing an atom like true/false/null or a
        // number we can stop at the first whitespace or structural character
        // following it.

        // a qualified predecessor is something that can happen 1 position before a
        // pseudo-structural character
        uint64_t pseudo_pred = structurals | whitespace;
        uint64_t shifted_pseudo_pred = (pseudo_pred << 1) | prev_iter_ends_pseudo_pred;
        prev_iter_ends_pseudo_pred = pseudo_pred >> 63;
        uint64_t pseudo_structurals = shifted_pseudo_pred & (~whitespace) & (~quote_mask);
        structurals |= pseudo_structurals;

        // now, we've used our close quotes all we need to. So let's switch them off
        // they will be off in the quote mask and on in quote bits.
        structurals &= ~(quote_bits & ~quote_mask);
    }

    ////////////////
    // we use a giant copy-paste which is ugly.
    // but otherwise the string needs to be properly padded or else we
    // risk invalidating the UTF-8 checks.
    ////////////
    if (idx < len)
    {
        // Copy the remaining bytes into a 64-byte scratch buffer padded with
        // spaces (0x20) so that full-width loads are safe.
        uint8_t* tmpbuf = stackalloc uint8_t[64];
        memset(tmpbuf, 0x20, 64);
        memcpy(tmpbuf, buf + idx, len - idx);
        Vector256<byte> input_lo = Avx.LoadVector256(tmpbuf + 0);
        Vector256<byte> input_hi = Avx.LoadVector256(tmpbuf + 32);
#if SIMDJSON_UTF8VALIDATE // NOT TESTED YET!
        // highbit is the vector created before the main loop.
        if ((Avx.TestZ(Avx2.Or(input_lo, input_hi), highbit)) == true)
        {
            // it is ascii, we just check continuation
            has_error = Avx2.Or(
                Avx2.CompareGreaterThan(previous.carried_continuations.AsSByte(), utf8ValidVec.AsSByte()).AsByte(),
                has_error);
        }
        else
        {
            // it is not ascii so we have to do heavy work
            previous = Utf8Validation.avxcheckUTF8Bytes(input_lo, ref previous, ref has_error);
            previous = Utf8Validation.avxcheckUTF8Bytes(input_hi, ref previous, ref has_error);
        }
#endif

        ////////////////////////////////////////////////////////////////////////
        //     Step 1: detect odd sequences of backslashes
        ////////////////////////////////////////////////////////////////////////
        uint64_t bs_bits = cmp_mask_against_input(input_lo, input_hi, slashVec);
        uint64_t start_edges = bs_bits & ~(bs_bits << 1);
        // flip lowest if we have an odd-length run at the end of the prior
        // iteration
        uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
        uint64_t even_starts = start_edges & even_start_mask;
        uint64_t odd_starts = start_edges & ~even_start_mask;
        uint64_t even_carries = bs_bits + even_starts;

        uint64_t odd_carries;
        // The carry-out is irrelevant for the last chunk, but the call is still
        // required: it is what computes odd_carries.
        add_overflow(bs_bits, odd_starts, &odd_carries);

        // push in bit zero as a potential end if we had an odd-numbered run at
        // the end of the previous iteration
        odd_carries |= prev_iter_ends_odd_backslash;
        uint64_t even_carry_ends = even_carries & ~bs_bits;
        uint64_t odd_carry_ends = odd_carries & ~bs_bits;
        uint64_t even_start_odd_end = even_carry_ends & odd_bits;
        uint64_t odd_start_even_end = odd_carry_ends & even_bits;
        uint64_t odd_ends = even_start_odd_end | odd_start_even_end;

        ////////////////////////////////////////////////////////////////////////
        //     Step 2: detect insides of quote pairs
        ////////////////////////////////////////////////////////////////////////
        uint64_t quote_bits = cmp_mask_against_input(input_lo, input_hi, doubleQuoteVec);
        quote_bits = quote_bits & ~odd_ends;
        uint64_t quote_mask = (uint64_t)Sse2.X64.ConvertToInt64(Pclmulqdq.CarrylessMultiply(
            Vector128.Create(quote_bits, 0UL /*C# reversed*/), ffVec, 0).AsInt64());
        quote_mask ^= prev_iter_inside_quote;

        //BUG? https://github.com/dotnet/coreclr/issues/22813
        // prev_iter_inside_quote is deliberately not updated: this is the last chunk.

        @base = flatten_structurals(base_ptr, @base, (uint32_t)idx, structurals);

        // How do we build up a user traversable data structure
        // first, do a 'shufti' to detect structural JSON characters
        // they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c
        // these go into the first 3 buckets of the comparison (1/2/4)

        // we are also interested in the four whitespace characters
        // space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d
        // these go into the next 2 buckets of the comparison (8/16)
        var v_lo = Avx2.And(
            Avx2.Shuffle(low_nibble_mask, input_lo),
            Avx2.Shuffle(high_nibble_mask,
                Avx2.And(Avx2.ShiftRightLogical(input_lo.AsUInt32(), 4).AsByte(), vec7f)));
        var v_hi = Avx2.And(
            Avx2.Shuffle(low_nibble_mask, input_hi),
            Avx2.Shuffle(high_nibble_mask,
                Avx2.And(Avx2.ShiftRightLogical(input_hi.AsUInt32(), 4).AsByte(), vec7f)));

        var tmp_lo = Avx2.CompareEqual(Avx2.And(v_lo, structural_shufti_mask), zeroBVec);
        var tmp_hi = Avx2.CompareEqual(Avx2.And(v_hi, structural_shufti_mask), zeroBVec);
        uint64_t structural_res_0 = (uint32_t)Avx2.MoveMask(tmp_lo);
        uint64_t structural_res_1 = (uint64_t)Avx2.MoveMask(tmp_hi);
        structurals = ~(structural_res_0 | (structural_res_1 << 32));

        // this additional mask and transfer is non-trivially expensive,
        // unfortunately
        var tmp_ws_lo = Avx2.CompareEqual(Avx2.And(v_lo, whitespace_shufti_mask), zeroBVec);
        var tmp_ws_hi = Avx2.CompareEqual(Avx2.And(v_hi, whitespace_shufti_mask), zeroBVec);
        uint64_t ws_res_0 = (uint32_t)Avx2.MoveMask(tmp_ws_lo);
        uint64_t ws_res_1 = (uint64_t)Avx2.MoveMask(tmp_ws_hi);
        uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));

        // mask off anything inside quotes
        structurals &= ~quote_mask;
        // add the real quote bits back into our bitmask as well, so we can
        // quickly traverse the strings we've spent all this trouble gathering
        structurals |= quote_bits;

        // Pseudo-structural characters: non-whitespace characters outside quotes
        // whose predecessor is whitespace or a structural character.
        uint64_t pseudo_pred = structurals | whitespace;
        uint64_t shifted_pseudo_pred = (pseudo_pred << 1) | prev_iter_ends_pseudo_pred;
        prev_iter_ends_pseudo_pred = pseudo_pred >> 63;
        uint64_t pseudo_structurals = shifted_pseudo_pred & (~whitespace) & (~quote_mask);
        structurals |= pseudo_structurals;

        // now, we've used our close quotes all we need to. So let's switch them off
        // they will be off in the quote mask and on in quote bits.
        structurals &= ~(quote_bits & ~quote_mask);
        idx += 64;
    }

    // Flatten the structurals of the last processed chunk.
    @base = flatten_structurals(base_ptr, @base, (uint32_t)idx, structurals);
    pj.n_structural_indexes = @base;

    // Without this guard the "- 1" below would wrap around and index far
    // outside the buffer when nothing structural was found.
    if (pj.n_structural_indexes == 0)
    {
        return false;
    }
    if (base_ptr[pj.n_structural_indexes - 1] > len)
    {
        throw new InvalidOperationException("Internal bug");
    }
    if (len != base_ptr[pj.n_structural_indexes - 1])
    {
        // the string might not be NULL terminated, but we add a virtual NULL ending character.
        base_ptr[pj.n_structural_indexes++] = (uint32_t)len;
    }
    base_ptr[pj.n_structural_indexes] = 0; // make it safe to dereference one beyond this array

#if SIMDJSON_UTF8VALIDATE // NOT TESTED YET!
    return Avx.TestZ(has_error, has_error);
#else
    return true;
#endif
}

// Writes the offset of every set bit of structurals (the mask of the 64-byte
// chunk that ENDS at idx) to base_ptr, starting at slot @base, and returns the
// slot that follows the last real entry. Slots are written eight at a time, so
// up to seven slots past the returned position may receive scratch values.
[System.Runtime.CompilerServices.MethodImpl(System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)]
private static uint32_t flatten_structurals(uint32_t* base_ptr, uint32_t @base, uint32_t idx, uint64_t structurals)
{
    uint32_t cnt = (uint32_t)hamming(structurals);
    uint32_t next_base = @base + cnt;
    while (structurals != 0)
    {
        base_ptr[@base + 0] = idx - 64 + (uint32_t)trailingzeroes(structurals);
        structurals = structurals & (structurals - 1);
        base_ptr[@base + 1] = idx - 64 + (uint32_t)trailingzeroes(structurals);
        structurals = structurals & (structurals - 1);
        base_ptr[@base + 2] = idx - 64 + (uint32_t)trailingzeroes(structurals);
        structurals = structurals & (structurals - 1);
        base_ptr[@base + 3] = idx - 64 + (uint32_t)trailingzeroes(structurals);
        structurals = structurals & (structurals - 1);
        base_ptr[@base + 4] = idx - 64 + (uint32_t)trailingzeroes(structurals);
        structurals = structurals & (structurals - 1);
        base_ptr[@base + 5] = idx - 64 + (uint32_t)trailingzeroes(structurals);
        structurals = structurals & (structurals - 1);
        base_ptr[@base + 6] = idx - 64 + (uint32_t)trailingzeroes(structurals);
        structurals = structurals & (structurals - 1);
        base_ptr[@base + 7] = idx - 64 + (uint32_t)trailingzeroes(structurals);
        structurals = structurals & (structurals - 1);
        @base += 8;
    }
    return next_base;
}
/**
 * Public entry point for seeding from an array of 32-bit integers; forwards
 * both arguments unchanged to dsfmt_chk_init_by_array.
 *
 * @param init_key   the array of 32-bit integers used as the seed
 * @param key_length the length of init_key
 */
public void dsfmt_init_by_array(uint32_t *init_key, uint key_length) { dsfmt_chk_init_by_array(init_key, key_length); }
/**
 * Initializes the internal state array from an array of 32-bit integers
 * used as the seed.
 *
 * Every state word is first filled with the byte pattern 0x8b, then the key
 * is mixed in with ini_func1 and the result is scrambled with ini_func2.
 * Finally initial_mask() and period_certification() are run and idx is set
 * to DSFMT_N64.
 *
 * @param init_key   the array of 32-bit integers, used as a seed.
 * @param key_length the length of init_key.
 */
void dsfmt_chk_init_by_array(uint32_t* init_key, uint key_length)
{
    const int size = (DSFMT_N + 1) * 4;
    const int lag = (size >= 623) ? 11 : (size >= 68) ? 7 : (size >= 39) ? 5 : 3;
    const int mid = (size - lag) / 2;

    fixed (w128_t* blocks = this.status)
    {
        var words = &blocks[0].u32_0;

        // Equivalent of memset(status, 0x8b, sizeof(status)).
        for (var b = 0; b <= DSFMT_N; ++b)
        {
            blocks[b].u64_0 = 0x8b8b8b8b8b8b8b8bUL;
            blocks[b].u64_1 = 0x8b8b8b8b8b8b8b8bUL;
        }

        // Number of mixing steps: at least one per state word, and enough to
        // consume the whole key.
        uint count = (key_length + 1 > size) ? key_length + 1 : (uint)size;

        // Step 0 mixes in the key length.
        uint32_t r = ini_func1(words[0] ^ words[mid % size] ^ words[(size - 1) % size]);
        words[mid % size] += r;
        r += key_length;
        words[(mid + lag) % size] += r;
        words[0] = r;
        count--;

        uint i = 1;
        uint j = 0;

        // Steps that consume one key word each.
        while ((j < count) && (j < key_length))
        {
            uint at_mid = (i + mid) % size;
            uint at_lag = (i + mid + lag) % size;
            uint at_prev = (i + size - 1) % size;

            r = ini_func1(words[i] ^ words[at_mid] ^ words[at_prev]);
            words[at_mid] += r;
            r += init_key[j] + i;
            words[at_lag] += r;
            words[i] = r;
            i = (i + 1) % size;
            j++;
        }

        // Remaining steps once the key is exhausted.
        while (j < count)
        {
            uint at_mid = (i + mid) % size;
            uint at_lag = (i + mid + lag) % size;
            uint at_prev = (i + size - 1) % size;

            r = ini_func1(words[i] ^ words[at_mid] ^ words[at_prev]);
            words[at_mid] += r;
            r += i;
            words[at_lag] += r;
            words[i] = r;
            i = (i + 1) % size;
            j++;
        }

        // Final scrambling pass over every state word.
        for (j = 0; j < size; j++)
        {
            uint at_mid = (i + mid) % size;
            uint at_lag = (i + mid + lag) % size;
            uint at_prev = (i + size - 1) % size;

            r = ini_func2(words[i] + words[at_mid] + words[at_prev]);
            words[at_mid] ^= r;
            r -= i;
            words[at_lag] ^= r;
            words[i] = r;
            i = (i + 1) % size;
        }
    }

    initial_mask();
    period_certification();
    idx = DSFMT_N64;
}
/**
 * Initializes the internal state array from an array of 32-bit integers
 * used as the seed.
 *
 * Every state word is first filled with the byte pattern 0x8b, then the key
 * is mixed in with func1 and the result is scrambled with func2. Finally idx
 * is set to SFMT_N32 and period_certification() is run.
 *
 * @param init_key   the array of 32-bit integers, used as a seed.
 * @param key_length the length of init_key.
 */
public void sfmt_init_by_array(uint32_t* init_key, uint key_length)
{
    const int size = SFMT_N * 4;
    const int lag = (size >= 623) ? 11 : (size >= 68) ? 7 : (size >= 39) ? 5 : 3;
    const int mid = (size - lag) / 2;

    fixed (w128_t* blocks = this.state)
    {
        uint32_t* words = &blocks[0].u32_0;

        // Equivalent of memset(psfmt32, 0x8b, sizeof(sfmt_t)).
        for (var b = 0; b < SFMT_N; ++b)
        {
            blocks[b].u32_0 = 0x8b8b8b8bU;
            blocks[b].u32_1 = 0x8b8b8b8bU;
            blocks[b].u32_2 = 0x8b8b8b8bU;
            blocks[b].u32_3 = 0x8b8b8b8bU;
        }

        // Number of mixing steps: at least one per state word, and enough to
        // consume the whole key.
        uint count = (key_length + 1 > SFMT_N32) ? key_length + 1 : (uint)SFMT_N32;

        // Step 0 mixes in the key length.
        uint32_t r = func1(words[0] ^ words[mid] ^ words[SFMT_N32 - 1]);
        words[mid] += r;
        r += key_length;
        words[mid + lag] += r;
        words[0] = r;
        count--;

        uint i = 1;
        uint j = 0;

        // Steps that consume one key word each.
        while ((j < count) && (j < key_length))
        {
            uint at_mid = (i + mid) % SFMT_N32;
            uint at_lag = (i + mid + lag) % SFMT_N32;
            uint at_prev = (i + SFMT_N32 - 1) % SFMT_N32;

            r = func1(words[i] ^ words[at_mid] ^ words[at_prev]);
            words[at_mid] += r;
            r += init_key[j] + i;
            words[at_lag] += r;
            words[i] = r;
            i = (i + 1) % SFMT_N32;
            j++;
        }

        // Remaining steps once the key is exhausted.
        while (j < count)
        {
            uint at_mid = (i + mid) % SFMT_N32;
            uint at_lag = (i + mid + lag) % SFMT_N32;
            uint at_prev = (i + SFMT_N32 - 1) % SFMT_N32;

            r = func1(words[i] ^ words[at_mid] ^ words[at_prev]);
            words[at_mid] += r;
            r += i;
            words[at_lag] += r;
            words[i] = r;
            i = (i + 1) % SFMT_N32;
            j++;
        }

        // Final scrambling pass over every state word.
        for (j = 0; j < SFMT_N32; j++)
        {
            uint at_mid = (i + mid) % SFMT_N32;
            uint at_lag = (i + mid + lag) % SFMT_N32;
            uint at_prev = (i + SFMT_N32 - 1) % SFMT_N32;

            r = func2(words[i] + words[at_mid] + words[at_prev]);
            words[at_mid] ^= r;
            r -= i;
            words[at_lag] ^= r;
            words[i] = r;
            i = (i + 1) % SFMT_N32;
        }
    }

    idx = SFMT_N32;
    period_certification();
}
/**
 * Scans buf in 64-byte chunks and records, in pj.structural_indexes, the byte
 * offset of every structural and pseudo-structural character. A final
 * partial chunk is copied into a space-padded scratch buffer before being
 * scanned.
 *
 * @param buf input bytes
 * @param len number of bytes in buf
 * @param pj  destination; its structural_indexes buffer and
 *            n_structural_indexes counter are written
 * @return CAPACITY when len exceeds pj.bytecapacity, UNCLOSED_STRING when the
 *         input ends inside a string, EMPTY when nothing structural was
 *         found, UNEXPECTED_ERROR when the last recorded offset lies beyond
 *         len, UNESCAPED_CHARS when error_mask is non-zero, otherwise SUCCESS
 *         (or the result of check_utf8_errors with SIMDJSON_UTF8VALIDATE)
 */
internal static JsonParseError find_structural_bits(uint8_t *buf, size_t len, ParsedJson pj)
{
    if (len > pj.bytecapacity)
    {
        return(JsonParseError.CAPACITY);
    }

    uint32_t *base_ptr = pj.structural_indexes;
    uint32_t @base = 0;
#if SIMDJSON_UTF8VALIDATE
    // NOTE(review): state is declared without an initializer and passed by
    // value below; confirm against check_utf8's actual signature.
    utf8_checking_state state;
#endif

    // we have padded the input out to 64 byte multiple with the remainder being
    // zeros

    // persistent state across loop
    // does the last iteration end with an odd-length sequence of backslashes?
    // either 0 or 1, but a 64-bit value
    uint64_t prev_iter_ends_odd_backslash = 0UL;
    // does the previous iteration end inside a double-quote pair?
    uint64_t prev_iter_inside_quote = 0UL; // either all zeros or all ones
    // does the previous iteration end on something that is a predecessor of a
    // pseudo-structural character - i.e. whitespace or a structural character
    // effectively the very first char is considered to follow "whitespace" for
    // the purposes of pseudo-structural character detection so we initialize to 1
    uint64_t prev_iter_ends_pseudo_pred = 1UL;

    // structurals are persistent state across loop as we flatten them on the
    // subsequent iteration into our array pointed to be base_ptr.
    // This is harmless on the first iteration as structurals==0
    // and is done for performance reasons; we can hide some of the latency of the
    // expensive carryless multiply in the previous step with this work
    uint64_t structurals = 0;

    size_t lenminus64 = len < 64 ? 0 : len - 64;
    size_t idx = 0;
    uint64_t error_mask = 0; // for unescaped characters within strings (ASCII code points < 0x20)

    for (; idx < lenminus64; idx += 64)
    {
        //__builtin_prefetch(buf + idx + 128);
        simd_input @in = fill_input(buf + idx);
#if SIMDJSON_UTF8VALIDATE
        // "in" is a reserved word in C#, so the local must be spelled @in.
        check_utf8(@in, state);
#endif
        // detect odd sequences of backslashes
        uint64_t odd_ends = find_odd_backslash_sequences(
            @in, ref prev_iter_ends_odd_backslash);

        // detect insides of quote pairs ("quote_mask") and also our quote_bits
        // themselves
        uint64_t quote_bits = 0;
        uint64_t quote_mask = find_quote_mask_and_bits(
            @in, odd_ends, ref prev_iter_inside_quote, ref quote_bits, ref error_mask);

        // take the previous iterations structural bits, not our current iteration,
        // and flatten
        flatten_bits(base_ptr, ref @base, (uint32_t)idx, structurals);

        uint64_t whitespace = 0;
        find_whitespace_and_structurals(@in, ref whitespace, ref structurals);

        // fixup structurals to reflect quotes and add pseudo-structural characters
        structurals = finalize_structurals(structurals, whitespace, quote_mask,
                                           quote_bits, ref prev_iter_ends_pseudo_pred);
    }

    ////////////////
    // we use a giant copy-paste which is ugly.
    // but otherwise the string needs to be properly padded or else we
    // risk invalidating the UTF-8 checks.
    ////////////
    if (idx < len)
    {
        // Copy the remaining bytes into a 64-byte scratch buffer padded with
        // spaces (0x20).
        uint8_t *tmpbuf = stackalloc uint8_t[64];
        memset(tmpbuf, 0x20, 64);
        memcpy(tmpbuf, buf + idx, len - idx);
        simd_input @in = fill_input(tmpbuf);
#if SIMDJSON_UTF8VALIDATE
        // Same call as in the main loop (no type argument, escaped identifier).
        check_utf8(@in, state);
#endif
        // detect odd sequences of backslashes
        uint64_t odd_ends = find_odd_backslash_sequences(
            @in, ref prev_iter_ends_odd_backslash);

        // detect insides of quote pairs ("quote_mask") and also our quote_bits
        // themselves
        uint64_t quote_bits = 0;
        uint64_t quote_mask = find_quote_mask_and_bits(
            @in, odd_ends, ref prev_iter_inside_quote, ref quote_bits, ref error_mask);

        // take the previous iterations structural bits, not our current iteration,
        // and flatten
        flatten_bits(base_ptr, ref @base, (uint32_t)idx, structurals);

        uint64_t whitespace = 0;
        find_whitespace_and_structurals(@in, ref whitespace, ref structurals);

        // fixup structurals to reflect quotes and add pseudo-structural characters
        structurals = finalize_structurals(structurals, whitespace, quote_mask,
                                           quote_bits, ref prev_iter_ends_pseudo_pred);
        idx += 64;
    }

    // is last string quote closed?
    if (prev_iter_inside_quote != 0)
    {
        return(JsonParseError.UNCLOSED_STRING);
    }

    // finally, flatten out the remaining structurals from the last iteration
    flatten_bits(base_ptr, ref @base, (uint32_t)idx, structurals);

    pj.n_structural_indexes = @base;
    // a valid JSON file cannot have zero structural indexes - we should have
    // found something
    if (pj.n_structural_indexes == 0u)
    {
        return(JsonParseError.EMPTY);
    }
    if (base_ptr[pj.n_structural_indexes - 1] > len)
    {
        return(JsonParseError.UNEXPECTED_ERROR);
    }
    if (len != base_ptr[pj.n_structural_indexes - 1])
    {
        // the string might not be NULL terminated, but we add a virtual NULL ending
        // character.
        base_ptr[pj.n_structural_indexes++] = (uint32_t)len;
    }
    // make it safe to dereference one beyond this array
    base_ptr[pj.n_structural_indexes] = 0;
    if (error_mask != 0)
    {
        return(JsonParseError.UNESCAPED_CHARS);
    }
#if SIMDJSON_UTF8VALIDATE
    return(check_utf8_errors(state));
#else
    return(JsonParseError.SUCCESS);
#endif
}
/**
 * Appends the position of every set bit of bits to base_ptr, starting at
 * slot @base, and advances @base by the number of set bits.
 *
 * Each stored value is (idx - 64) plus the bit's index within bits. Slots are
 * written in groups of eight, so when the bit count is not a multiple of
 * eight a few slots past the new @base also receive scratch values.
 *
 * @param base_ptr start of the output array
 * @param base     in: first free slot; out: first free slot after appending
 * @param idx      offset just past the 64-byte chunk that bits describes
 * @param bits     one bit per byte of that chunk
 */
internal static void flatten_bits(uint32_t* base_ptr, ref uint32_t @base, uint32_t idx, uint64_t bits)
{
    // In some instances, the next branch is expensive because it is mispredicted.
    // Unfortunately, in other cases,
    // it helps tremendously.
    if (bits == 0)
    {
        return;
    }

    uint32_t cnt = (uint32_t)hamming(bits);
    uint32_t next_base = @base + cnt;
    uint32_t origin = idx - 64;        // offset of the chunk's first byte
    uint32_t* dst = base_ptr + @base;  // write cursor

    // First group of eight: always written.
    dst[0] = (uint32_t)(origin + trailingzeroes(bits));
    bits &= bits - 1;
    dst[1] = (uint32_t)(origin + trailingzeroes(bits));
    bits &= bits - 1;
    dst[2] = (uint32_t)(origin + trailingzeroes(bits));
    bits &= bits - 1;
    dst[3] = (uint32_t)(origin + trailingzeroes(bits));
    bits &= bits - 1;
    dst[4] = (uint32_t)(origin + trailingzeroes(bits));
    bits &= bits - 1;
    dst[5] = (uint32_t)(origin + trailingzeroes(bits));
    bits &= bits - 1;
    dst[6] = (uint32_t)(origin + trailingzeroes(bits));
    bits &= bits - 1;
    dst[7] = (uint32_t)(origin + trailingzeroes(bits));
    bits &= bits - 1;
    dst += 8;

    // We hope that the next branch is easily predicted.
    if (cnt > 8)
    {
        dst[0] = (uint32_t)(origin + trailingzeroes(bits));
        bits &= bits - 1;
        dst[1] = (uint32_t)(origin + trailingzeroes(bits));
        bits &= bits - 1;
        dst[2] = (uint32_t)(origin + trailingzeroes(bits));
        bits &= bits - 1;
        dst[3] = (uint32_t)(origin + trailingzeroes(bits));
        bits &= bits - 1;
        dst[4] = (uint32_t)(origin + trailingzeroes(bits));
        bits &= bits - 1;
        dst[5] = (uint32_t)(origin + trailingzeroes(bits));
        bits &= bits - 1;
        dst[6] = (uint32_t)(origin + trailingzeroes(bits));
        bits &= bits - 1;
        dst[7] = (uint32_t)(origin + trailingzeroes(bits));
        bits &= bits - 1;
        dst += 8;
    }

    if (cnt > 16)
    {
        // unlucky: we rarely get here
        // since it means having one structural or pseudo-structural element
        // every 4 characters (possible with inputs like "","","",...).
        do
        {
            dst[0] = (uint32_t)(origin + trailingzeroes(bits));
            bits &= bits - 1;
            dst++;
        } while (bits != 0);
    }

    @base = next_base;
}
// Declaration of an externally implemented routine: the extern modifier means
// the body is supplied outside this file.
// NOTE(review): presumably an ISPC sort kernel where n is the element count,
// code the keys, order the output ordering and ntasks the task count; this is
// inferred from the names only - confirm against the native implementation.
public static extern void sort_ispc(int32_t n, uint32_t *code, int32_t *order, int32_t ntasks) /*x28*/;