/// <summary>
/// Computes one 128-bit recursion step and stores it in <paramref name="r"/>.
/// Each 32-bit lane of the result is the XOR of five terms: the lane of
/// <paramref name="a"/>, the lane of a byte-shifted copy of a (x), the lane of
/// <paramref name="b"/> shifted right by SFMT_SR1 bits and masked, the lane of a
/// shifted copy of <paramref name="c"/> (y), and the lane of <paramref name="d"/>
/// shifted left by SFMT_SL1 bits.
/// </summary>
/// <param name="r">Receives the result, written one 32-bit lane at a time.</param>
/// <param name="a">First input; also the source of the 128-bit left shift.</param>
/// <param name="b">Second input; shifted right per lane and masked with SFMT_MSK1..4.</param>
/// <param name="c">Third input; passed to rshift128 with SFMT_SR2.</param>
/// <param name="d">Fourth input; shifted left per lane by SFMT_SL1 bits.</param>
static void do_recursion(ref w128_t r, ref w128_t a, ref w128_t b, ref w128_t c, ref w128_t d)
 {
     // NOTE(review): x and y are not declared in this method; they are presumably
     // scratch w128_t storage declared elsewhere in the class - confirm.
     lshift128(ref x, ref a, SFMT_SL2);
     rshift128(ref y, ref c, SFMT_SR2); // rshift128 is defined outside this view

     // Each lane is finished (all reads, then the single write) before the next
     // lane starts, so r may refer to the same storage as one of the inputs.

     // Lane 0
     var bTerm0 = (b.u32_0 >> SFMT_SR1) & SFMT_MSK1;
     var dTerm0 = d.u32_0 << SFMT_SL1;
     r.u32_0 = a.u32_0 ^ x.u32_0 ^ bTerm0 ^ y.u32_0 ^ dTerm0;

     // Lane 1
     var bTerm1 = (b.u32_1 >> SFMT_SR1) & SFMT_MSK2;
     var dTerm1 = d.u32_1 << SFMT_SL1;
     r.u32_1 = a.u32_1 ^ x.u32_1 ^ bTerm1 ^ y.u32_1 ^ dTerm1;

     // Lane 2
     var bTerm2 = (b.u32_2 >> SFMT_SR1) & SFMT_MSK3;
     var dTerm2 = d.u32_2 << SFMT_SL1;
     r.u32_2 = a.u32_2 ^ x.u32_2 ^ bTerm2 ^ y.u32_2 ^ dTerm2;

     // Lane 3
     var bTerm3 = (b.u32_3 >> SFMT_SR1) & SFMT_MSK4;
     var dTerm3 = d.u32_3 << SFMT_SL1;
     r.u32_3 = a.u32_3 ^ x.u32_3 ^ bTerm3 ^ y.u32_3 ^ dTerm3;
 }
        /// <summary>
        /// Generates <paramref name="size"/> / 4 128-bit blocks with gen_rand_array
        /// into a temporary buffer and hands them to copy32 together with
        /// <paramref name="array"/>, then sets idx back to SFMT_N32.
        /// </summary>
        /// <param name="array">
        /// Destination passed to copy32. Presumably it must hold at least
        /// <paramref name="size"/> elements - copy32 is not visible here, confirm.
        /// </param>
        /// <param name="size">
        /// Requested element count; asserted to be a multiple of 4 and at least SFMT_N32.
        /// </param>
        public void sfmt_fill_array32(uint32_t[] array, int size)
        {
            // Preconditions: idx must equal SFMT_N32 on entry, and size must be a
            // whole number of 4-element blocks no smaller than SFMT_N32.
            assert(idx == SFMT_N32);
            assert(size % 4 == 0);
            assert(size >= SFMT_N32);

            // Four 32-bit values per w128_t block.
            int blockCount = size / 4;
            w128_t[] blocks = new w128_t[blockCount];

            gen_rand_array(blocks, blockCount);
            copy32(blocks, array);

            idx = SFMT_N32;
        }
        /// <summary>
        /// Generates <paramref name="size"/> / 2 128-bit blocks with gen_rand_array
        /// into a temporary buffer and hands them to copy64 together with
        /// <paramref name="array"/>, then sets idx back to SFMT_N32.
        /// </summary>
        /// <param name="array">
        /// Destination passed to copy64. Presumably it must hold at least
        /// <paramref name="size"/> elements - copy64 is not visible here, confirm.
        /// </param>
        /// <param name="size">
        /// Requested element count; asserted to be a multiple of 2 and at least SFMT_N64.
        /// </param>
        public void sfmt_fill_array64(uint64_t[] array, int size)
        {
            // Preconditions: idx must equal SFMT_N32 on entry, and size must be a
            // whole number of 2-element blocks no smaller than SFMT_N64.
            assert(idx == SFMT_N32);
            assert(size % 2 == 0);
            assert(size >= SFMT_N64);

            // Two 64-bit values per w128_t block.
            int blockCount = size / 2;
            w128_t[] blocks = new w128_t[blockCount];

            gen_rand_array(blocks, blockCount);
            copy64(blocks, array);

            idx = SFMT_N32;
        }
        /// <summary>
        /// Shifts the 128-bit value in <paramref name="src"/> left by
        /// <paramref name="shift"/> bytes (shift * 8 bits) and stores the result in
        /// <paramref name="dst"/>. u64_1 is treated as the high 64 bits and u64_0 as
        /// the low 64 bits.
        /// </summary>
        /// <param name="dst">Receives the shifted value; may be the same storage as src.</param>
        /// <param name="src">Value to shift; both halves are read before dst is written.</param>
        /// <param name="shift">
        /// Shift distance in bytes. 1..7 behaves as before; 0 (or a negative value)
        /// copies src unchanged; 8..15 moves the low half into the high half;
        /// 16 or more yields zero.
        /// </param>
        static void lshift128(ref w128_t dst, ref w128_t src, int shift)
        {
            uint64_t th, tl, oh, ol;

            th = src.u64_1;
            tl = src.u64_0;

            int bits = shift * 8;

            if (bits <= 0)
            {
                // Without this branch the carry term would be "tl >> 64". A 64-bit
                // shift count is masked to 0, so the whole low half would be OR-ed
                // into the high half instead of contributing nothing.
                oh = th;
                ol = tl;
            }
            else if (bits < 64)
            {
                // Common case: bits pushed out of the low half carry into the high half.
                oh = (th << bits) | (tl >> (64 - bits));
                ol = tl << bits;
            }
            else if (bits < 128)
            {
                // A whole half (or more) is shifted out: only low-half bits survive,
                // and they land in the high half.
                oh = tl << (bits - 64);
                ol = 0;
            }
            else
            {
                // Everything is shifted out.
                oh = 0;
                ol = 0;
            }

            dst.u64_0 = ol;
            dst.u64_1 = oh;
        }
        /// <summary>
        /// Shifts the 128-bit value in <paramref name="src"/> left by
        /// <paramref name="shift"/> bytes (shift * 8 bits) and stores the result in
        /// <paramref name="dst"/>, using only the 32-bit lanes. Lanes u32_3:u32_2
        /// form the high 64 bits and u32_1:u32_0 the low 64 bits.
        /// </summary>
        /// <param name="dst">Receives the shifted value; may be the same storage as src.</param>
        /// <param name="src">Value to shift; all four lanes are read before dst is written.</param>
        /// <param name="shift">
        /// Shift distance in bytes. 1..7 behaves as before; 0 (or a negative value)
        /// copies src unchanged; 8..15 moves the low half into the high half;
        /// 16 or more yields zero.
        /// </param>
        static void lshift128(ref w128_t dst, ref w128_t src, int shift)
        {
            uint64_t th, tl, oh, ol;

            // Assemble the two 64-bit halves from the 32-bit lanes.
            th = ((uint64_t)src.u32_3 << 32) | ((uint64_t)src.u32_2);
            tl = ((uint64_t)src.u32_1 << 32) | ((uint64_t)src.u32_0);

            int bits = shift * 8;

            if (bits <= 0)
            {
                // Without this branch the carry term would be "tl >> 64". A 64-bit
                // shift count is masked to 0, so the whole low half would be OR-ed
                // into the high half instead of contributing nothing.
                oh = th;
                ol = tl;
            }
            else if (bits < 64)
            {
                // Common case: bits pushed out of the low half carry into the high half.
                oh = (th << bits) | (tl >> (64 - bits));
                ol = tl << bits;
            }
            else if (bits < 128)
            {
                // A whole half (or more) is shifted out: only low-half bits survive,
                // and they land in the high half.
                oh = tl << (bits - 64);
                ol = 0;
            }
            else
            {
                // Everything is shifted out.
                oh = 0;
                ol = 0;
            }

            // Split the halves back into 32-bit lanes; the casts keep the low 32 bits.
            dst.u32_1 = (uint32_t)(ol >> 32);
            dst.u32_0 = (uint32_t)ol;
            dst.u32_3 = (uint32_t)(oh >> 32);
            dst.u32_2 = (uint32_t)oh;
        }