/// <summary>
/// Per-lane least common multiple, computed as <c>(|x| / gcd(|x|, |y|)) * |y|</c>.
/// The division is performed before the multiplication.
/// </summary>
/// <param name="x">First signed operand vector; its absolute value is used.</param>
/// <param name="y">Second signed operand vector; its absolute value is used.</param>
/// <returns>The result as an unsigned <c>byte32</c>.</returns>
public static byte32 lcm(sbyte32 x, sbyte32 y)
{
    byte32 magnitudeX = (byte32)abs(x);
    byte32 magnitudeY = (byte32)abs(y);
    byte32 reducedX = magnitudeX / gcd(magnitudeX, magnitudeY);

    return reducedX * magnitudeY;
}
/// <summary>
/// Reverses the bit order inside each lane in three swap stages:
/// neighbouring bits, then bit pairs, then the two nibbles.
/// </summary>
/// <param name="x">Vector whose lanes are bit-reversed.</param>
/// <returns>The vector with the bits of every lane reversed.</returns>
public static byte32 reversebits(byte32 x)
{
    // Stage 1: exchange neighbouring bits (mask 0b0101_0101).
    byte32 oddBitsDown = (x >> 1) & 0x55;
    byte32 evenBitsUp = (x & 0x55) << 1;
    x = oddBitsDown | evenBitsUp;

    // Stage 2: exchange neighbouring bit pairs (mask 0b0011_0011).
    byte32 highPairsDown = (x >> 2) & 0x33;
    byte32 lowPairsUp = (x & 0x33) << 2;
    x = highPairsDown | lowPairsUp;

    // Stage 3: exchange the two nibbles.
    byte32 highNibbleDown = x >> 4;
    byte32 lowNibbleUp = x << 4;

    return highNibbleDown | lowNibbleUp;
}
/// <summary>
/// Finds the index of the first lane of <paramref name="v"/> that equals <paramref name="x"/>.
/// </summary>
/// <param name="v">Vector to search.</param>
/// <param name="x">Value to look for.</param>
/// <returns>
/// The lowest matching lane index. The scalar path returns 32 when nothing matches; the SIMD paths
/// return the trailing-zero count of the 32-bit match mask.
/// </returns>
public static int indexof(byte32 v, byte x)
{
    if (Avx2.IsAvx2Supported)
    {
        // One mask bit per lane, set where the lane equals x.
        int matchMask = Avx2.mm256_movemask_epi8(Avx2.mm256_cmpeq_epi8(v, new byte32(x)));

        return math.tzcnt(matchMask);
    }
    else if (Sse2.IsSse2Supported)
    {
        byte16 needle = x;
        int lowerMask = Sse2.movemask_epi8(Sse2.cmpeq_epi8(v._v16_0, needle));
        int upperMask = Sse2.movemask_epi8(Sse2.cmpeq_epi8(v._v16_16, needle));

        // Stitch both 16-bit masks into one 32-bit mask before counting.
        return math.tzcnt(lowerMask | (upperMask << 16));
    }
    else
    {
        int lane = 0;

        while (lane < 32 && v[lane] != x)
        {
            lane++;
        }

        return lane;
    }
}
/// <summary>
/// Per lane, isolates the highest set bit of <paramref name="x"/>.
/// </summary>
/// <param name="x">Input vector.</param>
/// <returns>The smeared value minus its own right-shift by one, per lane.</returns>
public static byte32 floorpow2(byte32 x)
{
    // Smear the highest set bit into every lower bit position.
    x = x | (x >> 1);
    x = x | (x >> 2);
    x = x | (x >> 4);

    // Subtracting everything below the top bit leaves only the top bit.
    byte32 belowTopBit = x >> 1;

    return x - belowTopBit;
}
/// <summary>
/// Per lane: subtracts one, smears the highest set bit downwards and adds one again.
/// </summary>
/// <param name="x">Input vector.</param>
/// <returns>The result of the decrement / smear / increment sequence for every lane.</returns>
public static byte32 ceilpow2(byte32 x)
{
    x = x - 1;

    // Fill all bit positions below the highest set bit.
    x = x | (x >> 1);
    x = x | (x >> 2);
    x = x | (x >> 4);

    return x + 1;
}
/// <summary>
/// Produces a <c>byte32</c> from the generator state, with each lane derived from the matching
/// lanes of <paramref name="min"/> and <paramref name="max"/>.
/// </summary>
/// <param name="min">Per-lane lower bound.</param>
/// <param name="max">Per-lane upper bound; asserted to be not smaller than <paramref name="min"/> in every lane.</param>
/// <returns>
/// AVX2 path: <c>min</c> plus the upper byte of <c>(max - min) * NextState()</c> per lane.
/// Otherwise: four <c>NextByte8</c> results combined into one vector.
/// </returns>
public byte32 NextByte32(byte32 min, byte32 max)
{
    Assert.IsNotSmaller(max.x0, min.x0);
    Assert.IsNotSmaller(max.x1, min.x1);
    Assert.IsNotSmaller(max.x2, min.x2);
    Assert.IsNotSmaller(max.x3, min.x3);
    Assert.IsNotSmaller(max.x4, min.x4);
    Assert.IsNotSmaller(max.x5, min.x5);
    Assert.IsNotSmaller(max.x6, min.x6);
    Assert.IsNotSmaller(max.x7, min.x7);
    Assert.IsNotSmaller(max.x8, min.x8);
    Assert.IsNotSmaller(max.x9, min.x9);
    Assert.IsNotSmaller(max.x10, min.x10);
    Assert.IsNotSmaller(max.x11, min.x11);
    Assert.IsNotSmaller(max.x12, min.x12);
    Assert.IsNotSmaller(max.x13, min.x13);
    Assert.IsNotSmaller(max.x14, min.x14);
    Assert.IsNotSmaller(max.x15, min.x15);
    Assert.IsNotSmaller(max.x16, min.x16);
    Assert.IsNotSmaller(max.x17, min.x17);
    Assert.IsNotSmaller(max.x18, min.x18);
    Assert.IsNotSmaller(max.x19, min.x19);
    Assert.IsNotSmaller(max.x20, min.x20);
    Assert.IsNotSmaller(max.x21, min.x21);
    Assert.IsNotSmaller(max.x22, min.x22);
    Assert.IsNotSmaller(max.x23, min.x23);
    Assert.IsNotSmaller(max.x24, min.x24);
    Assert.IsNotSmaller(max.x25, min.x25);
    Assert.IsNotSmaller(max.x26, min.x26);
    Assert.IsNotSmaller(max.x27, min.x27);
    Assert.IsNotSmaller(max.x28, min.x28);
    Assert.IsNotSmaller(max.x29, min.x29);
    Assert.IsNotSmaller(max.x30, min.x30);
    Assert.IsNotSmaller(max.x31, min.x31);

    if (Avx2.IsAvx2Supported)
    {
        // Moves the odd bytes (the upper byte of every 16-bit product) to the front of each 128-bit lane.
        v256 upperByteSelector = new v256(1, 3, 5, 7, 9, 11, 13, 15, 0, 0, 0, 0, 0, 0, 0, 0,
                                          1, 3, 5, 7, 9, 11, 13, 15, 0, 0, 0, 0, 0, 0, 0, 0);

        // From here on 'max' holds the per-lane range width.
        max -= min;

        // One NextState() draw per lane: first the 16 lower lanes, then the 16 upper lanes.
        short16 drawsLo = new short16(NextState(), NextState(), NextState(), NextState(),
                                      NextState(), NextState(), NextState(), NextState(),
                                      NextState(), NextState(), NextState(), NextState(),
                                      NextState(), NextState(), NextState(), NextState());
        short16 lo = (short16)max.v16_0 * drawsLo;

        short16 drawsHi = new short16(NextState(), NextState(), NextState(), NextState(),
                                      NextState(), NextState(), NextState(), NextState(),
                                      NextState(), NextState(), NextState(), NextState(),
                                      NextState(), NextState(), NextState(), NextState());
        short16 hi = (short16)max.v16_16 * drawsHi;

        lo = Avx2.mm256_shuffle_epi8(lo, upperByteSelector);
        hi = Avx2.mm256_shuffle_epi8(hi, upperByteSelector);

        // Pull 64-bit elements 0 and 2 together so the 16 selected bytes sit in the lower 128 bits.
        v128 offsetsLo = Avx.mm256_castsi256_si128(Avx2.mm256_permute4x64_epi64(lo, Sse.SHUFFLE(0, 0, 2, 0)));
        v128 offsetsHi = Avx.mm256_castsi256_si128(Avx2.mm256_permute4x64_epi64(hi, Sse.SHUFFLE(0, 0, 2, 0)));

        return min + new byte32(offsetsLo, offsetsHi);
    }
    else
    {
        return new byte32(NextByte8(min.v8_0, max.v8_0),
                          NextByte8(min.v8_8, max.v8_8),
                          NextByte8(min.v8_16, max.v8_16),
                          NextByte8(min.v8_24, max.v8_24));
    }
}
/// <summary>
/// Per-lane average of two <c>byte32</c> vectors.
/// Uses <c>mm256_avg_epu8</c> when AVX2 is available, otherwise averages both 16-lane halves separately.
/// </summary>
/// <param name="x">First operand.</param>
/// <param name="y">Second operand.</param>
/// <returns>The per-lane averages.</returns>
public static byte32 avg(byte32 x, byte32 y)
{
    if (Avx2.IsAvx2Supported)
    {
        return Avx2.mm256_avg_epu8(x, y);
    }
    else
    {
        byte16 lowerHalf = avg(x.v16_0, y.v16_0);
        byte16 upperHalf = avg(x.v16_16, y.v16_16);

        return new byte32(lowerHalf, upperHalf);
    }
}
/// <summary>
/// Sums all lanes of <paramref name="x"/>.
/// The AVX2 path takes the sum of absolute differences against the zero vector; the fallback adds
/// the sums of both 16-lane halves.
/// </summary>
/// <param name="x">Vector whose lanes are added up.</param>
/// <returns>The lane sum as an unsigned 32-bit integer.</returns>
public static uint csum(byte32 x)
{
    if (Avx2.IsAvx2Supported)
    {
        return sad(x, byte32.zero);
    }
    else
    {
        uint lowerSum = csum(x.v16_0);
        uint upperSum = csum(x.v16_16);

        return lowerSum + upperSum;
    }
}
/// <summary>
/// Combines <paramref name="left"/> and <paramref name="right"/> with an and-not operation.
/// </summary>
/// <remarks>
/// The AVX2 intrinsic receives the operands in swapped order (<c>right</c> first, then <c>left</c>).
/// </remarks>
/// <param name="left">Left operand.</param>
/// <param name="right">Right operand.</param>
/// <returns>The and-not combination of the two vectors.</returns>
public static byte32 andnot(byte32 left, byte32 right)
{
    if (Avx2.IsAvx2Supported)
    {
        return Avx2.mm256_andnot_si256(right, left);
    }
    else
    {
        byte16 lowerHalf = andnot(left.v16_0, right.v16_0);
        byte16 upperHalf = andnot(left.v16_16, right.v16_16);

        return new byte32(lowerHalf, upperHalf);
    }
}
/// <summary>
/// Per-lane maximum of two <c>byte32</c> vectors.
/// Uses <c>mm256_max_epu8</c> when AVX2 is available, otherwise handles both 16-lane halves separately.
/// </summary>
/// <param name="a">First operand.</param>
/// <param name="b">Second operand.</param>
/// <returns>The per-lane maxima.</returns>
public static byte32 max(byte32 a, byte32 b)
{
    if (Avx2.IsAvx2Supported)
    {
        return Avx2.mm256_max_epu8(a, b);
    }
    else
    {
        byte16 lowerHalf = max(a.v16_0, b.v16_0);
        byte16 upperHalf = max(a.v16_16, b.v16_16);

        return new byte32(lowerHalf, upperHalf);
    }
}
/// <summary>
/// Reports whether every lane of <paramref name="x"/> is non-zero.
/// </summary>
/// <param name="x">Vector to inspect.</param>
/// <returns><c>true</c> when no lane compares equal to zero.</returns>
public static bool all(byte32 x)
{
    if (Avx2.IsAvx2Supported)
    {
        // One mask bit per lane, set where the lane equals zero.
        int zeroLaneMask = Avx2.mm256_movemask_epi8(Avx2.mm256_cmpeq_epi8(x, default(v256)));

        return zeroLaneMask == 0;
    }
    else
    {
        bool lowerHalf = all(x.v16_0);
        bool upperHalf = all(x.v16_16);

        return lowerHalf & upperHalf;
    }
}
/// <summary>
/// Reports whether all lanes of <paramref name="c"/> hold the same value.
/// </summary>
/// <param name="c">Vector to inspect.</param>
/// <returns><c>true</c> when the vector equals a broadcast of its own lowest lane.</returns>
public static bool all_eq(byte32 c)
{
    if (Avx2.IsAvx2Supported)
    {
        // Broadcast the lowest byte to every lane and compare against the input.
        byte32 firstLaneEverywhere = (byte32)Avx2.mm256_broadcastb_epi8(Avx.mm256_castsi256_si128(c));

        return firstLaneEverywhere.Equals(c);
    }
    else
    {
        bool lowerUniform = all_eq(c.v16_0);
        bool upperUniform = all_eq(c.v16_16);
        bool halvesMatch = c.v16_0.Equals(c.v16_16);

        return lowerUniform & upperUniform & halvesMatch;
    }
}
/// <summary>
/// Alternating subtract/add of <paramref name="b"/> from/to <paramref name="a"/>.
/// </summary>
/// <remarks>
/// The AVX2 path applies <c>mm256_sign_epi8</c> to <paramref name="b"/> with a control vector that
/// alternates 255 (even lanes) and 1 (odd lanes), then adds the result to <paramref name="a"/>.
/// </remarks>
/// <param name="a">Base vector.</param>
/// <param name="b">Vector that is alternately subtracted and added.</param>
/// <returns>The combined vector.</returns>
public static byte32 subadd(byte32 a, byte32 b)
{
    if (Avx2.IsAvx2Supported)
    {
        v256 alternatingSigns = new v256(255, 1, 255, 1, 255, 1, 255, 1,
                                         255, 1, 255, 1, 255, 1, 255, 1,
                                         255, 1, 255, 1, 255, 1, 255, 1,
                                         255, 1, 255, 1, 255, 1, 255, 1);

        return a + Avx2.mm256_sign_epi8(b, alternatingSigns);
    }
    else
    {
        byte16 lowerHalf = subadd(a.v16_0, b.v16_0);
        byte16 upperHalf = subadd(a.v16_16, b.v16_16);

        return new byte32(lowerHalf, upperHalf);
    }
}
/// <summary>
/// Divides every lane of <paramref name="dividend"/> by a single scalar <paramref name="divisor"/>,
/// yielding both quotient and remainder.
/// </summary>
/// <param name="dividend">Vector of dividends.</param>
/// <param name="divisor">Scalar divisor applied to every lane.</param>
/// <param name="remainder">Receives the per-lane remainders.</param>
/// <returns>The per-lane quotients.</returns>
public static byte32 divrem(byte32 dividend, byte divisor, out byte32 remainder)
{
    if (!Constant.IsConstantExpression(divisor))
    {
        // Non-constant divisor: broadcast it and defer to the vector/vector overload.
        return divrem(dividend, (byte32)divisor, out remainder);
    }

    // Constant divisor: use the % and / operators directly.
    remainder = dividend % divisor;

    return dividend / divisor;
}
/// <summary>
/// Per lane, tests whether <paramref name="x"/> is a power of two: the lane must be greater than
/// zero and <c>x &amp; (x - 1)</c> must be zero.
/// </summary>
/// <param name="x">Vector to test.</param>
/// <returns>A <c>bool32</c> whose lanes are 1 where the test holds and 0 elsewhere.</returns>
public static bool32 ispow2(byte32 x)
{
    if (Avx2.IsAvx2Supported)
    {
        v256 isPositive = Operator.greater_mask_byte(x, default(v256));
        v256 hasSingleBit = Avx2.mm256_cmpeq_epi8(default(v256), x & (x - 1));
        v256 bothHold = Avx2.mm256_and_si256(isPositive, hasSingleBit);

        // Reduce the all-ones comparison masks to the value 1 per lane.
        return Avx2.mm256_and_si256(bothHold, new byte32(1));
    }
    else
    {
        return new bool32(ispow2(x.v16_0), ispow2(x.v16_16));
    }
}
/// <summary>
/// Builds a per-lane mask for the unsigned comparison <c>left &gt; right</c>.
/// </summary>
/// <remarks>
/// Both operands get their top bit flipped (XOR with <c>1 &lt;&lt; 7</c>) before being handed to the
/// signed comparison <c>mm256_cmpgt_epi8</c>.
/// </remarks>
/// <param name="left">Left operand.</param>
/// <param name="right">Right operand.</param>
/// <returns>The raw comparison result of <c>mm256_cmpgt_epi8</c>.</returns>
/// <exception cref="CPUFeatureCheckException">Thrown when AVX2 is not supported.</exception>
internal static v256 greater_mask_byte(byte32 left, byte32 right)
{
    if (Avx2.IsAvx2Supported)
    {
        byte32 topBit = 1 << 7;
        v256 biasedLeft = Avx2.mm256_xor_si256(left, topBit);
        v256 biasedRight = Avx2.mm256_xor_si256(right, topBit);

        return Avx2.mm256_cmpgt_epi8(biasedLeft, biasedRight);
    }
    else
    {
        throw new CPUFeatureCheckException();
    }
}
/// <summary>
/// Sum of absolute differences between the lanes of <paramref name="a"/> and <paramref name="b"/>.
/// </summary>
/// <remarks>
/// The AVX2 path starts from <c>mm256_sad_epu8</c> and folds its partial sums with one 32-bit
/// shuffle and one 64-bit permute, then reads the lowest 16-bit element of the final vector.
/// </remarks>
/// <param name="a">First operand.</param>
/// <param name="b">Second operand.</param>
/// <returns>The total as an unsigned 32-bit integer.</returns>
public static uint sad(byte32 a, byte32 b)
{
    if (Avx2.IsAvx2Supported)
    {
        v256 partialSums = Avx2.mm256_sad_epu8(a, b);

        // Fold within each 128-bit lane.
        v256 withinLane = Avx2.mm256_shuffle_epi32(partialSums, Sse.SHUFFLE(0, 0, 0, 2));
        partialSums = Avx2.mm256_add_epi16(partialSums, withinLane);

        // Fold across the 128-bit lanes.
        v256 acrossLanes = Avx2.mm256_permute4x64_epi64(partialSums, Sse.SHUFFLE(0, 0, 0, 3));
        v256 total = Avx2.mm256_add_epi16(partialSums, acrossLanes);

        return total.UShort0;
    }
    else
    {
        return sad(a.v16_0, b.v16_0) + sad(a.v16_16, b.v16_16);
    }
}
/// <summary>
/// Per-lane greatest common divisor of two <c>byte32</c> vectors.
/// </summary>
/// <remarks>
/// AVX2 path, in order:
/// 1. Lanes where x or y is zero are recorded in <c>any_zero</c>; their result is prepared up front
///    (y where x is zero, x where y is zero) and they start out marked as done.
/// 2. <c>shift = tzcnt(x | y)</c> is saved, and x is shifted right by its own tzcnt.
///    NOTE(review): <c>tzcnt</c>, <c>shrl</c> and <c>shl</c> are defined elsewhere; presumably per-lane
///    trailing-zero count and logical shifts - confirm.
/// 3. Loop: y is shifted right by its tzcnt, then (x, y) becomes (min(x, y), max(x, y) - min(x, y)).
///    Lanes that are not yet done and whose y has become zero store x into <c>result</c> and are marked done.
///    The loop exits once the byte movemask of <c>doneMask</c> is -1, i.e. every lane is done.
/// 4. <c>result</c> is shifted left by <c>shift</c>, and the lanes in <c>any_zero</c> are overwritten
///    with the values prepared in step 1.
/// Without AVX2 the two 16-lane halves are processed separately by the 16-lane overload.
/// </remarks>
/// <param name="x">First operand.</param>
/// <param name="y">Second operand.</param>
/// <returns>The per-lane result vector.</returns>
public static byte32 gcd(byte32 x, byte32 y) { if (Avx2.IsAvx2Supported) { v256 ZERO = default(v256); v256 result = ZERO; v256 result_if_zero_any = ZERO; v256 x_is_zero = Avx2.mm256_cmpeq_epi8(x, ZERO); v256 y_is_zero = Avx2.mm256_cmpeq_epi8(y, ZERO); v256 any_zero = Avx2.mm256_or_si256(x_is_zero, y_is_zero); result_if_zero_any = Avx2.mm256_blendv_epi8(result_if_zero_any, y, x_is_zero); result_if_zero_any = Avx2.mm256_blendv_epi8(result_if_zero_any, x, y_is_zero); v256 doneMask = any_zero; v256 shift = tzcnt(x | y); x = shrl(x, tzcnt(x)); do { y = shrl(y, tzcnt(y)); v256 tempX = x; x = Avx2.mm256_min_epu8(x, y); y = Avx2.mm256_max_epu8(y, tempX); y -= x; v256 loopCheck = Avx2.mm256_andnot_si256(doneMask, Avx2.mm256_cmpeq_epi8(y, ZERO)); result = Avx2.mm256_blendv_epi8(result, x, loopCheck); doneMask = Avx2.mm256_or_si256(doneMask, loopCheck); } while (-1 != Avx2.mm256_movemask_epi8(doneMask)); result = shl((byte32)result, (byte32)shift); result = Avx2.mm256_blendv_epi8(result, result_if_zero_any, any_zero); return(result); } else { return(new byte32(gcd(x.v16_0, y.v16_0), gcd(x.v16_16, y.v16_16))); } }
/// <summary>
/// Checks <c>maxmath.ror</c> on <c>byte32</c> against a scalar reference for random rotation amounts.
/// </summary>
/// <remarks>
/// The reference replicates a lane's byte into all four bytes of a 32-bit value, rotates that with
/// <c>math.ror</c> and keeps the lowest byte. All comparisons are accumulated and asserted once at the end.
/// </remarks>
public static void ror_byte32()
{
    bool everyLaneMatches = true;
    Random32 rng = new Random32(RNG_SEED);

    for (int i = 0; i < Byte32.NUM_TESTS; i++)
    {
        for (int j = 0; j < NUM_ROTATION_TESTS; j++)
        {
            int n = rng.NextInt();
            byte32 rotated = maxmath.ror(Byte32.TestData_LHS[i], n);

            for (int lane = 0; lane < 32; lane++)
            {
                byte source = Byte32.TestData_LHS[i][lane];
                int replicated = source | (source << 8) | (source << 16) | (source << 24);

                everyLaneMatches &= rotated[lane] == (byte)math.ror(replicated, n);
            }
        }
    }

    Assert.AreEqual(true, everyLaneMatches);
}
/// <summary>
/// Checks <c>maxmath.vshl</c> for every shift amount from 1 to 31: lane <c>k</c> of the result must
/// equal lane <c>k - j</c> of the input, or 0 when that index is negative.
/// </summary>
public static void byte32()
{
    for (int i = 0; i < Byte32.TestData_LHS.Length; i++)
    {
        byte32 input = Byte32.TestData_LHS[i];

        for (int j = 1; j < 32; j++)
        {
            byte32 shifted = maxmath.vshl(input, j);

            for (int k = 0; k < 32; k++)
            {
                int sourceLane = k - j;
                int expected = (sourceLane < 0) ? 0 : input[sourceLane];

                Assert.IsTrue(shifted[k] == expected);
            }
        }
    }
}
/// <summary>
/// Checks <c>maxmath.vshr</c> for every shift amount from 1 to 31: lane <c>k</c> of the result must
/// equal lane <c>j + k</c> of the input, or 0 when that index is 32 or larger.
/// </summary>
public static void byte32()
{
    for (int i = 0; i < Byte32.TestData_LHS.Length; i++)
    {
        byte32 input = Byte32.TestData_LHS[i];

        for (int j = 1; j < 32; j++)
        {
            byte32 shifted = maxmath.vshr(input, j);

            for (int k = 0; k < 32; k++)
            {
                int sourceLane = j + k;
                int expected = (sourceLane >= 32) ? 0 : input[sourceLane];

                Assert.IsTrue(shifted[k] == expected);
            }
        }
    }
}
/// <summary>
/// Per-lane three-way comparison of two unsigned vectors, built from the two masks
/// <c>greater_mask_byte(x, y)</c> and <c>greater_mask_byte(y, x)</c>.
/// </summary>
/// <param name="x">Left operand.</param>
/// <param name="y">Right operand.</param>
/// <returns>
/// AVX2 path: <c>(0 - mask(x &gt; y)) + mask(y &gt; x)</c>, where the masks come from a byte compare.
/// NOTE(review): with all-ones compare masks this gives 1 / -1 / 0 for greater / smaller / equal - confirm.
/// </returns>
public static sbyte32 compareto(byte32 x, byte32 y)
{
    if (Avx2.IsAvx2Supported)
    {
        sbyte32 xIsGreater = Operator.greater_mask_byte(x, y);
        sbyte32 yIsGreater = Operator.greater_mask_byte(y, x);
        sbyte32 negatedXMask = 0 - xIsGreater;

        return negatedXMask + yIsGreater;
    }
    else
    {
        return new sbyte32(compareto(x.v16_0, y.v16_0), compareto(x.v16_16, y.v16_16));
    }
}
/// <summary>
/// Checks <c>maxmath.vror</c> for every rotation amount from 1 to 31: lane <c>k</c> of the result must
/// equal lane <c>(j + k) % 32</c> of the input.
/// </summary>
public static void byte32()
{
    for (int i = 0; i < Byte32.TestData_LHS.Length; i++)
    {
        byte32 input = Byte32.TestData_LHS[i];

        for (int j = 1; j < 32; j++)
        {
            byte32 rotated = maxmath.vror(input, j);

            for (int k = 0; k < 32; k++)
            {
                int sourceLane = (j + k) % 32;

                Assert.AreEqual(rotated[k], input[sourceLane]);
            }
        }
    }
}
/// <summary>
/// Signed per-lane remainder built on the unsigned helper <c>vrem_byte</c>.
/// </summary>
/// <remarks>
/// The remainder is computed on the absolute values and then negated in every lane whose dividend
/// is negative.
/// </remarks>
/// <param name="dividend">Signed dividends.</param>
/// <param name="divisor">Signed divisors.</param>
/// <returns>The signed remainders.</returns>
/// <exception cref="CPUFeatureCheckException">Thrown when AVX2 is not supported.</exception>
internal static sbyte32 vrem_sbyte(sbyte32 dividend, sbyte32 divisor)
{
    if (Avx2.IsAvx2Supported)
    {
        byte32 absDividend = (byte32)maxmath.abs(dividend);
        byte32 absDivisor = (byte32)maxmath.abs(divisor);
        byte32 magnitude = vrem_byte(absDividend, absDivisor);

        // All-ones in lanes where 0 > dividend.
        byte32 dividendIsNegative = Avx2.mm256_cmpgt_epi8(sbyte32.zero, dividend);

        return Avx2.mm256_blendv_epi8(magnitude, -((sbyte32)magnitude), dividendIsNegative);
    }
    else
    {
        throw new CPUFeatureCheckException();
    }
}
/// <summary>
/// Per lane, tests whether <paramref name="dividend"/> is evenly divisible by <paramref name="divisor"/>.
/// </summary>
/// <remarks>
/// When the divisor is a compile-time constant, each 16-lane half uses a multiply-and-compare test:
/// with <c>c = (ushort.MaxValue / d) + 1</c>, the lane passes when <c>n * c &lt;= c - 1</c>.
/// Each half must use the constants derived from its OWN divisor lanes.
/// Otherwise the result is <c>dividend % divisor == 0</c>.
/// </remarks>
/// <param name="dividend">Values to test.</param>
/// <param name="divisor">Divisors; every lane is asserted to be non-zero.</param>
/// <returns>A <c>bool32</c> holding the per-lane test results.</returns>
public static bool32 isdivisible(byte32 dividend, byte32 divisor)
{
    Assert.AreNotEqual(0, divisor.x0);
    Assert.AreNotEqual(0, divisor.x1);
    Assert.AreNotEqual(0, divisor.x2);
    Assert.AreNotEqual(0, divisor.x3);
    Assert.AreNotEqual(0, divisor.x4);
    Assert.AreNotEqual(0, divisor.x5);
    Assert.AreNotEqual(0, divisor.x6);
    Assert.AreNotEqual(0, divisor.x7);
    Assert.AreNotEqual(0, divisor.x8);
    Assert.AreNotEqual(0, divisor.x9);
    Assert.AreNotEqual(0, divisor.x10);
    Assert.AreNotEqual(0, divisor.x11);
    Assert.AreNotEqual(0, divisor.x12);
    Assert.AreNotEqual(0, divisor.x13);
    Assert.AreNotEqual(0, divisor.x14);
    Assert.AreNotEqual(0, divisor.x15);
    Assert.AreNotEqual(0, divisor.x16);
    Assert.AreNotEqual(0, divisor.x17);
    Assert.AreNotEqual(0, divisor.x18);
    Assert.AreNotEqual(0, divisor.x19);
    Assert.AreNotEqual(0, divisor.x20);
    Assert.AreNotEqual(0, divisor.x21);
    Assert.AreNotEqual(0, divisor.x22);
    Assert.AreNotEqual(0, divisor.x23);
    Assert.AreNotEqual(0, divisor.x24);
    Assert.AreNotEqual(0, divisor.x25);
    Assert.AreNotEqual(0, divisor.x26);
    Assert.AreNotEqual(0, divisor.x27);
    Assert.AreNotEqual(0, divisor.x28);
    Assert.AreNotEqual(0, divisor.x29);
    Assert.AreNotEqual(0, divisor.x30);
    Assert.AreNotEqual(0, divisor.x31);

    if (Constant.IsConstantExpression(divisor))
    {
        ushort16 compile_lo = (new ushort16(ushort.MaxValue) / divisor.v16_0) + 1;
        ushort16 compile_hi = (new ushort16(ushort.MaxValue) / divisor.v16_16) + 1;

        // Fix: the upper half previously reused compile_lo, which gave wrong answers whenever
        // the upper divisor lanes differed from the lower ones.
        return new bool32(dividend.v16_0 * compile_lo <= compile_lo - 1,
                          dividend.v16_16 * compile_hi <= compile_hi - 1);
    }
    else
    {
        return dividend % divisor == 0;
    }
}
/// <summary>
/// Signed per-lane division built on the unsigned helper <c>vdiv_byte</c>.
/// </summary>
/// <remarks>
/// The quotient is computed on the absolute values and then negated in every lane where exactly
/// one of dividend and divisor is negative.
/// </remarks>
/// <param name="dividend">Signed dividends.</param>
/// <param name="divisor">Signed divisors.</param>
/// <returns>The signed quotients.</returns>
/// <exception cref="CPUFeatureCheckException">Thrown when AVX2 is not supported.</exception>
internal static sbyte32 vdiv_sbyte(sbyte32 dividend, sbyte32 divisor)
{
    if (Avx2.IsAvx2Supported)
    {
        byte32 absDividend = (byte32)maxmath.abs(dividend);
        byte32 absDivisor = (byte32)maxmath.abs(divisor);
        byte32 magnitude = vdiv_byte(absDividend, absDivisor);

        v256 divisorIsNegative = Avx2.mm256_cmpgt_epi8(sbyte32.zero, divisor);
        v256 dividendIsNegative = Avx2.mm256_cmpgt_epi8(sbyte32.zero, dividend);

        // XOR of the two sign masks: set where the signs differ.
        byte32 signsDiffer = Avx2.mm256_xor_si256(divisorIsNegative, dividendIsNegative);

        return Avx2.mm256_blendv_epi8(magnitude, -((sbyte32)magnitude), signsDiffer);
    }
    else
    {
        throw new CPUFeatureCheckException();
    }
}
/// <summary>
/// Per-lane division of <paramref name="dividend"/> by <paramref name="divisor"/>, yielding both
/// quotient and remainder.
/// </summary>
/// <remarks>
/// Uses <c>Operator.vdivrem_byte</c> when AVX2 is available; otherwise both 16-lane halves are
/// processed by the 16-lane overload and recombined.
/// </remarks>
/// <param name="dividend">Vector of dividends.</param>
/// <param name="divisor">Vector of divisors.</param>
/// <param name="remainder">Receives the per-lane remainders.</param>
/// <returns>The per-lane quotients.</returns>
public static byte32 divrem(byte32 dividend, byte32 divisor, out byte32 remainder)
{
    if (Avx2.IsAvx2Supported)
    {
        return Operator.vdivrem_byte(dividend, divisor, out remainder);
    }
    else
    {
        byte16 remainderLo;
        byte16 remainderHi;
        byte16 quotientLo = divrem(dividend.v16_0, divisor.v16_0, out remainderLo);
        byte16 quotientHi = divrem(dividend.v16_16, divisor.v16_16, out remainderHi);

        remainder = new byte32(remainderLo, remainderHi);

        return new byte32(quotientLo, quotientHi);
    }
}
/// <summary>
/// Converts a <c>byte32</c> to a <c>bool32</c> after clamping every lane to the range [0, 1].
/// </summary>
/// <param name="x">Vector to convert.</param>
/// <returns>The clamped vector reinterpreted as <c>bool32</c>.</returns>
public static bool32 toboolsafe(byte32 x)
{
    if (Avx2.IsAvx2Supported)
    {
        v256 clamped = (v256)clamp(x, 0, 1);

        return clamped;
    }
    else if (Sse2.IsSse2Supported)
    {
        v128 lowerHalf = (v128)clamp(x.v16_0, 0, 1);
        v128 upperHalf = (v128)clamp(x.v16_16, 0, 1);

        return new bool32(lowerHalf, upperHalf);
    }
    else
    {
        byte32 clamped = clamp(x, 0, 1);

        // Reinterpret the clamped bytes in place as a bool32.
        return *(bool32*)&clamped;
    }
}
/// <summary>
/// Per-lane count of leading zero bits of a <c>byte32</c>.
/// </summary>
/// <remarks>
/// The AVX2 path looks up each nibble in a 16-entry table via <c>mm256_shuffle_epi8</c> and takes
/// the smaller of the two lookups. The low-nibble table yields 8 for 0 and otherwise 4 plus the
/// nibble's own leading zeros; the high-nibble table yields 8 for 0 and otherwise the nibble's leading zeros.
/// </remarks>
/// <param name="x">Input vector.</param>
/// <returns>The per-lane leading-zero counts.</returns>
public static byte32 lzcnt(byte32 x)
{
    if (Avx2.IsAvx2Supported)
    {
        v256 nibbleMask = new v256(0x0F0F_0F0F);
        v256 lowNibbleTable = new v256(8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
                                       8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4);
        v256 highNibbleTable = new v256(8, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
                                        8, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0);

        v256 lowNibbles = Avx2.mm256_and_si256(nibbleMask, x);
        v256 countViaLow = Avx2.mm256_shuffle_epi8(lowNibbleTable, lowNibbles);

        v256 highNibbles = Avx2.mm256_and_si256(nibbleMask, Avx2.mm256_srli_epi16(x, 4));
        v256 countViaHigh = Avx2.mm256_shuffle_epi8(highNibbleTable, highNibbles);

        return Avx2.mm256_min_epu8(countViaLow, countViaHigh);
    }
    else
    {
        return new byte32(lzcnt(x.v16_0), lzcnt(x.v16_16));
    }
}
/// <summary>
/// Checks <c>maxmath.gcd</c> on <c>byte32</c> against the scalar reference <c>_gcd</c>, lane by lane,
/// for 64 pairs of random vectors drawn from a <c>Random8</c> seeded with 135.
/// </summary>
public static void byte32()
{
    Random8 rng = new Random8(135);

    for (int i = 0; i < 64; i++)
    {
        byte32 x = rng.NextByte32();
        byte32 y = rng.NextByte32();

        byte32 expected = new byte32((byte)_gcd(x.x0, y.x0),
                                     (byte)_gcd(x.x1, y.x1),
                                     (byte)_gcd(x.x2, y.x2),
                                     (byte)_gcd(x.x3, y.x3),
                                     (byte)_gcd(x.x4, y.x4),
                                     (byte)_gcd(x.x5, y.x5),
                                     (byte)_gcd(x.x6, y.x6),
                                     (byte)_gcd(x.x7, y.x7),
                                     (byte)_gcd(x.x8, y.x8),
                                     (byte)_gcd(x.x9, y.x9),
                                     (byte)_gcd(x.x10, y.x10),
                                     (byte)_gcd(x.x11, y.x11),
                                     (byte)_gcd(x.x12, y.x12),
                                     (byte)_gcd(x.x13, y.x13),
                                     (byte)_gcd(x.x14, y.x14),
                                     (byte)_gcd(x.x15, y.x15),
                                     (byte)_gcd(x.x16, y.x16),
                                     (byte)_gcd(x.x17, y.x17),
                                     (byte)_gcd(x.x18, y.x18),
                                     (byte)_gcd(x.x19, y.x19),
                                     (byte)_gcd(x.x20, y.x20),
                                     (byte)_gcd(x.x21, y.x21),
                                     (byte)_gcd(x.x22, y.x22),
                                     (byte)_gcd(x.x23, y.x23),
                                     (byte)_gcd(x.x24, y.x24),
                                     (byte)_gcd(x.x25, y.x25),
                                     (byte)_gcd(x.x26, y.x26),
                                     (byte)_gcd(x.x27, y.x27),
                                     (byte)_gcd(x.x28, y.x28),
                                     (byte)_gcd(x.x29, y.x29),
                                     (byte)_gcd(x.x30, y.x30),
                                     (byte)_gcd(x.x31, y.x31));

        Assert.AreEqual(expected, maxmath.gcd(x, y));
    }
}