// Compares maxmath.gcd on random ulong4 pairs against the scalar _gcd reference, lane by lane.
public static void ulong4()
{
    Random64 rng = new Random64(135);

    for (ulong iteration = 0; iteration < 64; iteration++)
    {
        ulong4 lhs = rng.NextULong4();
        ulong4 rhs = rng.NextULong4();

        // Reference result built from four independent scalar computations.
        ulong4 expected = new ulong4((ulong)_gcd(lhs.x, rhs.x),
                                     (ulong)_gcd(lhs.y, rhs.y),
                                     (ulong)_gcd(lhs.z, rhs.z),
                                     (ulong)_gcd(lhs.w, rhs.w));

        Assert.AreEqual(expected, maxmath.gcd(lhs, rhs));
    }
}
// Compares maxmath.intpow on random long4 bases / ulong4 exponents against the scalar _intpow reference.
public static void long4()
{
    Random64 rng = new Random64(135);

    for (long iteration = 0; iteration < 64; iteration++)
    {
        long4 bases = rng.NextLong4();
        ulong4 exponents = rng.NextULong4();

        // Reference result built from four independent scalar computations.
        long4 expected = new long4((long)_intpow(bases.x, exponents.x),
                                   (long)_intpow(bases.y, exponents.y),
                                   (long)_intpow(bases.z, exponents.z),
                                   (long)_intpow(bases.w, exponents.w));

        Assert.AreEqual(expected, maxmath.intpow(bases, exponents));
    }
}
/// <summary>
/// Per-lane power-of-two test: a lane is reported true when it is greater than zero
/// and clearing its lowest set bit (x &amp; (x - 1)) leaves zero.
/// Falls back to the two-lane overload on the xy and zw halves when AVX2 is unavailable.
/// </summary>
public static bool4 ispow2(ulong4 x)
{
    if (Avx2.IsAvx2Supported)
    {
        // All-ones in lanes where x > 0 (unsigned comparison).
        v256 isPositive = Operator.greater_mask_ulong(x, default(v256));
        // All-ones in lanes where x has at most one bit set.
        v256 atMostOneBit = Avx2.mm256_cmpeq_epi64(default(v256), x & (x - 1));

        // Narrow the 64-bit masks to bytes and keep only bit 0 so each byte is 0 or 1.
        v128 result = new byte4(1) & ((byte4)(long4)Avx2.mm256_and_si256(isPositive, atMostOneBit));

        return *(bool4*)&result;
    }
    else
    {
        return new bool4(ispow2(x.xy), ispow2(x.zw));
    }
}
/// <summary>
/// Returns the smallest of the four components of <paramref name="x"/>.
/// </summary>
public static ulong cmin(ulong4 x)
{
    if (Sse4_2.IsSse42Supported)
    {
        // Reduce 4 -> 2 lanes, then 2 -> 1 by comparing against the swizzled upper lane.
        ulong2 halves = min(x.xy, x.zw);
        ulong2 reduced = min(halves, halves.yy);

        return reduced.x;
    }
    else
    {
        ulong minZW = math.min(x.z, x.w);
        ulong minYZW = math.min(x.y, minZW);

        return math.min(x.x, minYZW);
    }
}
/// <summary>
/// Builds a per-lane mask for the unsigned 64-bit comparison left &gt; right.
/// Both operands have their top bit flipped so that the signed compare intrinsic
/// orders them as unsigned values.
/// </summary>
/// <exception cref="CPUFeatureCheckException">Thrown when AVX2 is not supported.</exception>
internal static v256 greater_mask_ulong(v256 left, v256 right)
{
    if (Avx2.IsAvx2Supported)
    {
        ulong4 signBit = 1ul << 63;

        v256 biasedLeft = Avx2.mm256_xor_si256(left, signBit);
        v256 biasedRight = Avx2.mm256_xor_si256(right, signBit);

        return Avx2.mm256_cmpgt_epi64(biasedLeft, biasedRight);
    }
    else
    {
        throw new CPUFeatureCheckException();
    }
}
/// <summary>
/// Per-lane greatest common divisor of two ulong4 vectors.
/// </summary>
/// <remarks>
/// AVX2 path, in order:
///  1. Lanes where x or y is zero are recorded in any_zero; for those lanes the answer is
///     pre-selected into result_if_zero_any (y where x == 0, then x where y == 0) and the
///     lanes start out marked as done.
///  2. shift = tzcnt(x | y) is kept aside and x is shifted right by its own tzcnt.
///  3. Each loop iteration shifts y right by its tzcnt, swaps lanes so that x holds the smaller
///     value, and subtracts x from y. A lane whose y reaches zero and is not yet done latches
///     its current x into result and becomes done.
///  4. The loop ends when the byte movemask of doneMask equals bitmask32(4 * sizeof(ulong))
///     (presumably "all 32 bytes set" - confirm against bitmask32).
///  5. result is shifted left by shift and the zero-input lanes are overwritten with
///     result_if_zero_any.
/// This follows the shape of the binary (shift-and-subtract) GCD algorithm.
/// Without AVX2 the work is delegated to the two-lane overload on the _xy and _zw halves.
/// </remarks>
public static ulong4 gcd(ulong4 x, ulong4 y) { if (Avx2.IsAvx2Supported) { v256 ZERO = default(v256); v256 result = ZERO; v256 result_if_zero_any = ZERO; v256 x_is_zero = Avx2.mm256_cmpeq_epi64(x, ZERO); v256 y_is_zero = Avx2.mm256_cmpeq_epi64(y, ZERO); v256 any_zero = Avx2.mm256_or_si256(x_is_zero, y_is_zero); result_if_zero_any = Avx2.mm256_blendv_epi8(result_if_zero_any, y, x_is_zero); result_if_zero_any = Avx2.mm256_blendv_epi8(result_if_zero_any, x, y_is_zero); v256 doneMask = any_zero; v256 shift = tzcnt(x | y); x = Avx2.mm256_srlv_epi64(x, tzcnt(x)); do { y = Avx2.mm256_srlv_epi64(y, tzcnt(y)); v256 tempX = x; v256 x_greater_y = Operator.greater_mask_ulong(x, y); x = Avx2.mm256_blendv_epi8(x, y, x_greater_y); y = Avx2.mm256_blendv_epi8(y, tempX, x_greater_y); y -= x; v256 loopCheck = Avx2.mm256_andnot_si256(doneMask, Avx2.mm256_cmpeq_epi64(y, ZERO)); result = Avx2.mm256_blendv_epi8(result, x, loopCheck); doneMask = Avx2.mm256_or_si256(doneMask, loopCheck); } while (bitmask32(4 * sizeof(ulong)) != Avx2.mm256_movemask_epi8(doneMask)); result = Avx2.mm256_sllv_epi64(result, shift); result = Avx2.mm256_blendv_epi8(result, result_if_zero_any, any_zero); return(result); } else { return(new ulong4(gcd(x._xy, y._xy), gcd(x._zw, y._zw))); } }
/// <summary>
/// Per-lane three-way unsigned comparison: 1 where x &gt; y, -1 where y &gt; x, 0 where equal.
/// Falls back to the two-lane overload on the xy and zw halves when AVX2 is unavailable.
/// </summary>
public static long4 compareto(ulong4 x, ulong4 y)
{
    if (Avx2.IsAvx2Supported)
    {
        // Comparison masks are -1 (all bits set) in lanes where the comparison holds.
        long4 greaterMask = Operator.greater_mask_ulong(x, y);
        long4 lessMask = Operator.greater_mask_ulong(y, x);

        // Negating the "greater" mask yields +1; the "less" mask already contributes -1.
        return (0 - greaterMask) + lessMask;
    }
    else
    {
        return new long4(compareto(x.xy, y.xy), compareto(x.zw, y.zw));
    }
}
// Checks maxmath.vrol for rotations of 1..3 lanes: output lane k must equal input lane (4 - j + k) % 4.
public static void ulong4()
{
    for (int testIndex = 0; testIndex < ULong4.TestData_LHS.Length; testIndex++)
    {
        for (int rotation = 1; rotation < 4; rotation++)
        {
            ulong4 rol = maxmath.vrol(ULong4.TestData_LHS[testIndex], rotation);

            for (int lane = 0; lane < 4; lane++)
            {
                int sourceLane = ((4 - rotation) + lane) % 4;

                Assert.AreEqual(rol[lane], ULong4.TestData_LHS[testIndex][sourceLane]);
            }
        }
    }
}
// Checks maxmath.vshl for shifts of 1..3 lanes: output lane k must equal input lane k - j,
// or zero when that source lane would be below lane 0.
public static void ulong4()
{
    for (int testIndex = 0; testIndex < ULong4.TestData_LHS.Length; testIndex++)
    {
        for (int shift = 1; shift < 4; shift++)
        {
            ulong4 shl = maxmath.vshl(ULong4.TestData_LHS[testIndex], shift);

            for (int lane = 0; lane < 4; lane++)
            {
                int sourceLane = lane - shift;

                Assert.IsTrue(shl[lane] == ((sourceLane < 0) ? 0 : ULong4.TestData_LHS[testIndex][sourceLane]));
            }
        }
    }
}
// Checks maxmath.vshr for shifts of 1..3 lanes: output lane k must equal input lane j + k,
// or zero when that source lane would be past lane 3.
public static void ulong4()
{
    for (int testIndex = 0; testIndex < ULong4.TestData_LHS.Length; testIndex++)
    {
        for (int shift = 1; shift < 4; shift++)
        {
            ulong4 shr = maxmath.vshr(ULong4.TestData_LHS[testIndex], shift);

            for (int lane = 0; lane < 4; lane++)
            {
                int sourceLane = shift + lane;

                Assert.IsTrue(shr[lane] == ((sourceLane >= 4) ? 0 : ULong4.TestData_LHS[testIndex][sourceLane]));
            }
        }
    }
}
// Verifies that the explicit vector cast to ulong4 matches casting each component to ulong individually.
public static void Cast_ToULong()
{
    bool result = true;

    for (int i = 0; i < NUM_TESTS; i++)
    {
        ulong4 converted = (ulong4)TestData_LHS[i];

        result &= converted.x == (ulong)TestData_LHS[i].x;
        result &= converted.y == (ulong)TestData_LHS[i].y;
        result &= converted.z == (ulong)TestData_LHS[i].z;
        result &= converted.w == (ulong)TestData_LHS[i].w;
    }

    Assert.AreEqual(true, result);
}
/// <summary>
/// Produces a ulong4 whose components are each <paramref name="min"/> plus an offset derived from
/// a fresh generator state and the per-component range (max - min).
/// </summary>
/// <remarks>
/// Each component asserts max is not smaller than min. One state is consumed per component in
/// x, y, z, w order; the offset is the <c>out</c> value of Common.umul128(state, range)
/// (presumably the high 64 bits of the 128-bit product - confirm against Common.umul128).
/// </remarks>
public ulong4 NextULong4(ulong4 min, ulong4 max)
{
    Assert.IsNotSmaller(max.x, min.x);
    Assert.IsNotSmaller(max.y, min.y);
    Assert.IsNotSmaller(max.z, min.z);
    Assert.IsNotSmaller(max.w, min.w);

    ulong4 range = max - min;
    ulong4 offset = ulong4.zero;

    Common.umul128(NextState(), range.x, out offset.x);
    Common.umul128(NextState(), range.y, out offset.y);
    Common.umul128(NextState(), range.z, out offset.z);
    Common.umul128(NextState(), range.w, out offset.w);

    return min + offset;
}
/// <summary>
/// Per-lane test of whether <paramref name="dividend"/> is evenly divisible by <paramref name="divisor"/>.
/// Every divisor component is asserted to be non-zero.
/// </summary>
/// <remarks>
/// When the divisor is a compile-time constant, a 64-bit magic value (ulong.MaxValue / divisor) + 1
/// is used and divisibility is decided by dividend * magic &lt;= magic - 1, avoiding a runtime
/// modulo. Otherwise the remainder is compared against zero.
/// </remarks>
public static bool4 isdivisible(uint4 dividend, uint4 divisor)
{
    Assert.AreNotEqual(0u, divisor.x);
    Assert.AreNotEqual(0u, divisor.y);
    Assert.AreNotEqual(0u, divisor.z);
    Assert.AreNotEqual(0u, divisor.w);

    if (Constant.IsConstantExpression(divisor))
    {
        ulong4 magic = (new ulong4(ulong.MaxValue) / divisor) + 1;

        return dividend * magic <= magic - 1;
    }
    else
    {
        return dividend % divisor == 0;
    }
}
/// <summary>
/// Per-lane largest power of two that is not greater than <paramref name="x"/>.
/// </summary>
/// <remarks>
/// With AVX2 the top bit is shifted right by the lane's leading-zero count.
/// Otherwise the highest set bit is smeared into every lower position and the lower
/// bits are then subtracted away, leaving only the highest set bit.
/// </remarks>
public static ulong4 floorpow2(ulong4 x)
{
    if (Avx2.IsAvx2Supported)
    {
        ulong4 leadingZeros = lzcnt(x);

        return shrl(0x8000_0000_0000_0000, leadingZeros);
    }
    else
    {
        ulong4 smeared = x;

        smeared |= smeared >> 1;
        smeared |= smeared >> 2;
        smeared |= smeared >> 4;
        smeared |= smeared >> 8;
        smeared |= smeared >> 16;
        smeared |= smeared >> 32;

        return smeared - (smeared >> 1);
    }
}
/// <summary>
/// Produces a long4 whose components are each <paramref name="min"/> plus an offset derived from
/// a fresh generator state and the per-component range (max - min) reinterpreted as unsigned.
/// </summary>
/// <remarks>
/// Each component asserts max is not smaller than min. One state is consumed per component in
/// x, y, z, w order; the offset is the <c>out</c> value of Common.umul128(state, range)
/// (presumably the high 64 bits of the 128-bit product - confirm against Common.umul128).
/// </remarks>
public long4 NextLong4(long4 min, long4 max)
{
    Assert.IsNotSmaller(max.x, min.x);
    Assert.IsNotSmaller(max.y, min.y);
    Assert.IsNotSmaller(max.z, min.z);
    Assert.IsNotSmaller(max.w, min.w);

    ulong4 offset = ulong4.zero;
    long4 range = max - min;

    Common.umul128(NextState(), (ulong)(range.x), out offset.x);
    Common.umul128(NextState(), (ulong)(range.y), out offset.y);
    Common.umul128(NextState(), (ulong)(range.z), out offset.z);
    Common.umul128(NextState(), (ulong)(range.w), out offset.w);

    return min + (long4)offset;
}
// Checks the per-lane variable rotate-right maxmath.ror(ulong4, ulong4) against the scalar
// math.ror applied to each component with the matching rotation amount from TestData_RHS.
//
// The rotation amounts come straight from the test data, so every (value, amount) pair is
// checked exactly once; repeating the identical deterministic comparison
// NUM_ROTATION_TESTS times adds no coverage and has been dropped.
public static void ror_ulong4()
{
    bool result = true;

    for (int i = 0; i < ULong4.NUM_TESTS; i++)
    {
        ulong4 test = maxmath.ror(ULong4.TestData_LHS[i], ULong4.TestData_RHS[i]);

        result &= test.x == math.ror(ULong4.TestData_LHS[i].x, (int)ULong4.TestData_RHS[i].x);
        result &= test.y == math.ror(ULong4.TestData_LHS[i].y, (int)ULong4.TestData_RHS[i].y);
        result &= test.z == math.ror(ULong4.TestData_LHS[i].z, (int)ULong4.TestData_RHS[i].z);
        result &= test.w == math.ror(ULong4.TestData_LHS[i].w, (int)ULong4.TestData_RHS[i].w);
    }

    Assert.AreEqual(true, result);
}
/// <summary>
/// Per-lane smallest power of two that is not less than <paramref name="x"/>.
/// </summary>
/// <remarks>
/// With AVX2 the top bit is shifted right by (leading-zero count of x - 1) - 1.
/// Otherwise x - 1 has its highest set bit smeared into every lower position and one is
/// added back, carrying into the next power of two.
/// </remarks>
public static ulong4 ceilpow2(ulong4 x)
{
    if (Avx2.IsAvx2Supported)
    {
        ulong4 shiftAmount = lzcnt(x - 1) - 1;

        return shrl(0x8000_0000_0000_0000, shiftAmount);
    }
    else
    {
        ulong4 smeared = x;

        smeared -= 1;
        smeared |= smeared >> 1;
        smeared |= smeared >> 2;
        smeared |= smeared >> 4;
        smeared |= smeared >> 8;
        smeared |= smeared >> 16;
        smeared |= smeared >> 32;

        return smeared + 1;
    }
}
/// <summary>
/// Per-lane integer power: raises each component of <paramref name="x"/> to the matching
/// component of <paramref name="n"/> using square-and-multiply.
/// </summary>
/// <remarks>
/// Each iteration multiplies the accumulator by the current power in lanes whose exponent has
/// its lowest bit set, then halves the exponent. A lane's accumulator is copied into the result
/// the first time its exponent reaches zero. The loop stops once every lane's exponent is zero;
/// the current power is squared only when another iteration follows.
/// Falls back to the two-lane overload on the xy and zw halves when AVX2 is unavailable.
/// </remarks>
public static long4 intpow(long4 x, ulong4 n)
{
    if (Avx2.IsAvx2Supported)
    {
        v256 ZERO = long4.zero;
        v256 ONE = new long4(1);

        v256 finished = ZERO;
        v256 result = ZERO;
        v256 power = x;
        v256 accumulator = ONE;

        while (true)
        {
            // Multiply only in lanes where the exponent's low bit is set.
            v256 multiplied = Operator.mul_long(accumulator, power);
            accumulator = Avx2.mm256_blendv_epi8(accumulator, multiplied, Avx2.mm256_cmpeq_epi64(ONE, ONE & n));

            n >>= 1;

            // Latch the accumulator in lanes that have just run out of exponent bits.
            v256 exponentExhausted = Avx2.mm256_cmpeq_epi64(ZERO, n);
            result = Avx2.mm256_blendv_epi8(result, accumulator, Avx2.mm256_andnot_si256(finished, exponentExhausted));
            finished = exponentExhausted;

            if (-1 == Avx2.mm256_movemask_epi8(finished))
            {
                return result;
            }

            power = Operator.mul_long(power, power);
        }
    }
    else
    {
        return new long4(intpow(x.xy, n.xy), intpow(x.zw, n.zw));
    }
}
/// <summary>
/// Per-lane count of leading zero bits of four 64-bit unsigned integers; a zero lane yields 64.
/// </summary>
/// <remarks>
/// AVX2 path, in order:
///  1. y = x >> 32 is the upper half of each lane; cmp marks lanes whose upper half is zero.
///  2. bits holds the upper half where it is non-zero, otherwise the lower 32 bits of x.
///  3. offset is 0x041E (1023 + 31) for lanes using the upper half and 0x043E (1023 + 63) for
///     lanes using the lower half.
///  4. Adding 0x4330_0000_0000_0000 and then subtracting 4503599627370496.0 (2^52) as doubles
///     turns the 32-bit value into a double (assumes the ulong4/v256 conversions are bitwise
///     reinterpretations - confirm); bits >> 52 then reads that double's biased exponent.
///  5. offset minus the biased exponent is the leading-zero count for the lane.
///  6. Lanes where x == 0 are overwritten with 64.
/// Without AVX2 each component is passed to math.lzcnt individually.
/// </remarks>
public static ulong4 lzcnt(ulong4 x) { if (Avx2.IsAvx2Supported) { ulong4 y = x >> 32; ulong4 cmp = Avx2.mm256_cmpeq_epi64(y, default(v256)); ulong4 bits = Avx2.mm256_blendv_epi8(y, 0x0000_0000_FFFF_FFFF & x, cmp); ulong4 offset = Avx2.mm256_blendv_epi8((ulong4)0x041E, (ulong4)0x043E, cmp); bits += 0x4330_0000_0000_0000ul; bits = Avx.mm256_sub_pd(bits, new v256(4503599627370496d)); bits = offset - (bits >> 52); return(Avx2.mm256_blendv_epi8(bits, new ulong4(64), Avx2.mm256_cmpeq_epi64(x, default(v256)))); } else { return(new ulong4((ulong)math.lzcnt(x.x), (ulong)math.lzcnt(x.y), (ulong)math.lzcnt(x.z), (ulong)math.lzcnt(x.w))); } }
// Checks the uniform rotate-left maxmath.rol(ulong4, int) against the scalar math.rol applied to
// each component, using NUM_ROTATION_TESTS random rotation amounts per test vector.
public static void rol_ulong4()
{
    bool result = true;
    Random32 rng = new Random32(RNG_SEED);

    for (int testIndex = 0; testIndex < ULong4.NUM_TESTS; testIndex++)
    {
        for (int round = 0; round < NUM_ROTATION_TESTS; round++)
        {
            int rotation = rng.NextInt();
            ulong4 rotated = maxmath.rol(ULong4.TestData_LHS[testIndex], rotation);

            result &= rotated.x == math.rol(ULong4.TestData_LHS[testIndex].x, rotation);
            result &= rotated.y == math.rol(ULong4.TestData_LHS[testIndex].y, rotation);
            result &= rotated.z == math.rol(ULong4.TestData_LHS[testIndex].z, rotation);
            result &= rotated.w == math.rol(ULong4.TestData_LHS[testIndex].w, rotation);
        }
    }

    Assert.AreEqual(true, result);
}
/// <summary>
/// Per-lane test of whether <paramref name="dividend"/> is evenly divisible by the scalar
/// <paramref name="divisor"/>, which is asserted to be non-zero.
/// </summary>
/// <remarks>
/// For a compile-time constant divisor: a power of two is tested by masking with divisor - 1;
/// any other value uses the 64-bit magic (ulong.MaxValue / divisor) + 1 and the check
/// dividend * magic &lt;= magic - 1. A non-constant divisor compares the remainder against zero.
/// </remarks>
public static bool4 isdivisible(uint4 dividend, uint divisor)
{
    Assert.AreNotEqual(0u, divisor);

    if (Constant.IsConstantExpression(divisor))
    {
        if (math.ispow2(divisor))
        {
            return (dividend & (divisor - 1)) == 0;
        }
        else
        {
            ulong4 magic = (new ulong4(ulong.MaxValue) / divisor) + 1;

            return dividend * magic <= magic - 1;
        }
    }
    else
    {
        return dividend % divisor == 0;
    }
}
/// <summary>
/// Converts four unsigned 64-bit integers to four doubles using AVX/AVX2 intrinsics.
/// Throws CPUFeatureCheckException when AVX2 is not supported.
/// </summary>
/// <remarks>
/// The constants are IEEE-754 double bit patterns: magic_lo = 2^52, magic_hi = 2^84,
/// magic_dbl = 2^84 + 2^52.
///  1. lo takes the even 32-bit elements (low half of each lane) from x and the odd ones from
///     magic_lo (blend mask 0b0101_0101), i.e. the double 2^52 + low32(x).
///  2. hi XORs the upper half of x (x >> 32) into the mantissa of magic_hi, i.e. the double
///     2^84 + high32(x) * 2^32.
///  3. hi_dbl = hi - (2^84 + 2^52) removes both biases in one subtraction; adding lo then
///     yields high32(x) * 2^32 + low32(x).
/// The final pointer cast reinterprets the v256 register as a double4.
/// </remarks>
internal static double4 ULong4ToDouble4(ulong4 x) { if (Avx2.IsAvx2Supported) { ulong4 magic_lo = 0x4330_0000_0000_0000; ulong4 magic_hi = 0x4530_0000_0000_0000; ulong4 magic_dbl = 0x4530_0000_0010_0000; ulong4 lo = Avx2.mm256_blend_epi32(magic_lo, x, 0b0101_0101); ulong4 hi = magic_hi ^ (x >> 32); v256 hi_dbl = Avx.mm256_sub_pd(hi, magic_dbl); v256 result = Avx.mm256_add_pd(hi_dbl, lo); return(*(double4 *)&result); } else { throw new CPUFeatureCheckException(); } }
/// <summary>
/// Per-lane test of whether <paramref name="dividend"/> is evenly divisible by <paramref name="divisor"/>.
/// Every divisor component is asserted to be non-zero.
/// </summary>
/// <remarks>
/// When the divisor is a compile-time constant, each half of the vector gets its own 64-bit
/// magic value (ulong.MaxValue / divisor) + 1 and divisibility is decided by
/// dividend * magic &lt;= magic - 1. Otherwise the remainder is compared against zero.
/// </remarks>
public static bool8 isdivisible(uint8 dividend, uint8 divisor)
{
    Assert.AreNotEqual(0u, divisor.x0);
    Assert.AreNotEqual(0u, divisor.x1);
    Assert.AreNotEqual(0u, divisor.x2);
    Assert.AreNotEqual(0u, divisor.x3);
    Assert.AreNotEqual(0u, divisor.x4);
    Assert.AreNotEqual(0u, divisor.x5);
    Assert.AreNotEqual(0u, divisor.x6);
    Assert.AreNotEqual(0u, divisor.x7);

    if (Constant.IsConstantExpression(divisor))
    {
        ulong4 compile_lo = (new ulong4(ulong.MaxValue) / divisor.v4_0) + 1;
        ulong4 compile_hi = (new ulong4(ulong.MaxValue) / divisor.v4_4) + 1;

        // The upper four lanes must be tested against the magic derived from the upper four
        // divisors (compile_hi); using compile_lo there would test them against the wrong divisor.
        return new bool8(dividend.v4_0 * compile_lo <= compile_lo - 1,
                         dividend.v4_4 * compile_hi <= compile_hi - 1);
    }
    else
    {
        return dividend % divisor == 0;
    }
}
/// <summary>
/// Per-lane clamp: first limits <paramref name="x"/> from above by <paramref name="b"/>,
/// then from below by <paramref name="a"/>.
/// </summary>
public static ulong4 clamp(ulong4 x, ulong4 a, ulong4 b)
{
    ulong4 cappedAbove = min(x, b);

    return max(a, cappedAbove);
}
/// <summary>
/// Per-lane population count: applies math.countbits to each component of <paramref name="x"/>.
/// </summary>
public static ulong4 countbits(ulong4 x)
{
    uint bitsX = (uint)math.countbits(x.x);
    uint bitsY = (uint)math.countbits(x.y);
    uint bitsZ = (uint)math.countbits(x.z);
    uint bitsW = (uint)math.countbits(x.w);

    return new ulong4(bitsX, bitsY, bitsZ, bitsW);
}
/// <summary>
/// Per-lane range test: a lane is true when clamping <paramref name="x"/> to
/// [<paramref name="min"/>, <paramref name="max"/>] leaves it unchanged.
/// </summary>
public static bool4 isinrange(ulong4 x, ulong4 min, ulong4 max)
{
    // The parameters shadow the min/max functions, hence the qualified calls.
    ulong4 raisedToMin = maxmath.max(x, min);
    ulong4 clamped = maxmath.min(raisedToMin, max);

    return clamped == x;
}
/// <summary>
/// Builds a ulong4x3 from its three columns.
/// </summary>
/// <param name="c0">Value stored in column 0.</param>
/// <param name="c1">Value stored in column 1.</param>
/// <param name="c2">Value stored in column 2.</param>
public ulong4x3(ulong4 c0, ulong4 c1, ulong4 c2)
{
    this.c0 = c0;
    this.c1 = c1;
    this.c2 = c2;
}
// External (native) declaration; the implementation lives outside managed code.
// NOTE(review): the name matches the CUDA driver API device-to-host copy entry point
// cuMemcpyDtoH_v2 - presumably copies ByteCount bytes from srcDevice into dstHost; confirm
// against the import attribute and the CUDA driver API reference.
//   dstHost   - host-side ulong4 passed by reference to receive the data.
//   srcDevice - device pointer to read from.
//   ByteCount - size argument forwarded to the native call.
// Returns the CUResult produced by the native call.
public static extern CUResult cuMemcpyDtoH_v2(ref ulong4 dstHost, CUdeviceptr srcDevice, SizeT ByteCount);
/// <summary>
/// Forwards to the long4 overload of <c>all</c> after casting <paramref name="x"/> to long4.
/// </summary>
public static bool all(ulong4 x)
{
    long4 asSigned = (long4)x;

    return all(asSigned);
}
/// <summary>
/// Builds a ulong4x3 with the single value <paramref name="v"/> assigned to all three columns.
/// </summary>
/// <param name="v">Scalar assigned to c0, c1 and c2.</param>
public ulong4x3(ulong v)
{
    this.c0 = v;
    this.c1 = v;
    this.c2 = v;
}
/// <summary>
/// Forwards to the long4 overload of <c>andnot</c>: both operands are cast to long4 and the
/// result is cast back to ulong4.
/// </summary>
public static ulong4 andnot(ulong4 left, ulong4 right)
{
    long4 signedLeft = (long4)left;
    long4 signedRight = (long4)right;

    return (ulong4)andnot(signedLeft, signedRight);
}