/// <summary>
/// Compares the vectorized <c>maxmath.intpow</c> on <c>short16</c> bases with the scalar
/// <c>_intpow</c> helper applied lane by lane, for 64 random base/exponent pairs
/// drawn from a fixed-seed generator.
/// </summary>
public static void short16()
{
    Random16 rng = new Random16(135);

    for (int iteration = 0; iteration < 64; iteration++)
    {
        short16 bases = rng.NextShort16();
        ushort16 exponents = rng.NextUShort16();

        // Scalar reference, one lane at a time.
        short16 expected = new short16(
            (short)_intpow(bases.x0, exponents.x0),
            (short)_intpow(bases.x1, exponents.x1),
            (short)_intpow(bases.x2, exponents.x2),
            (short)_intpow(bases.x3, exponents.x3),
            (short)_intpow(bases.x4, exponents.x4),
            (short)_intpow(bases.x5, exponents.x5),
            (short)_intpow(bases.x6, exponents.x6),
            (short)_intpow(bases.x7, exponents.x7),
            (short)_intpow(bases.x8, exponents.x8),
            (short)_intpow(bases.x9, exponents.x9),
            (short)_intpow(bases.x10, exponents.x10),
            (short)_intpow(bases.x11, exponents.x11),
            (short)_intpow(bases.x12, exponents.x12),
            (short)_intpow(bases.x13, exponents.x13),
            (short)_intpow(bases.x14, exponents.x14),
            (short)_intpow(bases.x15, exponents.x15));

        Assert.AreEqual(expected, maxmath.intpow(bases, exponents));
    }
}
/// <summary>
/// Checks <c>maxmath.rol</c> on <c>ushort16</c> against a scalar reference: each 16-bit lane is
/// duplicated into both halves of a 32-bit value, rotated with <c>math.rol</c>, and truncated
/// back to 16 bits. All lane comparisons are accumulated and asserted once at the end.
/// </summary>
public static void rol_ushort16()
{
    bool allLanesMatch = true;
    Random32 rng = new Random32(RNG_SEED);

    for (int i = 0; i < UShort16.NUM_TESTS; i++)
    {
        ushort16 source = UShort16.TestData_LHS[i];

        for (int j = 0; j < NUM_ROTATION_TESTS; j++)
        {
            int n = rng.NextInt();
            ushort16 test = maxmath.rol(source, n);

            allLanesMatch &= test.x0 == (ushort)math.rol(source.x0 | (source.x0 << 16), n);
            allLanesMatch &= test.x1 == (ushort)math.rol(source.x1 | (source.x1 << 16), n);
            allLanesMatch &= test.x2 == (ushort)math.rol(source.x2 | (source.x2 << 16), n);
            allLanesMatch &= test.x3 == (ushort)math.rol(source.x3 | (source.x3 << 16), n);
            allLanesMatch &= test.x4 == (ushort)math.rol(source.x4 | (source.x4 << 16), n);
            allLanesMatch &= test.x5 == (ushort)math.rol(source.x5 | (source.x5 << 16), n);
            allLanesMatch &= test.x6 == (ushort)math.rol(source.x6 | (source.x6 << 16), n);
            allLanesMatch &= test.x7 == (ushort)math.rol(source.x7 | (source.x7 << 16), n);
            allLanesMatch &= test.x8 == (ushort)math.rol(source.x8 | (source.x8 << 16), n);
            allLanesMatch &= test.x9 == (ushort)math.rol(source.x9 | (source.x9 << 16), n);
            allLanesMatch &= test.x10 == (ushort)math.rol(source.x10 | (source.x10 << 16), n);
            allLanesMatch &= test.x11 == (ushort)math.rol(source.x11 | (source.x11 << 16), n);
            allLanesMatch &= test.x12 == (ushort)math.rol(source.x12 | (source.x12 << 16), n);
            allLanesMatch &= test.x13 == (ushort)math.rol(source.x13 | (source.x13 << 16), n);
            allLanesMatch &= test.x14 == (ushort)math.rol(source.x14 | (source.x14 << 16), n);
            allLanesMatch &= test.x15 == (ushort)math.rol(source.x15 | (source.x15 << 16), n);
        }
    }

    Assert.AreEqual(true, allLanesMatch);
}
/// <summary>
/// Returns, per lane, whether <paramref name="dividend"/> is evenly divisible by
/// <paramref name="divisor"/>. Every divisor lane must be non-zero (asserted).
/// </summary>
/// <remarks>
/// When the divisor is a compile-time constant, the modulo is replaced by a multiply-and-compare
/// in 32-bit lanes: with <c>c = (uint.MaxValue / d) + 1</c>, the check is
/// <c>n * c &lt;= c - 1</c>. The constants are derived separately for the lower and upper eight
/// lanes, and each half of the dividend must be tested against the constants of its own half.
/// </remarks>
/// <param name="dividend">Values to test.</param>
/// <param name="divisor">Per-lane divisors; no lane may be zero.</param>
/// <returns>A <c>bool16</c> that is true in each lane where the remainder is zero.</returns>
public static bool16 isdivisible(ushort16 dividend, ushort16 divisor)
{
    Assert.AreNotEqual(0, divisor.x0);
    Assert.AreNotEqual(0, divisor.x1);
    Assert.AreNotEqual(0, divisor.x2);
    Assert.AreNotEqual(0, divisor.x3);
    Assert.AreNotEqual(0, divisor.x4);
    Assert.AreNotEqual(0, divisor.x5);
    Assert.AreNotEqual(0, divisor.x6);
    Assert.AreNotEqual(0, divisor.x7);
    Assert.AreNotEqual(0, divisor.x8);
    Assert.AreNotEqual(0, divisor.x9);
    Assert.AreNotEqual(0, divisor.x10);
    Assert.AreNotEqual(0, divisor.x11);
    Assert.AreNotEqual(0, divisor.x12);
    Assert.AreNotEqual(0, divisor.x13);
    Assert.AreNotEqual(0, divisor.x14);
    Assert.AreNotEqual(0, divisor.x15);

    if (Constant.IsConstantExpression(divisor))
    {
        uint8 compile_lo = (new uint8(uint.MaxValue) / divisor.v8_0) + 1;
        uint8 compile_hi = (new uint8(uint.MaxValue) / divisor.v8_8) + 1;

        // The upper half must use compile_hi: it was derived from divisor.v8_8.
        // Previously both halves were compared against compile_lo, which gave wrong
        // answers whenever the upper divisors differed from the lower ones.
        return new bool16(dividend.v8_0 * compile_lo <= compile_lo - 1,
                          dividend.v8_8 * compile_hi <= compile_hi - 1);
    }
    else
    {
        return dividend % divisor == 0;
    }
}
/// <summary>
/// Per-lane remainder of <paramref name="dividend"/> divided by <paramref name="divisor"/> for
/// 8-bit lanes, computed with bitwise long division in 16-bit lanes (AVX2 only).
/// Every divisor lane must be non-zero (asserted).
/// </summary>
/// <remarks>
/// The dividend bits are fed in from bit 7 down to bit 0. After each bit is shifted in, the
/// divisor is subtracted from the running remainder in every lane where
/// <c>min(divisor, remainder) == divisor</c>, i.e. where the remainder is at least the divisor.
/// Throws <c>CPUFeatureCheckException</c> when AVX2 is not available.
/// </remarks>
internal static byte16 vrem_byte(byte16 dividend, byte16 divisor)
{
    Assert.AreNotEqual(divisor.x0, 0);
    Assert.AreNotEqual(divisor.x1, 0);
    Assert.AreNotEqual(divisor.x2, 0);
    Assert.AreNotEqual(divisor.x3, 0);
    Assert.AreNotEqual(divisor.x4, 0);
    Assert.AreNotEqual(divisor.x5, 0);
    Assert.AreNotEqual(divisor.x6, 0);
    Assert.AreNotEqual(divisor.x7, 0);
    Assert.AreNotEqual(divisor.x8, 0);
    Assert.AreNotEqual(divisor.x9, 0);
    Assert.AreNotEqual(divisor.x10, 0);
    Assert.AreNotEqual(divisor.x11, 0);
    Assert.AreNotEqual(divisor.x12, 0);
    Assert.AreNotEqual(divisor.x13, 0);
    Assert.AreNotEqual(divisor.x14, 0);
    Assert.AreNotEqual(divisor.x15, 0);

    if (Avx2.IsAvx2Supported)
    {
        ushort16 rem = ushort16.zero;
        ushort16 wideDivisor = divisor;
        ushort16 wideDividend = dividend;

        // Most significant bit (bit 7): nothing to shift yet.
        rem |= (new ushort16(1) & (wideDividend >> 7));
        v256 subtractMask = Avx2.mm256_cmpeq_epi16(maxmath.min(wideDivisor, rem), wideDivisor);
        rem -= Avx2.mm256_blendv_epi8(default(v256), wideDivisor, subtractMask);

        // Bits 6 down to 1.
        for (int bit = 6; bit > 0; bit--)
        {
            rem <<= 1;
            rem |= (new ushort16(1) & (wideDividend >> bit));
            subtractMask = Avx2.mm256_cmpeq_epi16(maxmath.min(wideDivisor, rem), wideDivisor);
            rem -= Avx2.mm256_blendv_epi8(default(v256), wideDivisor, subtractMask);
        }

        // Least significant bit (bit 0): no shift of the dividend needed.
        rem <<= 1;
        rem |= new ushort16(1) & wideDividend;
        subtractMask = Avx2.mm256_cmpeq_epi16(maxmath.min(wideDivisor, rem), wideDivisor);
        rem -= Avx2.mm256_blendv_epi8(default(v256), wideDivisor, subtractMask);

        // Narrow the sixteen 16-bit remainders back to bytes.
        return Sse2.packus_epi16(Avx.mm256_castsi256_si128(rem),
                                 Avx2.mm256_extracti128_si256(rem, 1));
    }
    else
    {
        throw new CPUFeatureCheckException();
    }
}
/// <summary>
/// Compares the vectorized <c>maxmath.intsqrt</c> on <c>ushort16</c> with the scalar
/// <c>_intsqrt</c> helper applied lane by lane, for 64 random inputs from a fixed-seed generator.
/// </summary>
public static void ushort16()
{
    Random16 rng = new Random16(135);

    for (int iteration = 0; iteration < 64; iteration++)
    {
        ushort16 input = rng.NextUShort16();

        // Scalar reference, one lane at a time.
        ushort16 expected = new ushort16(
            (ushort)_intsqrt(input.x0),
            (ushort)_intsqrt(input.x1),
            (ushort)_intsqrt(input.x2),
            (ushort)_intsqrt(input.x3),
            (ushort)_intsqrt(input.x4),
            (ushort)_intsqrt(input.x5),
            (ushort)_intsqrt(input.x6),
            (ushort)_intsqrt(input.x7),
            (ushort)_intsqrt(input.x8),
            (ushort)_intsqrt(input.x9),
            (ushort)_intsqrt(input.x10),
            (ushort)_intsqrt(input.x11),
            (ushort)_intsqrt(input.x12),
            (ushort)_intsqrt(input.x13),
            (ushort)_intsqrt(input.x14),
            (ushort)_intsqrt(input.x15));

        Assert.AreEqual(expected, maxmath.intsqrt(input));
    }
}
/// <summary>
/// Returns, per lane, whether <paramref name="dividend"/> is evenly divisible by
/// <paramref name="divisor"/>. Every divisor lane must be non-zero (asserted).
/// </summary>
/// <remarks>
/// For a compile-time constant divisor the modulo is replaced by a 16-bit multiply-and-compare:
/// with <c>c = (ushort.MaxValue / d) + 1</c>, the check is <c>n * c &lt;= c - 1</c>.
/// Otherwise the result is <c>dividend % divisor == 0</c>.
/// </remarks>
public static bool16 isdivisible(byte16 dividend, byte16 divisor)
{
    Assert.AreNotEqual(0, divisor.x0);
    Assert.AreNotEqual(0, divisor.x1);
    Assert.AreNotEqual(0, divisor.x2);
    Assert.AreNotEqual(0, divisor.x3);
    Assert.AreNotEqual(0, divisor.x4);
    Assert.AreNotEqual(0, divisor.x5);
    Assert.AreNotEqual(0, divisor.x6);
    Assert.AreNotEqual(0, divisor.x7);
    Assert.AreNotEqual(0, divisor.x8);
    Assert.AreNotEqual(0, divisor.x9);
    Assert.AreNotEqual(0, divisor.x10);
    Assert.AreNotEqual(0, divisor.x11);
    Assert.AreNotEqual(0, divisor.x12);
    Assert.AreNotEqual(0, divisor.x13);
    Assert.AreNotEqual(0, divisor.x14);
    Assert.AreNotEqual(0, divisor.x15);

    if (Constant.IsConstantExpression(divisor))
    {
        ushort16 magic = (new ushort16(ushort.MaxValue) / divisor) + 1;

        return dividend * magic <= magic - 1;
    }
    else
    {
        return dividend % divisor == 0;
    }
}
/// <summary>
/// Per-lane least common multiple of two <c>short16</c> vectors, computed on their absolute
/// values as <c>(|x| / gcd(|x|, |y|)) * |y|</c>.
/// </summary>
public static ushort16 lcm(short16 x, short16 y)
{
    ushort16 magnitudeX = (ushort16)abs(x);
    ushort16 magnitudeY = (ushort16)abs(y);

    // Divide before multiplying, as the original expression does.
    ushort16 commonDivisor = gcd(magnitudeX, magnitudeY);
    ushort16 reducedX = magnitudeX / commonDivisor;

    return reducedX * magnitudeY;
}
/// <summary>
/// Returns a pseudo-random <c>sbyte16</c> derived from <paramref name="min"/>, the range
/// <c>max - min</c>, and sixteen consecutive <c>NextState()</c> values.
/// Each lane of <paramref name="max"/> must not be smaller than the matching lane of
/// <paramref name="min"/> (asserted).
/// </summary>
/// <remarks>
/// AVX2 path: the range is widened to 16-bit lanes and multiplied by the random values; the high
/// byte of each 16-bit product is then gathered (byte indices 1, 3, ..., 15 of each 128-bit half)
/// and added to <paramref name="min"/>. Without AVX2 the work is delegated to two
/// <c>NextSByte8</c> calls.
/// </remarks>
public sbyte16 NextSByte16(sbyte16 min, sbyte16 max)
{
    Assert.IsNotSmaller(max.x0, min.x0);
    Assert.IsNotSmaller(max.x1, min.x1);
    Assert.IsNotSmaller(max.x2, min.x2);
    Assert.IsNotSmaller(max.x3, min.x3);
    Assert.IsNotSmaller(max.x4, min.x4);
    Assert.IsNotSmaller(max.x5, min.x5);
    Assert.IsNotSmaller(max.x6, min.x6);
    Assert.IsNotSmaller(max.x7, min.x7);
    Assert.IsNotSmaller(max.x8, min.x8);
    Assert.IsNotSmaller(max.x9, min.x9);
    Assert.IsNotSmaller(max.x10, min.x10);
    Assert.IsNotSmaller(max.x11, min.x11);
    Assert.IsNotSmaller(max.x12, min.x12);
    Assert.IsNotSmaller(max.x13, min.x13);
    Assert.IsNotSmaller(max.x14, min.x14);
    Assert.IsNotSmaller(max.x15, min.x15);

    if (Avx2.IsAvx2Supported)
    {
        ushort16 scaled = (ushort16)(max - min) * new ushort16(
            NextState(), NextState(), NextState(), NextState(),
            NextState(), NextState(), NextState(), NextState(),
            NextState(), NextState(), NextState(), NextState(),
            NextState(), NextState(), NextState(), NextState());

        // Move the high byte of every 16-bit product into the low 8 bytes of each 128-bit half.
        v256 highByteSelector = new v256(1, 3, 5, 7, 9, 11, 13, 15, 0, 0, 0, 0, 0, 0, 0, 0,
                                         1, 3, 5, 7, 9, 11, 13, 15, 0, 0, 0, 0, 0, 0, 0, 0);
        scaled = Avx2.mm256_shuffle_epi8(scaled, highByteSelector);

        // Bring 64-bit elements 0 and 2 together into the low 128 bits.
        v256 gathered = Avx2.mm256_permute4x64_epi64(scaled, Sse.SHUFFLE(0, 0, 2, 0));

        return min + Avx.mm256_castsi256_si128(gathered);
    }
    else
    {
        return new sbyte16(NextSByte8(min.v8_0, max.v8_0),
                           NextSByte8(min.v8_8, max.v8_8));
    }
}
/// <summary>
/// Compares the vectorized <c>maxmath.gcd</c> on <c>ushort16</c> with the scalar <c>_gcd</c>
/// helper applied lane by lane, for 64 random input pairs from a fixed-seed generator.
/// </summary>
public static void ushort16()
{
    Random16 rng = new Random16(135);

    for (int iteration = 0; iteration < 64; iteration++)
    {
        ushort16 left = rng.NextUShort16();
        ushort16 right = rng.NextUShort16();

        // Scalar reference, one lane at a time.
        ushort16 expected = new ushort16(
            (ushort)_gcd(left.x0, right.x0),
            (ushort)_gcd(left.x1, right.x1),
            (ushort)_gcd(left.x2, right.x2),
            (ushort)_gcd(left.x3, right.x3),
            (ushort)_gcd(left.x4, right.x4),
            (ushort)_gcd(left.x5, right.x5),
            (ushort)_gcd(left.x6, right.x6),
            (ushort)_gcd(left.x7, right.x7),
            (ushort)_gcd(left.x8, right.x8),
            (ushort)_gcd(left.x9, right.x9),
            (ushort)_gcd(left.x10, right.x10),
            (ushort)_gcd(left.x11, right.x11),
            (ushort)_gcd(left.x12, right.x12),
            (ushort)_gcd(left.x13, right.x13),
            (ushort)_gcd(left.x14, right.x14),
            (ushort)_gcd(left.x15, right.x15));

        Assert.AreEqual(expected, maxmath.gcd(left, right));
    }
}
/// <summary>
/// Returns a pseudo-random <c>ushort16</c> whose lanes each lie in [0, max).
/// Every lane of <paramref name="max"/> must be positive (asserted).
/// </summary>
/// <remarks>
/// AVX2 path: each lane is the high 16 bits of the 32-bit product <c>max * state</c>, where
/// <c>state</c> is a fresh <c>NextState()</c> value truncated to 16 bits. Both factors are
/// unsigned, so the unsigned high multiply (<c>mulhi_epu16</c>) is required; the signed variant
/// treats any operand with bit 15 set as negative and produces results outside [0, max).
/// Without AVX2 the work is delegated to two <c>NextUShort8</c> calls.
/// </remarks>
/// <param name="max">Exclusive per-lane upper bound.</param>
/// <returns>Sixteen pseudo-random unsigned 16-bit values.</returns>
public ushort16 NextUShort16(ushort16 max)
{
    Assert.IsPositive(max.x0);
    Assert.IsPositive(max.x1);
    Assert.IsPositive(max.x2);
    Assert.IsPositive(max.x3);
    Assert.IsPositive(max.x4);
    Assert.IsPositive(max.x5);
    Assert.IsPositive(max.x6);
    Assert.IsPositive(max.x7);
    Assert.IsPositive(max.x8);
    Assert.IsPositive(max.x9);
    Assert.IsPositive(max.x10);
    Assert.IsPositive(max.x11);
    Assert.IsPositive(max.x12);
    Assert.IsPositive(max.x13);
    Assert.IsPositive(max.x14);
    Assert.IsPositive(max.x15);

    if (Avx2.IsAvx2Supported)
    {
        ushort16 randomBits = new ushort16(
            (ushort)NextState(), (ushort)NextState(), (ushort)NextState(), (ushort)NextState(),
            (ushort)NextState(), (ushort)NextState(), (ushort)NextState(), (ushort)NextState(),
            (ushort)NextState(), (ushort)NextState(), (ushort)NextState(), (ushort)NextState(),
            (ushort)NextState(), (ushort)NextState(), (ushort)NextState(), (ushort)NextState());

        // Unsigned high multiply: floor(max * randomBits / 65536), always below max.
        return Avx2.mm256_mulhi_epu16(max, randomBits);
    }
    else
    {
        return new ushort16(NextUShort8(max.v8_0), NextUShort8(max.v8_8));
    }
}
/// <summary>
/// Returns the index of the first lane of <paramref name="v"/> equal to <paramref name="x"/>,
/// or 16 when no lane matches.
/// </summary>
/// <remarks>
/// The SIMD paths build a byte mask of the equal lanes (two mask bits per 16-bit lane), count
/// its trailing zeros and halve the count. The scalar fallback scans the lanes in order.
/// </remarks>
public static int indexof(ushort16 v, ushort x)
{
    if (Avx2.IsAvx2Supported)
    {
        v256 equalLanes = Avx2.mm256_cmpeq_epi16(v, new ushort16(x));
        int byteMask = Avx2.mm256_movemask_epi8(equalLanes);

        return math.tzcnt(byteMask) >> 1;
    }
    else if (Sse2.IsSse2Supported)
    {
        ushort8 broadcast = x;

        int lowMask = Sse2.movemask_epi8(Sse2.cmpeq_epi16(v._v8_0, broadcast));
        int highMask = Sse2.movemask_epi8(Sse2.cmpeq_epi16(v._v8_8, broadcast));

        return math.tzcnt(lowMask | (highMask << 16)) >> 1;
    }
    else
    {
        int lane = 0;

        while (lane < 16 && v[lane] != x)
        {
            lane++;
        }

        return lane;
    }
}
/// <summary>
/// Returns a pseudo-random <c>ushort16</c> whose lanes each lie in [min, max) (or equal
/// <c>min</c> when the lane's range is empty). Each lane of <paramref name="max"/> must not be
/// smaller than the matching lane of <paramref name="min"/> (asserted).
/// </summary>
/// <remarks>
/// AVX2 path: each lane is <c>min</c> plus the high 16 bits of the 32-bit product
/// <c>(max - min) * state</c>, where <c>state</c> is a fresh <c>NextState()</c> value truncated
/// to 16 bits. Both factors are unsigned, so the unsigned high multiply (<c>mulhi_epu16</c>) is
/// required; the signed variant treats any operand with bit 15 set as negative and produces
/// offsets outside [0, max - min). Without AVX2 the work is delegated to two
/// <c>NextUShort8</c> calls.
/// </remarks>
/// <param name="min">Inclusive per-lane lower bound.</param>
/// <param name="max">Exclusive per-lane upper bound.</param>
/// <returns>Sixteen pseudo-random unsigned 16-bit values.</returns>
public ushort16 NextUShort16(ushort16 min, ushort16 max)
{
    Assert.IsNotSmaller(max.x0, min.x0);
    Assert.IsNotSmaller(max.x1, min.x1);
    Assert.IsNotSmaller(max.x2, min.x2);
    Assert.IsNotSmaller(max.x3, min.x3);
    Assert.IsNotSmaller(max.x4, min.x4);
    Assert.IsNotSmaller(max.x5, min.x5);
    Assert.IsNotSmaller(max.x6, min.x6);
    Assert.IsNotSmaller(max.x7, min.x7);
    Assert.IsNotSmaller(max.x8, min.x8);
    Assert.IsNotSmaller(max.x9, min.x9);
    Assert.IsNotSmaller(max.x10, min.x10);
    Assert.IsNotSmaller(max.x11, min.x11);
    Assert.IsNotSmaller(max.x12, min.x12);
    Assert.IsNotSmaller(max.x13, min.x13);
    Assert.IsNotSmaller(max.x14, min.x14);
    Assert.IsNotSmaller(max.x15, min.x15);

    if (Avx2.IsAvx2Supported)
    {
        ushort16 randomBits = new ushort16(
            (ushort)NextState(), (ushort)NextState(), (ushort)NextState(), (ushort)NextState(),
            (ushort)NextState(), (ushort)NextState(), (ushort)NextState(), (ushort)NextState(),
            (ushort)NextState(), (ushort)NextState(), (ushort)NextState(), (ushort)NextState(),
            (ushort)NextState(), (ushort)NextState(), (ushort)NextState(), (ushort)NextState());

        // Unsigned high multiply: floor((max - min) * randomBits / 65536), always below the range.
        return min + Avx2.mm256_mulhi_epu16(max - min, randomBits);
    }
    else
    {
        return new ushort16(NextUShort8(min.v8_0, max.v8_0),
                            NextUShort8(min.v8_8, max.v8_8));
    }
}
/// <summary>
/// Reverses the bit order within every 16-bit lane of <paramref name="x"/>.
/// </summary>
/// <remarks>
/// Swaps adjacent bits, then adjacent bit pairs, then adjacent nibbles, and finally the two bytes.
/// </remarks>
public static ushort16 reversebits(ushort16 x)
{
    // Swap neighbouring single bits.
    ushort16 upper = (x >> 1) & 0x5555;
    ushort16 lower = (x & 0x5555) << 1;
    x = upper | lower;

    // Swap neighbouring 2-bit groups.
    upper = (x >> 2) & 0x3333;
    lower = (x & 0x3333) << 2;
    x = upper | lower;

    // Swap neighbouring nibbles.
    upper = (x >> 4) & 0x0F0F;
    lower = (x & 0x0F0F) << 4;
    x = upper | lower;

    // Swap the two bytes.
    return (x >> 8) | (x << 8);
}
/// <summary>
/// Per lane, keeps only the highest set bit of <paramref name="x"/>, i.e. the largest power of
/// two not greater than the lane value; a zero lane stays zero.
/// </summary>
public static ushort16 floorpow2(ushort16 x)
{
    // Smear the highest set bit into every lower bit position.
    ushort16 smeared = x;
    smeared |= smeared >> 1;
    smeared |= smeared >> 2;
    smeared |= smeared >> 4;
    smeared |= smeared >> 8;

    // Removing everything below the top bit leaves just that bit.
    ushort16 belowTopBit = smeared >> 1;

    return smeared - belowTopBit;
}
/// <summary>
/// Per lane, rounds <paramref name="x"/> up to a power of two using the decrement /
/// bit-smear / increment sequence in 16-bit arithmetic.
/// </summary>
public static ushort16 ceilpow2(ushort16 x)
{
    // Decrement first so that exact powers of two map to themselves.
    ushort16 smeared = x - 1;

    // Smear the highest set bit into every lower bit position.
    smeared |= smeared >> 1;
    smeared |= smeared >> 2;
    smeared |= smeared >> 4;
    smeared |= smeared >> 8;

    return smeared + 1;
}
/// <summary>
/// Returns a pseudo-random <c>sbyte32</c> derived from <paramref name="min"/>, the range
/// <c>max - min</c>, and thirty-two consecutive <c>NextState()</c> values.
/// Each lane of <paramref name="max"/> must not be smaller than the matching lane of
/// <paramref name="min"/> (asserted).
/// </summary>
/// <remarks>
/// AVX2 path: each 16-lane half of the range is widened to 16-bit lanes and multiplied by random
/// values; the high byte of each 16-bit product is gathered (byte indices 1, 3, ..., 15 of each
/// 128-bit half) and the result is added to <paramref name="min"/>. Without AVX2 the work is
/// delegated to four <c>NextSByte8</c> calls.
/// </remarks>
public sbyte32 NextSByte32(sbyte32 min, sbyte32 max)
{
    Assert.IsNotSmaller(max.x0, min.x0);
    Assert.IsNotSmaller(max.x1, min.x1);
    Assert.IsNotSmaller(max.x2, min.x2);
    Assert.IsNotSmaller(max.x3, min.x3);
    Assert.IsNotSmaller(max.x4, min.x4);
    Assert.IsNotSmaller(max.x5, min.x5);
    Assert.IsNotSmaller(max.x6, min.x6);
    Assert.IsNotSmaller(max.x7, min.x7);
    Assert.IsNotSmaller(max.x8, min.x8);
    Assert.IsNotSmaller(max.x9, min.x9);
    Assert.IsNotSmaller(max.x10, min.x10);
    Assert.IsNotSmaller(max.x11, min.x11);
    Assert.IsNotSmaller(max.x12, min.x12);
    Assert.IsNotSmaller(max.x13, min.x13);
    Assert.IsNotSmaller(max.x14, min.x14);
    Assert.IsNotSmaller(max.x15, min.x15);
    Assert.IsNotSmaller(max.x16, min.x16);
    Assert.IsNotSmaller(max.x17, min.x17);
    Assert.IsNotSmaller(max.x18, min.x18);
    Assert.IsNotSmaller(max.x19, min.x19);
    Assert.IsNotSmaller(max.x20, min.x20);
    Assert.IsNotSmaller(max.x21, min.x21);
    Assert.IsNotSmaller(max.x22, min.x22);
    Assert.IsNotSmaller(max.x23, min.x23);
    Assert.IsNotSmaller(max.x24, min.x24);
    Assert.IsNotSmaller(max.x25, min.x25);
    Assert.IsNotSmaller(max.x26, min.x26);
    Assert.IsNotSmaller(max.x27, min.x27);
    Assert.IsNotSmaller(max.x28, min.x28);
    Assert.IsNotSmaller(max.x29, min.x29);
    Assert.IsNotSmaller(max.x30, min.x30);
    Assert.IsNotSmaller(max.x31, min.x31);

    if (Avx2.IsAvx2Supported)
    {
        sbyte32 range = max - min;

        // Lower sixteen lanes consume the first sixteen states, upper lanes the next sixteen.
        ushort16 scaledLow = (ushort16)range.v16_0 * new ushort16(
            NextState(), NextState(), NextState(), NextState(),
            NextState(), NextState(), NextState(), NextState(),
            NextState(), NextState(), NextState(), NextState(),
            NextState(), NextState(), NextState(), NextState());
        ushort16 scaledHigh = (ushort16)range.v16_16 * new ushort16(
            NextState(), NextState(), NextState(), NextState(),
            NextState(), NextState(), NextState(), NextState(),
            NextState(), NextState(), NextState(), NextState(),
            NextState(), NextState(), NextState(), NextState());

        // Move the high byte of every 16-bit product into the low 8 bytes of each 128-bit half.
        v256 highByteSelector = new v256(1, 3, 5, 7, 9, 11, 13, 15, 0, 0, 0, 0, 0, 0, 0, 0,
                                         1, 3, 5, 7, 9, 11, 13, 15, 0, 0, 0, 0, 0, 0, 0, 0);
        scaledLow = Avx2.mm256_shuffle_epi8(scaledLow, highByteSelector);
        scaledHigh = Avx2.mm256_shuffle_epi8(scaledHigh, highByteSelector);

        // Bring 64-bit elements 0 and 2 together into the low 128 bits of each vector.
        v256 gatheredLow = Avx2.mm256_permute4x64_epi64(scaledLow, Sse.SHUFFLE(0, 0, 2, 0));
        v256 gatheredHigh = Avx2.mm256_permute4x64_epi64(scaledHigh, Sse.SHUFFLE(0, 0, 2, 0));

        return min + new sbyte32(Avx.mm256_castsi256_si128(gatheredLow),
                                 Avx.mm256_castsi256_si128(gatheredHigh));
    }
    else
    {
        return new sbyte32(NextSByte8(min.v8_0, max.v8_0),
                           NextSByte8(min.v8_8, max.v8_8),
                           NextSByte8(min.v8_16, max.v8_16),
                           NextSByte8(min.v8_24, max.v8_24));
    }
}
/// <summary>
/// Alternating subtract/add: even-indexed lanes yield <c>a - b</c>, odd-indexed lanes
/// <c>a + b</c>.
/// </summary>
/// <remarks>
/// AVX2 path: <c>b</c> is negated in the even lanes via <c>mm256_sign_epi16</c> with a control
/// vector alternating between <c>ushort.MaxValue</c> (negative as a signed 16-bit value) and 1,
/// then added to <c>a</c>. Without AVX2 the two 8-lane halves are processed separately.
/// </remarks>
public static ushort16 subadd(ushort16 a, ushort16 b)
{
    if (Avx2.IsAvx2Supported)
    {
        v256 alternatingSigns = new v256(ushort.MaxValue, 1, ushort.MaxValue, 1,
                                         ushort.MaxValue, 1, ushort.MaxValue, 1,
                                         ushort.MaxValue, 1, ushort.MaxValue, 1,
                                         ushort.MaxValue, 1, ushort.MaxValue, 1);

        return a + Avx2.mm256_sign_epi16(b, alternatingSigns);
    }
    else
    {
        return new ushort16(subadd(a.v8_0, b.v8_0),
                            subadd(a.v8_8, b.v8_8));
    }
}
/// <summary>
/// Per-lane <c>left &amp; ~right</c>.
/// </summary>
/// <remarks>
/// The AVX2 intrinsic complements its FIRST operand, hence the swapped argument order.
/// </remarks>
public static ushort16 andnot(ushort16 left, ushort16 right)
{
    if (Avx2.IsAvx2Supported)
    {
        // mm256_andnot_si256(a, b) computes (~a) & b.
        v256 cleared = Avx2.mm256_andnot_si256(right, left);

        return cleared;
    }
    else
    {
        return new ushort16(andnot(left.v8_0, right.v8_0),
                            andnot(left.v8_8, right.v8_8));
    }
}
/// <summary>
/// Per-lane average of <paramref name="x"/> and <paramref name="y"/>, using
/// <c>mm256_avg_epu16</c> when AVX2 is available and the 8-lane <c>avg</c> overload on each
/// half otherwise.
/// </summary>
public static ushort16 avg(ushort16 x, ushort16 y)
{
    if (Avx2.IsAvx2Supported)
    {
        v256 averaged = Avx2.mm256_avg_epu16(x, y);

        return averaged;
    }
    else
    {
        return new ushort16(avg(x.v8_0, y.v8_0),
                            avg(x.v8_8, y.v8_8));
    }
}
/// <summary>
/// Per-lane unsigned maximum of <paramref name="a"/> and <paramref name="b"/>, using
/// <c>mm256_max_epu16</c> when AVX2 is available and the 8-lane <c>max</c> overload on each
/// half otherwise.
/// </summary>
public static ushort16 max(ushort16 a, ushort16 b)
{
    if (Avx2.IsAvx2Supported)
    {
        v256 larger = Avx2.mm256_max_epu16(a, b);

        return larger;
    }
    else
    {
        return new ushort16(max(a.v8_0, b.v8_0),
                            max(a.v8_8, b.v8_8));
    }
}
/// <summary>
/// Returns, per lane, whether <paramref name="x"/> is a power of two: the lane must be greater
/// than zero and <c>x &amp; (x - 1)</c> must be zero.
/// </summary>
/// <remarks>
/// AVX2 path: both conditions are computed as 16-bit lane masks, combined, narrowed to bytes and
/// reduced to 0/1 by masking with 1. Without AVX2 the two 8-lane halves are processed separately.
/// </remarks>
public static bool16 ispow2(ushort16 x)
{
    if (Avx2.IsAvx2Supported)
    {
        v256 isNonZero = Operator.greater_mask_ushort(x, default(v256));
        v256 hasSingleBit = Avx2.mm256_cmpeq_epi16(default(v256), x & (x - 1));
        ushort16 isPowerOfTwo = Avx2.mm256_and_si256(isNonZero, hasSingleBit);

        return (v128)(new byte16(1) & (byte16)isPowerOfTwo);
    }
    else
    {
        return new bool16(ispow2(x.v8_0),
                          ispow2(x.v8_8));
    }
}
/// <summary>
/// Divides every lane of <paramref name="dividend"/> by the scalar <paramref name="divisor"/>,
/// returning the quotient and writing the remainder to <paramref name="remainder"/>.
/// </summary>
/// <remarks>
/// When the divisor is a compile-time constant the <c>/</c> and <c>%</c> operators are used
/// directly; otherwise the call is forwarded to the vector-divisor overload.
/// </remarks>
public static ushort16 divrem(ushort16 dividend, ushort divisor, out ushort16 remainder)
{
    if (Constant.IsConstantExpression(divisor))
    {
        ushort16 quotient = dividend / divisor;
        remainder = dividend % divisor;

        return quotient;
    }
    else
    {
        return divrem(dividend, (ushort16)divisor, out remainder);
    }
}
/// <summary>
/// Per-lane population count (number of set bits) of <paramref name="x"/>.
/// </summary>
/// <remarks>
/// AVX2 path: the vector is reinterpreted as 32 bytes, bits are counted per byte, and the two
/// byte counts inside each 16-bit lane are summed. Without AVX2 the two 8-lane halves are
/// processed separately.
/// </remarks>
public static ushort16 countbits(ushort16 x)
{
    if (Avx2.IsAvx2Supported)
    {
        ushort16 perByteCounts = (v256)countbits((byte32)(v256)x);

        ushort16 lowByteCount = perByteCounts & 0x00FF;
        ushort16 highByteCount = perByteCounts >> 8;

        return lowByteCount + highByteCount;
    }
    else
    {
        return new ushort16(countbits(x.v8_0),
                            countbits(x.v8_8));
    }
}
/// <summary>
/// Converts every lane of <paramref name="x"/> to a boolean by clamping it to [0, 1] and
/// narrowing to bytes, so the result lanes only ever hold 0 or 1.
/// </summary>
/// <remarks>
/// With SSE2 the clamped bytes are converted through <c>v128</c>; otherwise the bytes are
/// reinterpreted in place as a <c>bool16</c> through a pointer cast.
/// </remarks>
public static bool16 toboolsafe(ushort16 x)
{
    byte16 zeroOrOne = (byte16)clamp(x, 0, 1);

    if (Sse2.IsSse2Supported)
    {
        return (v128)zeroOrOne;
    }
    else
    {
        return *(bool16*)&zeroOrOne;
    }
}
/// <summary>
/// Builds, per lane, a 16-bit mask with <paramref name="numBits"/> consecutive bits set,
/// starting at bit position <paramref name="index"/>.
/// </summary>
/// <remarks>
/// Asserted preconditions per lane: <c>index</c> is in [0, 16] and <c>numBits</c> is in
/// [0, 16 - index].
/// AVX2 path: <c>index</c> is first turned into an all-ones mask shifted left by <c>index</c>;
/// shifting that mask left by <c>numBits</c> and clearing those bits from it leaves exactly the
/// requested run. Lanes with <c>numBits == 16</c> are forced to all ones through a separate
/// compare mask. Without AVX2 the two 8-lane halves are processed separately.
/// </remarks>
/// <param name="numBits">Number of bits to set in each lane.</param>
/// <param name="index">Position of the lowest set bit in each lane; defaults to 0.</param>
/// <returns>The per-lane bit masks.</returns>
public static ushort16 bitmask16(ushort16 numBits, ushort16 index = default(ushort16))
{
    Assert.IsBetween(index.x0, 0u, 16u);
    Assert.IsBetween(index.x1, 0u, 16u);
    Assert.IsBetween(index.x2, 0u, 16u);
    Assert.IsBetween(index.x3, 0u, 16u);
    Assert.IsBetween(index.x4, 0u, 16u);
    Assert.IsBetween(index.x5, 0u, 16u);
    Assert.IsBetween(index.x6, 0u, 16u);
    Assert.IsBetween(index.x7, 0u, 16u);
    Assert.IsBetween(index.x8, 0u, 16u);
    Assert.IsBetween(index.x9, 0u, 16u);
    Assert.IsBetween(index.x10, 0u, 16u);
    Assert.IsBetween(index.x11, 0u, 16u);
    Assert.IsBetween(index.x12, 0u, 16u);
    Assert.IsBetween(index.x13, 0u, 16u);
    Assert.IsBetween(index.x14, 0u, 16u);
    Assert.IsBetween(index.x15, 0u, 16u);
    Assert.IsBetween(numBits.x0, 0u, 16u - index.x0);
    Assert.IsBetween(numBits.x1, 0u, 16u - index.x1);
    Assert.IsBetween(numBits.x2, 0u, 16u - index.x2);
    Assert.IsBetween(numBits.x3, 0u, 16u - index.x3);
    Assert.IsBetween(numBits.x4, 0u, 16u - index.x4);
    Assert.IsBetween(numBits.x5, 0u, 16u - index.x5);
    Assert.IsBetween(numBits.x6, 0u, 16u - index.x6);
    Assert.IsBetween(numBits.x7, 0u, 16u - index.x7);
    Assert.IsBetween(numBits.x8, 0u, 16u - index.x8);
    Assert.IsBetween(numBits.x9, 0u, 16u - index.x9);
    Assert.IsBetween(numBits.x10, 0u, 16u - index.x10);
    Assert.IsBetween(numBits.x11, 0u, 16u - index.x11);
    Assert.IsBetween(numBits.x12, 0u, 16u - index.x12);
    Assert.IsBetween(numBits.x13, 0u, 16u - index.x13);
    Assert.IsBetween(numBits.x14, 0u, 16u - index.x14);
    Assert.IsBetween(numBits.x15, 0u, 16u - index.x15);

    if (Avx2.IsAvx2Supported)
    {
        // The "mask" comment must sit on its own line: as a trailing part of a single-line
        // body it would comment out the rest of the method.
        // mask
        index = shl(ushort.MaxValue, index);

        v256 isMaxBitsMask = Avx2.mm256_cmpeq_epi16(numBits, new ushort16(16));

        return isMaxBitsMask | andnot(index, shl(index, numBits));
    }
    else
    {
        return new ushort16(bitmask16(numBits.v8_0, index.v8_0),
                            bitmask16(numBits.v8_8, index.v8_8));
    }
}
/// <summary>
/// Returns a 16-bit lane mask that is all ones where <paramref name="left"/> is greater than
/// <paramref name="right"/> as unsigned values, and zero elsewhere (AVX2 only).
/// </summary>
/// <remarks>
/// AVX2 only offers a signed 16-bit greater-than compare, so the top bit of both operands is
/// flipped first; that maps unsigned order onto signed order.
/// Throws <c>CPUFeatureCheckException</c> when AVX2 is not available.
/// </remarks>
internal static v256 greater_mask_ushort(ushort16 left, ushort16 right)
{
    if (Avx2.IsAvx2Supported)
    {
        ushort16 signBit = 1 << 15;

        v256 biasedLeft = Avx2.mm256_xor_si256(left, signBit);
        v256 biasedRight = Avx2.mm256_xor_si256(right, signBit);

        return Avx2.mm256_cmpgt_epi16(biasedLeft, biasedRight);
    }
    else
    {
        throw new CPUFeatureCheckException();
    }
}
/// <summary>
/// Per-lane greatest common divisor of <paramref name="x"/> and <paramref name="y"/>.
/// </summary>
/// <remarks>
/// AVX2 path, in order:
/// 1. Lanes where either input is zero are recorded in <c>any_zero</c>; their result is taken
///    from <c>result_if_zero_any</c>, which holds <c>y</c> where <c>x</c> is zero and <c>x</c>
///    where <c>y</c> is zero. These lanes start out marked as done.
/// 2. <c>shift</c> stores the trailing-zero count of <c>x | y</c>, and <c>x</c> is shifted right
///    by its own trailing-zero count.
/// 3. Each loop iteration shifts <c>y</c> right by its trailing-zero count, orders the pair so
///    that <c>x</c> holds the smaller value, and replaces <c>y</c> with the difference. A lane
///    whose difference reaches zero (and is not already done) captures <c>x</c> into
///    <c>result</c> and is marked done. The loop ends once the byte mask of <c>doneMask</c>
///    is all ones (-1).
/// 4. The result is shifted left by <c>shift</c>, and the zero-input lanes are blended back in.
/// Without AVX2 the two 8-lane halves are processed by the 8-lane <c>gcd</c> overload.
/// </remarks>
public static ushort16 gcd(ushort16 x, ushort16 y) { if (Avx2.IsAvx2Supported) { v256 ZERO = default(v256); v256 result = ZERO; v256 result_if_zero_any = ZERO; v256 x_is_zero = Avx2.mm256_cmpeq_epi16(x, ZERO); v256 y_is_zero = Avx2.mm256_cmpeq_epi16(y, ZERO); v256 any_zero = Avx2.mm256_or_si256(x_is_zero, y_is_zero); result_if_zero_any = Avx2.mm256_blendv_epi8(result_if_zero_any, y, x_is_zero); result_if_zero_any = Avx2.mm256_blendv_epi8(result_if_zero_any, x, y_is_zero); v256 doneMask = any_zero; v256 shift = tzcnt(x | y); x = shrl(x, tzcnt(x)); do { y = shrl(y, tzcnt(y)); v256 tempX = x; x = Avx2.mm256_min_epu16(x, y); y = Avx2.mm256_max_epu16(y, tempX); y -= x; v256 loopCheck = Avx2.mm256_andnot_si256(doneMask, Avx2.mm256_cmpeq_epi16(y, ZERO)); result = Avx2.mm256_blendv_epi8(result, x, loopCheck); doneMask = Avx2.mm256_or_si256(doneMask, loopCheck); } while (-1 != Avx2.mm256_movemask_epi8(doneMask)); result = shl((ushort16)result, (ushort16)shift); result = Avx2.mm256_blendv_epi8(result, result_if_zero_any, any_zero); return(result); } else { return(new ushort16(gcd(x.v8_0, y.v8_0), gcd(x.v8_8, y.v8_8))); } }
/// <summary>
/// Checks <c>maxmath.vrol</c> on <c>ushort16</c>: for every test vector and every rotation
/// amount from 1 to 15, lane <c>k</c> of the rotated vector must equal lane
/// <c>((16 - j) + k) % 16</c> of the source vector.
/// </summary>
public static void ushort16()
{
    for (int i = 0; i < UShort16.TestData_LHS.Length; i++)
    {
        ushort16 source = UShort16.TestData_LHS[i];

        for (int j = 1; j < 16; j++)
        {
            ushort16 rotated = maxmath.vrol(source, j);

            for (int k = 0; k < 16; k++)
            {
                int sourceLane = ((16 - j) + k) % 16;

                Assert.AreEqual(rotated[k], source[sourceLane]);
            }
        }
    }
}
/// <summary>
/// Per-lane division of <paramref name="dividend"/> by <paramref name="divisor"/> for 16-bit
/// lanes (AVX2 only), computed separately for each 8-lane half via <c>vdiv_ushort_AVX</c>.
/// </summary>
/// <remarks>
/// The two <c>float8</c> quotients are converted to <c>uint8</c> and packed to 16 bits.
/// <c>mm256_packus_epi32</c> packs within each 128-bit half, so the 64-bit elements come out in
/// the order lo0, hi0, lo1, hi1; the final permute reorders them to lo0, lo1, hi0, hi1.
/// Throws <c>CPUFeatureCheckException</c> when AVX2 is not available.
/// </remarks>
internal static ushort16 vdiv_ushort(ushort16 dividend, ushort16 divisor)
{
    if (Avx2.IsAvx2Supported)
    {
        float8 quotientLow = vdiv_ushort_AVX(dividend.v8_0, divisor.v8_0);
        float8 quotientHigh = vdiv_ushort_AVX(dividend.v8_8, divisor.v8_8);

        v256 packed = Avx2.mm256_packus_epi32((uint8)quotientLow, (uint8)quotientHigh);

        return Avx2.mm256_permute4x64_epi64(packed, Sse.SHUFFLE(3, 1, 2, 0));
    }
    else
    {
        throw new CPUFeatureCheckException();
    }
}
/// <summary>
/// Per-lane three-way comparison: 1 where <paramref name="x"/> is greater than
/// <paramref name="y"/>, -1 where it is smaller, and 0 where the lanes are equal.
/// </summary>
/// <remarks>
/// AVX2 path: the two "greater than" masks hold -1 in matching lanes. Negating the x-greater
/// mask gives +1, and adding the y-greater mask contributes -1.
/// Without AVX2 the two 8-lane halves are processed separately.
/// </remarks>
public static short16 compareto(ushort16 x, ushort16 y)
{
    if (Avx2.IsAvx2Supported)
    {
        short16 xIsGreater = Operator.greater_mask_ushort(x, y);
        short16 yIsGreater = Operator.greater_mask_ushort(y, x);

        return (0 - xIsGreater) + yIsGreater;
    }
    else
    {
        return new short16(compareto(x.v8_0, y.v8_0),
                           compareto(x.v8_8, y.v8_8));
    }
}