public static byte16 intsqrt(byte16 x) { if (Avx2.IsAvx2Supported) { return(new byte16(intsqrt(x.v8_0), intsqrt(x.v8_8))); } else if (Sse2.IsSse2Supported) { v128 ZERO = default(v128); byte16 result = ZERO; byte16 mask = new byte16(1 << 6); v128 doneMask = ZERO; v128 tempMask = Sse2.cmpeq_epi8(ZERO, ZERO); doneMask = Sse2.cmpeq_epi8(x, max(mask, x)); tempMask = Mask.BlendV(tempMask, mask, doneMask); while (bitmask32(16 * sizeof(byte)) != Sse2.movemask_epi8(doneMask)) { mask >>= 2; doneMask = Sse2.or_si128(doneMask, Sse2.cmpeq_epi8(x, max(mask, x))); if (Sse4_1.IsSse41Supported) { tempMask = Mask.BlendV(tempMask, mask, Sse2.and_si128(tempMask, doneMask)); } else { tempMask = Mask.BlendV(tempMask, mask, Sse2.and_si128(Sse2.cmpgt_epi8(default, tempMask), doneMask));
// Vectorized remainder of byte16 / byte16 via bit-serial restoring long division.
// The operands are widened to 16-bit lanes so the running remainder (at most
// 2 * 255 + 1 after a shift) can never overflow a lane. AVX2 only; every divisor
// lane must be non-zero (asserted below).
internal static byte16 vrem_byte(byte16 dividend, byte16 divisor)
{
    Assert.AreNotEqual(divisor.x0, 0);
    Assert.AreNotEqual(divisor.x1, 0);
    Assert.AreNotEqual(divisor.x2, 0);
    Assert.AreNotEqual(divisor.x3, 0);
    Assert.AreNotEqual(divisor.x4, 0);
    Assert.AreNotEqual(divisor.x5, 0);
    Assert.AreNotEqual(divisor.x6, 0);
    Assert.AreNotEqual(divisor.x7, 0);
    Assert.AreNotEqual(divisor.x8, 0);
    Assert.AreNotEqual(divisor.x9, 0);
    Assert.AreNotEqual(divisor.x10, 0);
    Assert.AreNotEqual(divisor.x11, 0);
    Assert.AreNotEqual(divisor.x12, 0);
    Assert.AreNotEqual(divisor.x13, 0);
    Assert.AreNotEqual(divisor.x14, 0);
    Assert.AreNotEqual(divisor.x15, 0);

    if (Avx2.IsAvx2Supported)
    {
        ushort16 remainders = ushort16.zero;
        ushort16 divisorCast = divisor;
        ushort16 dividendCast = dividend;

        // Bring in the most significant dividend bit (bit 7) first.
        remainders |= (new ushort16(1) & (dividendCast >> 7));
        // remainders >= divisor  <=>  min(divisor, remainders) == divisor;
        // the resulting all-ones/all-zeros lane mask drives a conditional subtract.
        v256 subtractDivisorFromRemainder = Avx2.mm256_cmpeq_epi16(maxmath.min(divisorCast, remainders), divisorCast);
        remainders -= Avx2.mm256_blendv_epi8(default(v256), divisorCast, subtractDivisorFromRemainder);

        // Bits 6 down to 1: shift the remainder up, append the next dividend bit,
        // then conditionally subtract the divisor (restoring division step).
        for (int i = 6; i > 0; i--)
        {
            remainders <<= 1;
            remainders |= (new ushort16(1) & (dividendCast >> i));
            subtractDivisorFromRemainder = Avx2.mm256_cmpeq_epi16(maxmath.min(divisorCast, remainders), divisorCast);
            remainders -= Avx2.mm256_blendv_epi8(default(v256), divisorCast, subtractDivisorFromRemainder);
        }

        // Final step for bit 0.
        remainders <<= 1;
        remainders |= new ushort16(1) & dividendCast;
        subtractDivisorFromRemainder = Avx2.mm256_cmpeq_epi16(maxmath.min(divisorCast, remainders), divisorCast);
        remainders -= Avx2.mm256_blendv_epi8(default(v256), divisorCast, subtractDivisorFromRemainder);

        // Remainders are < 256, so saturating 16->8 bit packing narrows losslessly.
        return(Sse2.packus_epi16(Avx.mm256_castsi256_si128(remainders), Avx2.mm256_extracti128_si256(remainders, 1)));
    }
    else
    {
        throw new CPUFeatureCheckException();
    }
}
// Test: maxmath.intpow(sbyte16, byte16) must match the scalar reference
// _intpow lane-by-lane over 64 pseudo-random input vectors (fixed seed 135
// for reproducibility).
public static void sbyte16()
{
    Random8 rng = new Random8(135);

    for (int i = 0; i < 64; i++)
    {
        sbyte16 x = rng.NextSByte16();
        byte16 n = rng.NextByte16();

        Assert.AreEqual(new sbyte16((sbyte)_intpow(x.x0, n.x0),
                                    (sbyte)_intpow(x.x1, n.x1),
                                    (sbyte)_intpow(x.x2, n.x2),
                                    (sbyte)_intpow(x.x3, n.x3),
                                    (sbyte)_intpow(x.x4, n.x4),
                                    (sbyte)_intpow(x.x5, n.x5),
                                    (sbyte)_intpow(x.x6, n.x6),
                                    (sbyte)_intpow(x.x7, n.x7),
                                    (sbyte)_intpow(x.x8, n.x8),
                                    (sbyte)_intpow(x.x9, n.x9),
                                    (sbyte)_intpow(x.x10, n.x10),
                                    (sbyte)_intpow(x.x11, n.x11),
                                    (sbyte)_intpow(x.x12, n.x12),
                                    (sbyte)_intpow(x.x13, n.x13),
                                    (sbyte)_intpow(x.x14, n.x14),
                                    (sbyte)_intpow(x.x15, n.x15)),
                        maxmath.intpow(x, n));
    }
}
// Test: maxmath.rol on byte16 must match math.rol per lane. Each byte is
// replicated into all four bytes of a 32-bit word before calling math.rol so
// that an 8-bit rotation by n is equivalent to the low byte of a 32-bit
// rotation by n (the replication makes the rotation periodic with period 8).
public static void rol_byte16()
{
    bool result = true;
    Random32 rng = new Random32(RNG_SEED);

    for (int i = 0; i < Byte16.NUM_TESTS; i++)
    {
        for (int j = 0; j < NUM_ROTATION_TESTS; j++)
        {
            int n = rng.NextInt();
            byte16 test = maxmath.rol(Byte16.TestData_LHS[i], n);

            result &= test.x0 == (byte)math.rol(Byte16.TestData_LHS[i].x0 | (Byte16.TestData_LHS[i].x0 << 8) | (Byte16.TestData_LHS[i].x0 << 16) | (Byte16.TestData_LHS[i].x0 << 24), n);
            result &= test.x1 == (byte)math.rol(Byte16.TestData_LHS[i].x1 | (Byte16.TestData_LHS[i].x1 << 8) | (Byte16.TestData_LHS[i].x1 << 16) | (Byte16.TestData_LHS[i].x1 << 24), n);
            result &= test.x2 == (byte)math.rol(Byte16.TestData_LHS[i].x2 | (Byte16.TestData_LHS[i].x2 << 8) | (Byte16.TestData_LHS[i].x2 << 16) | (Byte16.TestData_LHS[i].x2 << 24), n);
            result &= test.x3 == (byte)math.rol(Byte16.TestData_LHS[i].x3 | (Byte16.TestData_LHS[i].x3 << 8) | (Byte16.TestData_LHS[i].x3 << 16) | (Byte16.TestData_LHS[i].x3 << 24), n);
            result &= test.x4 == (byte)math.rol(Byte16.TestData_LHS[i].x4 | (Byte16.TestData_LHS[i].x4 << 8) | (Byte16.TestData_LHS[i].x4 << 16) | (Byte16.TestData_LHS[i].x4 << 24), n);
            result &= test.x5 == (byte)math.rol(Byte16.TestData_LHS[i].x5 | (Byte16.TestData_LHS[i].x5 << 8) | (Byte16.TestData_LHS[i].x5 << 16) | (Byte16.TestData_LHS[i].x5 << 24), n);
            result &= test.x6 == (byte)math.rol(Byte16.TestData_LHS[i].x6 | (Byte16.TestData_LHS[i].x6 << 8) | (Byte16.TestData_LHS[i].x6 << 16) | (Byte16.TestData_LHS[i].x6 << 24), n);
            result &= test.x7 == (byte)math.rol(Byte16.TestData_LHS[i].x7 | (Byte16.TestData_LHS[i].x7 << 8) | (Byte16.TestData_LHS[i].x7 << 16) | (Byte16.TestData_LHS[i].x7 << 24), n);
            result &= test.x8 == (byte)math.rol(Byte16.TestData_LHS[i].x8 | (Byte16.TestData_LHS[i].x8 << 8) | (Byte16.TestData_LHS[i].x8 << 16) | (Byte16.TestData_LHS[i].x8 << 24), n);
            result &= test.x9 == (byte)math.rol(Byte16.TestData_LHS[i].x9 | (Byte16.TestData_LHS[i].x9 << 8) | (Byte16.TestData_LHS[i].x9 << 16) | (Byte16.TestData_LHS[i].x9 << 24), n);
            result &= test.x10 == (byte)math.rol(Byte16.TestData_LHS[i].x10 | (Byte16.TestData_LHS[i].x10 << 8) | (Byte16.TestData_LHS[i].x10 << 16) | (Byte16.TestData_LHS[i].x10 << 24), n);
            result &= test.x11 == (byte)math.rol(Byte16.TestData_LHS[i].x11 | (Byte16.TestData_LHS[i].x11 << 8) | (Byte16.TestData_LHS[i].x11 << 16) | (Byte16.TestData_LHS[i].x11 << 24), n);
            result &= test.x12 == (byte)math.rol(Byte16.TestData_LHS[i].x12 | (Byte16.TestData_LHS[i].x12 << 8) | (Byte16.TestData_LHS[i].x12 << 16) | (Byte16.TestData_LHS[i].x12 << 24), n);
            result &= test.x13 == (byte)math.rol(Byte16.TestData_LHS[i].x13 | (Byte16.TestData_LHS[i].x13 << 8) | (Byte16.TestData_LHS[i].x13 << 16) | (Byte16.TestData_LHS[i].x13 << 24), n);
            result &= test.x14 == (byte)math.rol(Byte16.TestData_LHS[i].x14 | (Byte16.TestData_LHS[i].x14 << 8) | (Byte16.TestData_LHS[i].x14 << 16) | (Byte16.TestData_LHS[i].x14 << 24), n);
            result &= test.x15 == (byte)math.rol(Byte16.TestData_LHS[i].x15 | (Byte16.TestData_LHS[i].x15 << 8) | (Byte16.TestData_LHS[i].x15 << 16) | (Byte16.TestData_LHS[i].x15 << 24), n);
        }
    }

    Assert.AreEqual(true, result);
}
// Lane-wise least common multiple of two sbyte16 vectors, returned as the
// unsigned magnitude: lcm = (|x| / gcd(|x|, |y|)) * |y|.
// Dividing by the gcd before the multiply keeps the intermediate as small
// as possible for the byte-wide lanes.
public static byte16 lcm(sbyte16 x, sbyte16 y)
{
    byte16 ux = (byte16)abs(x);
    byte16 uy = (byte16)abs(y);
    byte16 commonDivisor = gcd(ux, uy);

    return (ux / commonDivisor) * uy;
}
// Returns 16 pseudo-random bytes, lane i uniform in [min.xi, max.xi)
// (asserted: max >= min per lane; presumably the upper bound is exclusive,
// matching the fixed-point scaling below — TODO confirm against NextByte8).
public byte16 NextByte16(byte16 min, byte16 max)
{
    Assert.IsNotSmaller(max.x0, min.x0);
    Assert.IsNotSmaller(max.x1, min.x1);
    Assert.IsNotSmaller(max.x2, min.x2);
    Assert.IsNotSmaller(max.x3, min.x3);
    Assert.IsNotSmaller(max.x4, min.x4);
    Assert.IsNotSmaller(max.x5, min.x5);
    Assert.IsNotSmaller(max.x6, min.x6);
    Assert.IsNotSmaller(max.x7, min.x7);
    Assert.IsNotSmaller(max.x8, min.x8);
    Assert.IsNotSmaller(max.x9, min.x9);
    Assert.IsNotSmaller(max.x10, min.x10);
    Assert.IsNotSmaller(max.x11, min.x11);
    Assert.IsNotSmaller(max.x12, min.x12);
    Assert.IsNotSmaller(max.x13, min.x13);
    Assert.IsNotSmaller(max.x14, min.x14);
    Assert.IsNotSmaller(max.x15, min.x15);

    if (Avx2.IsAvx2Supported)
    {
        // Fixed-point scaling: (range * rngState) in 16-bit lanes; the high
        // byte of each product is the scaled value in [0, range).
        short16 temp = (short16)(max - min) * new short16(NextState(), NextState(), NextState(), NextState(), NextState(), NextState(), NextState(), NextState(), NextState(), NextState(), NextState(), NextState(), NextState(), NextState(), NextState(), NextState());

        // Gather the high byte (odd byte) of every 16-bit lane into the low
        // 8 bytes of each 128-bit half...
        temp = Avx2.mm256_shuffle_epi8(temp, new v256(1, 3, 5, 7, 9, 11, 13, 15, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15, 0, 0, 0, 0, 0, 0, 0, 0));

        // ...then pull qwords 0 and 2 together into the low 128 bits and
        // offset by min.
        return(min + Avx.mm256_castsi256_si128(Avx2.mm256_permute4x64_epi64(temp, Sse.SHUFFLE(0, 0, 2, 0))));
    }
    else
    {
        // Scalar/SSE fallback: generate the two 8-lane halves independently.
        return(new byte16(NextByte8(min.v8_0, max.v8_0), NextByte8(min.v8_8, max.v8_8)));
    }
}
// Combined quotient/remainder of byte8 / byte8 via bit-serial restoring long
// division in 16-bit lanes (so the running remainder, at most 2 * 255 + 1,
// cannot overflow a lane). Returns the quotient; the remainder comes out via
// the out parameter. SSE2 fallback path; all divisor lanes must be non-zero.
//
// Fix: the loop previously compared with Sse2.cmpeq_epi8 while the pre- and
// post-loop steps (and the other vrem/vdiv variants) use cmpeq_epi16 on the
// same 16-bit-lane data. The byte-wise compare only worked by accident
// (divisor high bytes are always zero); use the 16-bit compare consistently.
internal static byte8 vdivrem_byte_SSE_FALLBACK(byte8 dividend, byte8 divisor, out byte8 remainder)
{
    Assert.AreNotEqual(divisor.x0, 0);
    Assert.AreNotEqual(divisor.x1, 0);
    Assert.AreNotEqual(divisor.x2, 0);
    Assert.AreNotEqual(divisor.x3, 0);
    Assert.AreNotEqual(divisor.x4, 0);
    Assert.AreNotEqual(divisor.x5, 0);
    Assert.AreNotEqual(divisor.x6, 0);
    Assert.AreNotEqual(divisor.x7, 0);

    if (Sse2.IsSse2Supported)
    {
        ushort8 quotients = ushort8.zero;
        ushort8 remainders = ushort8.zero;
        ushort8 divisorCast = divisor;
        ushort8 dividendCast = dividend;

        // Most significant dividend bit (bit 7) first.
        remainders |= (new ushort8(1) & (dividendCast >> 7));
        // remainders >= divisor  <=>  min(divisor, remainders) == divisor.
        v128 subtractDivisorFromRemainder = Sse2.cmpeq_epi16(maxmath.min(divisorCast, remainders), divisorCast);
        remainders -= Mask.BlendV(default(v128), divisorCast, subtractDivisorFromRemainder);
        // The compare mask doubles as the next quotient bit.
        quotients |= new ushort8(1) & subtractDivisorFromRemainder;

        // Bits 6 down to 1.
        for (int i = 6; i > 0; i--)
        {
            quotients <<= 1;
            remainders <<= 1;
            remainders |= (new ushort8(1) & (dividendCast >> i));
            subtractDivisorFromRemainder = Sse2.cmpeq_epi16(maxmath.min(divisorCast, remainders), divisorCast);
            remainders -= Mask.BlendV(default(v128), divisorCast, subtractDivisorFromRemainder);
            quotients |= new ushort8(1) & subtractDivisorFromRemainder;
        }

        // Final step for bit 0.
        remainders <<= 1;
        quotients <<= 1;
        remainders |= new ushort8(1) & dividendCast;
        subtractDivisorFromRemainder = Sse2.cmpeq_epi16(maxmath.min(divisorCast, remainders), divisorCast);
        remainders -= Mask.BlendV(default(v128), divisorCast, subtractDivisorFromRemainder);
        quotients |= new ushort8(1) & subtractDivisorFromRemainder;

        // Both results are < 256; pack remainders into the low half and
        // quotients into the high half, then split.
        byte16 temp = Sse2.packus_epi16(remainders, quotients);

        remainder = temp.v8_0;
        return(temp.v8_8);
    }
    else
    {
        throw new CPUFeatureCheckException();
    }
}
// Test: maxmath.intsqrt(byte16) must match the scalar reference _intsqrt
// lane-by-lane over 64 pseudo-random vectors (fixed seed 135 for
// reproducibility).
public static void byte16()
{
    Random8 rng = new Random8(135);

    for (int i = 0; i < 64; i++)
    {
        byte16 x = rng.NextByte16();

        Assert.AreEqual(new byte16((byte)_intsqrt(x.x0),
                                   (byte)_intsqrt(x.x1),
                                   (byte)_intsqrt(x.x2),
                                   (byte)_intsqrt(x.x3),
                                   (byte)_intsqrt(x.x4),
                                   (byte)_intsqrt(x.x5),
                                   (byte)_intsqrt(x.x6),
                                   (byte)_intsqrt(x.x7),
                                   (byte)_intsqrt(x.x8),
                                   (byte)_intsqrt(x.x9),
                                   (byte)_intsqrt(x.x10),
                                   (byte)_intsqrt(x.x11),
                                   (byte)_intsqrt(x.x12),
                                   (byte)_intsqrt(x.x13),
                                   (byte)_intsqrt(x.x14),
                                   (byte)_intsqrt(x.x15)),
                        maxmath.intsqrt(x));
    }
}
// Index of the first lane of v equal to x; returns 32 when no lane matches
// (tzcnt of an empty 32-bit match mask is 32, and the scalar loop mirrors
// that convention).
// Fix: removed the redundant `else { continue; }` in the scalar fallback —
// the loop continues anyway.
public static int indexof(byte32 v, byte x)
{
    if (Avx2.IsAvx2Supported)
    {
        // One bit per lane from movemask; tzcnt finds the first match.
        return(math.tzcnt(Avx2.mm256_movemask_epi8(Avx2.mm256_cmpeq_epi8(v, new byte32(x)))));
    }
    else if (Sse2.IsSse2Supported)
    {
        // Compare the two 128-bit halves separately and merge their masks.
        byte16 broadcast = x;

        return(math.tzcnt(Sse2.movemask_epi8(Sse2.cmpeq_epi8(v._v16_0, broadcast)) | (Sse2.movemask_epi8(Sse2.cmpeq_epi8(v._v16_16, broadcast)) << 16)));
    }
    else
    {
        for (int i = 0; i < 32; i++)
        {
            if (v[i] == x)
            {
                return(i);
            }
        }

        return(32);
    }
}
// Lane-wise three-way comparison: -1 where x < y, 0 where x == y, 1 where x > y.
public static sbyte16 compareto(byte16 x, byte16 y)
{
    if (Sse2.IsSse2Supported)
    {
        // greater_mask_byte yields 0 or -1 per lane, so the mask difference
        // collapses to exactly -1/0/1.
        sbyte16 isGreater = Operator.greater_mask_byte(x, y);
        sbyte16 isSmaller = Operator.greater_mask_byte(y, x);

        return isSmaller - isGreater;
    }
    else
    {
        return new sbyte16((sbyte)compareto(x.x0, y.x0),
                           (sbyte)compareto(x.x1, y.x1),
                           (sbyte)compareto(x.x2, y.x2),
                           (sbyte)compareto(x.x3, y.x3),
                           (sbyte)compareto(x.x4, y.x4),
                           (sbyte)compareto(x.x5, y.x5),
                           (sbyte)compareto(x.x6, y.x6),
                           (sbyte)compareto(x.x7, y.x7),
                           (sbyte)compareto(x.x8, y.x8),
                           (sbyte)compareto(x.x9, y.x9),
                           (sbyte)compareto(x.x10, y.x10),
                           (sbyte)compareto(x.x11, y.x11),
                           (sbyte)compareto(x.x12, y.x12),
                           (sbyte)compareto(x.x13, y.x13),
                           (sbyte)compareto(x.x14, y.x14),
                           (sbyte)compareto(x.x15, y.x15));
    }
}
// Test: maxmath.gcd(byte16, byte16) must match the scalar reference _gcd
// lane-by-lane over 64 pseudo-random vector pairs (fixed seed 135 for
// reproducibility).
public static void byte16()
{
    Random8 rng = new Random8(135);

    for (int i = 0; i < 64; i++)
    {
        byte16 x = rng.NextByte16();
        byte16 y = rng.NextByte16();

        Assert.AreEqual(new byte16((byte)_gcd(x.x0, y.x0),
                                   (byte)_gcd(x.x1, y.x1),
                                   (byte)_gcd(x.x2, y.x2),
                                   (byte)_gcd(x.x3, y.x3),
                                   (byte)_gcd(x.x4, y.x4),
                                   (byte)_gcd(x.x5, y.x5),
                                   (byte)_gcd(x.x6, y.x6),
                                   (byte)_gcd(x.x7, y.x7),
                                   (byte)_gcd(x.x8, y.x8),
                                   (byte)_gcd(x.x9, y.x9),
                                   (byte)_gcd(x.x10, y.x10),
                                   (byte)_gcd(x.x11, y.x11),
                                   (byte)_gcd(x.x12, y.x12),
                                   (byte)_gcd(x.x13, y.x13),
                                   (byte)_gcd(x.x14, y.x14),
                                   (byte)_gcd(x.x15, y.x15)),
                        maxmath.gcd(x, y));
    }
}
// Lane-wise divisibility test: true where dividend % divisor == 0.
// All divisor lanes must be non-zero (asserted).
public static bool16 isdivisible(byte16 dividend, byte16 divisor)
{
    Assert.AreNotEqual(0, divisor.x0);
    Assert.AreNotEqual(0, divisor.x1);
    Assert.AreNotEqual(0, divisor.x2);
    Assert.AreNotEqual(0, divisor.x3);
    Assert.AreNotEqual(0, divisor.x4);
    Assert.AreNotEqual(0, divisor.x5);
    Assert.AreNotEqual(0, divisor.x6);
    Assert.AreNotEqual(0, divisor.x7);
    Assert.AreNotEqual(0, divisor.x8);
    Assert.AreNotEqual(0, divisor.x9);
    Assert.AreNotEqual(0, divisor.x10);
    Assert.AreNotEqual(0, divisor.x11);
    Assert.AreNotEqual(0, divisor.x12);
    Assert.AreNotEqual(0, divisor.x13);
    Assert.AreNotEqual(0, divisor.x14);
    Assert.AreNotEqual(0, divisor.x15);

    if (Constant.IsConstantExpression(divisor))
    {
        // Multiplicative divisibility test (Granlund-Montgomery/Lemire style):
        // with M = floor(0xFFFF / d) + 1 computed at compile time, n % d == 0
        // iff (n * M) mod 2^16 <= M - 1. Avoids the per-lane division.
        ushort16 compile = (new ushort16(ushort.MaxValue) / divisor) + 1;

        return(dividend * compile <= compile - 1);
    }
    else
    {
        return(dividend % divisor == 0);
    }
}
// Reverses the bit order within each byte lane using the classic three-stage
// swap: adjacent bits, then bit pairs, then nibbles.
public static byte16 reversebits(byte16 x)
{
    byte16 swappedBits = ((x & 0x55) << 1) | ((x >> 1) & 0x55);
    byte16 swappedPairs = ((swappedBits & 0x33) << 2) | ((swappedBits >> 2) & 0x33);

    // Final nibble swap; no masking needed since the byte shift discards
    // the bits that move out.
    return (swappedPairs << 4) | (swappedPairs >> 4);
}
// Lane-wise trailing-zero count for bytes; a zero lane yields 8.
public static byte16 tzcnt(byte16 x)
{
    if (Ssse3.IsSsse3Supported)
    {
        // Nibble LUT approach: the shuffle masks are 16-entry tables of the
        // trailing-zero count of each nibble value. Entry 0 is the sentinel 8
        // so that min() falls through to the other half (or stays 8 if the
        // whole byte is zero). The high table is the low table + 4.
        v128 NIBBLE_MASK = new v128(0x0F0F_0F0F);
        v128 SHUFFLE_MASK_LO = new v128(8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0);
        v128 SHUFFLE_MASK_HI = new v128(8, 4, 5, 4, 6, 4, 5, 4, 7, 4, 5, 4, 6, 4, 5, 4);

        return(Sse2.min_epu8(Ssse3.shuffle_epi8(SHUFFLE_MASK_LO, Sse2.and_si128(NIBBLE_MASK, x)),
                             Ssse3.shuffle_epi8(SHUFFLE_MASK_HI, Sse2.and_si128(NIBBLE_MASK, Sse2.srli_epi16(x, 4)))));
    }
    else if (Sse2.IsSse2Supported)
    {
        // x & -x isolates the lowest set bit (0 if x == 0).
        v128 compareMask = x & (byte16)(-(sbyte16)x);

        // Sum of indicator terms: +1 if x == 0 (giving 1+4+2+1 = 8 overall),
        // +4 if the isolated bit is not in the low nibble, +2 if not in
        // positions {0,1,4,5}, +1 if not in the even positions.
        byte16 first = Mask.BlendV(default(v128), new byte16(1), Sse2.cmpeq_epi8(compareMask, default(v128)));
        byte16 second = Mask.BlendV(default(v128), new byte16(4), Sse2.cmpeq_epi8(compareMask & (byte16)0x0F, default(v128)));
        byte16 third = Mask.BlendV(default(v128), new byte16(2), Sse2.cmpeq_epi8(compareMask & (byte16)0x33, default(v128)));
        byte16 fourth = Mask.BlendV(default(v128), new byte16(1), Sse2.cmpeq_epi8(compareMask & (byte16)0x55, default(v128)));

        return((first + second) + (third + fourth));
    }
    else
    {
        return(new byte16(tzcnt(x.x0), tzcnt(x.x1), tzcnt(x.x2), tzcnt(x.x3), tzcnt(x.x4), tzcnt(x.x5), tzcnt(x.x6), tzcnt(x.x7), tzcnt(x.x8), tzcnt(x.x9), tzcnt(x.x10), tzcnt(x.x11), tzcnt(x.x12), tzcnt(x.x13), tzcnt(x.x14), tzcnt(x.x15)));
    }
}
// Lane-wise largest power of two <= x (0 for a zero lane): smear the highest
// set bit down into every lower position, then strip all but the top bit.
public static byte16 floorpow2(byte16 x)
{
    byte16 smeared = x;

    smeared |= smeared >> 1;
    smeared |= smeared >> 2;
    smeared |= smeared >> 4;

    // All bits below the MSB are now set; subtracting the half leaves only
    // the MSB itself.
    return smeared - (smeared >> 1);
}
// Lane-wise smallest power of two >= x: decrement, smear the highest set bit
// into every lower position, then increment past the resulting all-ones run.
public static byte16 ceilpow2(byte16 x)
{
    byte16 smeared = x - 1;

    smeared |= smeared >> 1;
    smeared |= smeared >> 2;
    smeared |= smeared >> 4;

    return smeared + 1;
}
// Vectorized remainder of byte16 / byte16 via bit-serial restoring long
// division, entirely in 8-bit lanes. No widening is needed: the remainder
// before each shift is the top-k dividend bits mod divisor, which can only
// reach >= 128 when all 8 bits have been consumed — i.e. after the final
// step — so the shifted value never wraps. SSE2 fallback; all divisor lanes
// must be non-zero.
// Fix: removed a stray empty statement (`remainders <<= 1;;`).
internal static byte16 vrem_byte_SSE_FALLBACK(byte16 dividend, byte16 divisor)
{
    Assert.AreNotEqual(divisor.x0, 0);
    Assert.AreNotEqual(divisor.x1, 0);
    Assert.AreNotEqual(divisor.x2, 0);
    Assert.AreNotEqual(divisor.x3, 0);
    Assert.AreNotEqual(divisor.x4, 0);
    Assert.AreNotEqual(divisor.x5, 0);
    Assert.AreNotEqual(divisor.x6, 0);
    Assert.AreNotEqual(divisor.x7, 0);
    Assert.AreNotEqual(divisor.x8, 0);
    Assert.AreNotEqual(divisor.x9, 0);
    Assert.AreNotEqual(divisor.x10, 0);
    Assert.AreNotEqual(divisor.x11, 0);
    Assert.AreNotEqual(divisor.x12, 0);
    Assert.AreNotEqual(divisor.x13, 0);
    Assert.AreNotEqual(divisor.x14, 0);
    Assert.AreNotEqual(divisor.x15, 0);

    if (Sse2.IsSse2Supported)
    {
        byte16 remainders = byte16.zero;

        // Most significant dividend bit (bit 7) first.
        remainders |= (new byte16(1) & (dividend >> 7));
        // remainders >= divisor  <=>  min(divisor, remainders) == divisor;
        // the all-ones/all-zeros lane mask drives a conditional subtract.
        v128 subtractDivisorFromRemainder = Sse2.cmpeq_epi8(maxmath.min(divisor, remainders), divisor);
        remainders -= Mask.BlendV(default(v128), divisor, subtractDivisorFromRemainder);

        // Bits 6 down to 1: shift the remainder up, append the next dividend
        // bit, then conditionally subtract (restoring division step).
        for (int i = 6; i > 0; i--)
        {
            remainders <<= 1;
            remainders |= (new byte16(1) & (dividend >> i));
            subtractDivisorFromRemainder = Sse2.cmpeq_epi8(maxmath.min(divisor, remainders), divisor);
            remainders -= Mask.BlendV(default(v128), divisor, subtractDivisorFromRemainder);
        }

        // Final step for bit 0.
        remainders <<= 1;
        remainders |= new byte16(1) & dividend;
        subtractDivisorFromRemainder = Sse2.cmpeq_epi8(maxmath.min(divisor, remainders), divisor);
        remainders -= Mask.BlendV(default(v128), divisor, subtractDivisorFromRemainder);

        return(remainders);
    }
    else
    {
        throw new CPUFeatureCheckException();
    }
}
// Lane-wise integer exponentiation x^n by square-and-multiply, with per-lane
// completion tracking so every lane's result is latched the first time its
// exponent reaches zero.
public static sbyte16 intpow(sbyte16 x, byte16 n)
{
    if (Sse2.IsSse2Supported)
    {
        v128 ZERO = default(v128);
        sbyte16 ONE = new sbyte16(1);

        v128 doneMask = ZERO;
        v128 result = ZERO;

        // p holds the running square x^(2^k); y accumulates the product of
        // the squares selected by the set bits of n.
        sbyte16 p = x;
        sbyte16 y = ONE;

    Loop:
        v128 y_times_p = y * p;

        // Multiply y by p only in lanes whose current low exponent bit is set.
        y = Mask.BlendV(y, y_times_p, Sse2.cmpeq_epi8(ONE, Sse2.and_si128(ONE, n)));

        n >>= 1;

        v128 n_is_zero = Sse2.cmpeq_epi8(ZERO, n);

        // Latch y into result only for lanes that finished on THIS iteration
        // (newly zero exponent and not already done).
        result = Mask.BlendV(result, y, Sse2.andnot_si128(doneMask, n_is_zero));
        doneMask = n_is_zero;

        // Keep squaring until every lane's exponent has been consumed.
        if (bitmask32(16 * sizeof(sbyte)) != Sse2.movemask_epi8(doneMask))
        {
            p *= p;

            goto Loop;
        }
        else
        {
            return(result);
        }
    }
    else
    {
        return(new sbyte16((sbyte)intpow((int)x.x0, n.x0), (sbyte)intpow((int)x.x1, n.x1), (sbyte)intpow((int)x.x2, n.x2), (sbyte)intpow((int)x.x3, n.x3), (sbyte)intpow((int)x.x4, n.x4), (sbyte)intpow((int)x.x5, n.x5), (sbyte)intpow((int)x.x6, n.x6), (sbyte)intpow((int)x.x7, n.x7), (sbyte)intpow((int)x.x8, n.x8), (sbyte)intpow((int)x.x9, n.x9), (sbyte)intpow((int)x.x10, n.x10), (sbyte)intpow((int)x.x11, n.x11), (sbyte)intpow((int)x.x12, n.x12), (sbyte)intpow((int)x.x13, n.x13), (sbyte)intpow((int)x.x14, n.x14), (sbyte)intpow((int)x.x15, n.x15)));
    }
}
// Lane-wise unsigned maximum. Maps directly to pmaxub on SSE2; the scalar
// fallback widens to uint so math.max performs an unsigned comparison.
public static byte16 max(byte16 a, byte16 b)
{
    if (Sse2.IsSse2Supported)
    {
        return(Sse2.max_epu8(a, b));
    }
    else
    {
        return(new byte16((byte)math.max((uint)a.x0, (uint)b.x0),
                          (byte)math.max((uint)a.x1, (uint)b.x1),
                          (byte)math.max((uint)a.x2, (uint)b.x2),
                          (byte)math.max((uint)a.x3, (uint)b.x3),
                          (byte)math.max((uint)a.x4, (uint)b.x4),
                          (byte)math.max((uint)a.x5, (uint)b.x5),
                          (byte)math.max((uint)a.x6, (uint)b.x6),
                          (byte)math.max((uint)a.x7, (uint)b.x7),
                          (byte)math.max((uint)a.x8, (uint)b.x8),
                          (byte)math.max((uint)a.x9, (uint)b.x9),
                          (byte)math.max((uint)a.x10, (uint)b.x10),
                          (byte)math.max((uint)a.x11, (uint)b.x11),
                          (byte)math.max((uint)a.x12, (uint)b.x12),
                          (byte)math.max((uint)a.x13, (uint)b.x13),
                          (byte)math.max((uint)a.x14, (uint)b.x14),
                          (byte)math.max((uint)a.x15, (uint)b.x15)));
    }
}
// Lane-wise left & ~right. Note the swapped argument order on the intrinsic:
// pandn computes (~a) & b, so andnot_si128(right, left) == left & ~right.
public static byte16 andnot(byte16 left, byte16 right)
{
    if (!Sse2.IsSse2Supported)
    {
        return left & ~right;
    }

    return Sse2.andnot_si128(right, left);
}
// Lane-wise rounded-up average: (x + y + 1) >> 1. Matches pavgb exactly,
// which also rounds halves up; the scalar path computes the sum in int so
// x + y + 1 cannot overflow.
public static byte16 avg(byte16 x, byte16 y)
{
    if (Sse2.IsSse2Supported)
    {
        return(Sse2.avg_epu8(x, y));
    }
    else
    {
        return(new byte16((byte)((x.x0 + y.x0 + 1) >> 1),
                          (byte)((x.x1 + y.x1 + 1) >> 1),
                          (byte)((x.x2 + y.x2 + 1) >> 1),
                          (byte)((x.x3 + y.x3 + 1) >> 1),
                          (byte)((x.x4 + y.x4 + 1) >> 1),
                          (byte)((x.x5 + y.x5 + 1) >> 1),
                          (byte)((x.x6 + y.x6 + 1) >> 1),
                          (byte)((x.x7 + y.x7 + 1) >> 1),
                          (byte)((x.x8 + y.x8 + 1) >> 1),
                          (byte)((x.x9 + y.x9 + 1) >> 1),
                          (byte)((x.x10 + y.x10 + 1) >> 1),
                          (byte)((x.x11 + y.x11 + 1) >> 1),
                          (byte)((x.x12 + y.x12 + 1) >> 1),
                          (byte)((x.x13 + y.x13 + 1) >> 1),
                          (byte)((x.x14 + y.x14 + 1) >> 1),
                          (byte)((x.x15 + y.x15 + 1) >> 1)));
    }
}
// True when every lane of c holds the same value.
public static bool all_eq(byte16 c)
{
    if (Ssse3.IsSsse3Supported)
    {
        // shuffle_epi8 with an all-zero index vector broadcasts lane 0 to
        // every position; the vector is uniform iff it equals that broadcast.
        return(((byte16)Ssse3.shuffle_epi8(c, default(v128))).Equals(c));
    }
    else
    {
        // Scalar fallback: compare every lane against lane 0, combined with
        // non-short-circuiting & in a balanced tree.
        return((((c.x0 == c.x1 & c.x0 == c.x2) & (c.x0 == c.x3 & c.x0 == c.x4)) & ((c.x0 == c.x5 & c.x0 == c.x6) & (c.x0 == c.x7 & c.x0 == c.x8))) & (((c.x0 == c.x9 & c.x0 == c.x10) & (c.x0 == c.x11 & c.x0 == c.x12)) & ((c.x0 == c.x13 & c.x0 == c.x14) & c.x0 == c.x15)));
    }
}
// Horizontal sum of all 16 byte lanes (max 16 * 255 = 4080, so a uint never
// overflows). The SSE2 path reuses sad-against-zero, since the sum of
// absolute differences from zero is just the sum of the bytes.
public static uint csum(byte16 x)
{
    if (Sse2.IsSse2Supported)
    {
        return(sad(x, byte16.zero));
    }
    else
    {
        return((uint)((((x.x0 + x.x1) + (x.x2 + x.x3)) + ((x.x4 + x.x5) + (x.x6 + x.x7))) + (((x.x8 + x.x9) + (x.x10 + x.x11)) + ((x.x12 + x.x13) + (x.x14 + x.x15)))));
    }
}
// True when at least one lane of x is non-zero.
public static bool any(byte16 x)
{
    if (Sse2.IsSse2Supported)
    {
        // cmpeq against zero sets a movemask bit for every zero lane; the
        // 16-bit mask is all ones only when the entire vector is zero.
        int zeroLanes = Sse2.movemask_epi8(Sse2.cmpeq_epi8(x, default(v128)));

        return zeroLanes != bitmask32(16 * sizeof(byte));
    }
    else
    {
        return any(x != 0);
    }
}
// Alternating subtract/add: a - b in even-indexed lanes, a + b in
// odd-indexed lanes.
public static byte16 subadd(byte16 a, byte16 b)
{
    if (Ssse3.IsSsse3Supported)
    {
        // The 255 entries read as -1 in sign_epi8's signed view, so b is
        // negated in the even lanes before the add.
        return(a + Ssse3.sign_epi8(b, new v128(255, 1, 255, 1, 255, 1, 255, 1, 255, 1, 255, 1, 255, 1, 255, 1)));
    }
    else
    {
        // Equivalent form: subtract b in even lanes, subtract -b (i.e. add b)
        // in odd lanes.
        return(a - select(b, (byte16)(-(sbyte16)b), new bool16(false, true, false, true, false, true, false, true, false, true, false, true, false, true, false, true)));
    }
}
// True when every lane of x is non-zero.
public static bool all(byte16 x)
{
    if (Sse2.IsSse2Supported)
    {
        // cmpeq against zero sets a movemask bit for every zero lane; the
        // mask must be empty for all lanes to be non-zero.
        int zeroLanes = Sse2.movemask_epi8(Sse2.cmpeq_epi8(x, default(v128)));

        return zeroLanes == 0;
    }
    else
    {
        return all(x != 0);
    }
}
// Divides each lane of dividend by the scalar divisor, returning the
// quotient and writing the remainder through the out parameter.
public static byte16 divrem(byte16 dividend, byte divisor, out byte16 remainder)
{
    // Non-constant divisor: defer to the vector-divisor overload.
    if (!Constant.IsConstantExpression(divisor))
    {
        return divrem(dividend, (byte16)divisor, out remainder);
    }

    // Compile-time constant divisor: let the compiler strength-reduce both
    // the division and the modulo.
    remainder = dividend % divisor;

    return dividend / divisor;
}
// Converts 16-bit lanes to bools, clamping each lane to 0/1 first so any
// non-zero value maps to true without producing an invalid bool bit pattern.
public static bool16 toboolsafe(ushort16 x)
{
    if (Sse2.IsSse2Supported)
    {
        // Clamp, narrow to bytes, and reinterpret the register as bool16.
        return((v128)(byte16)clamp(x, 0, 1));
    }
    else
    {
        // Scalar path: same clamp-and-narrow, then reinterpret the bytes
        // in place via pointer cast.
        byte16 temp = (byte16)clamp(x, 0, 1);

        return(*(bool16 *)&temp);
    }
}
// Lane-wise power-of-two test: x != 0 && (x & (x - 1)) == 0 (zero is not a
// power of two).
public static bool16 ispow2(byte16 x)
{
    if (Sse2.IsSse2Supported)
    {
        // (x > 0 mask) & (x & (x-1) == 0 mask), then & 1 to turn the
        // all-ones lanes into valid bool bytes.
        return(Sse2.and_si128(Sse2.and_si128(Operator.greater_mask_byte(x, byte16.zero), Sse2.cmpeq_epi8(default(v128), x & (x - 1))), new byte16(1)));
    }
    else
    {
        return(new bool16(math.ispow2((uint)x.x0),
                          math.ispow2((uint)x.x1),
                          math.ispow2((uint)x.x2),
                          math.ispow2((uint)x.x3),
                          math.ispow2((uint)x.x4),
                          math.ispow2((uint)x.x5),
                          math.ispow2((uint)x.x6),
                          math.ispow2((uint)x.x7),
                          math.ispow2((uint)x.x8),
                          math.ispow2((uint)x.x9),
                          math.ispow2((uint)x.x10),
                          math.ispow2((uint)x.x11),
                          math.ispow2((uint)x.x12),
                          math.ispow2((uint)x.x13),
                          math.ispow2((uint)x.x14),
                          math.ispow2((uint)x.x15)));
    }
}
// Sum of absolute differences of all 16 byte lanes (max 16 * 255 = 4080).
public static uint sad(byte16 a, byte16 b)
{
    if (Sse2.IsSse2Supported)
    {
        // psadbw leaves two partial sums, one per 8-byte half, in the low
        // 16 bits of dwords 0 and 2; shuffle dword 2 down and add.
        a = Sse2.sad_epu8(a, b);

        return(Sse2.add_epi16(a, Sse2.shuffle_epi32(a, Sse.SHUFFLE(0, 0, 0, 2))).UShort0);
    }
    else
    {
        return((uint)(((math.abs(a.x0 - b.x0) + math.abs(a.x1 - b.x1)) + (math.abs(a.x2 - b.x2) + math.abs(a.x3 - b.x3))) + (((math.abs(a.x4 - b.x4) + math.abs(a.x5 - b.x5)) + (math.abs(a.x6 - b.x6) + math.abs(a.x7 - b.x7)))) + (((math.abs(a.x8 - b.x8) + math.abs(a.x9 - b.x9)) + (math.abs(a.x10 - b.x10) + math.abs(a.x11 - b.x11))) + ((math.abs(a.x12 - b.x12) + math.abs(a.x13 - b.x13)) + (math.abs(a.x14 - b.x14) + math.abs(a.x15 - b.x15))))));
    }
}