// Applies tzcnt to every byte lane of x.
// Three paths, chosen by CPU feature flags:
//   SSSE3 - looks up each nibble in a 16-entry table and takes the smaller of the two results;
//   SSE2  - isolates the lowest set bit and sums 1/4/2/1 depending on which bit groups it misses;
//   other - calls the scalar tzcnt overload on each component.
// In both vector paths a lane equal to zero produces 8.
public static byte8 tzcnt(byte8 x)
{
    if (Ssse3.IsSsse3Supported)
    {
        v128 NIBBLE_MASK = new v128(0x0F0F_0F0F);
        // Table entry 0 is 8 in both tables so an empty nibble never wins the min below;
        // the high-nibble table is the low-nibble table offset by 4.
        v128 SHUFFLE_MASK_LO = new v128(8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0);
        v128 SHUFFLE_MASK_HI = new v128(8, 4, 5, 4, 6, 4, 5, 4, 7, 4, 5, 4, 6, 4, 5, 4);

        v128 lowNibbles = Sse2.and_si128(NIBBLE_MASK, x);
        v128 highNibbles = Sse2.and_si128(NIBBLE_MASK, Sse2.srli_epi16(x, 4));

        v128 lowCounts = Ssse3.shuffle_epi8(SHUFFLE_MASK_LO, lowNibbles);
        v128 highCounts = Ssse3.shuffle_epi8(SHUFFLE_MASK_HI, highNibbles);

        return Sse2.min_epu8(lowCounts, highCounts);
    }

    if (Sse2.IsSse2Supported)
    {
        v128 zero = default(v128);

        // x & -x keeps only the lowest set bit of each lane.
        v128 lowestBit = x & (byte8)(-(sbyte8)x);

        // Each term contributes its weight when the isolated bit is absent from the tested group.
        byte8 noBitAtAll = Mask.BlendV(zero, new byte8(1), Sse2.cmpeq_epi8(lowestBit, zero));
        byte8 notInLowNibble = Mask.BlendV(zero, new byte8(4), Sse2.cmpeq_epi8(lowestBit & (byte8)0x0F, zero));
        byte8 notInLowPairs = Mask.BlendV(zero, new byte8(2), Sse2.cmpeq_epi8(lowestBit & (byte8)0x33, zero));
        byte8 notInEvenBits = Mask.BlendV(zero, new byte8(1), Sse2.cmpeq_epi8(lowestBit & (byte8)0x55, zero));

        return (noBitAtAll + notInLowNibble) + (notInLowPairs + notInEvenBits);
    }

    return new byte8(tzcnt(x.x0),
                     tzcnt(x.x1),
                     tzcnt(x.x2),
                     tzcnt(x.x3),
                     tzcnt(x.x4),
                     tzcnt(x.x5),
                     tzcnt(x.x6),
                     tzcnt(x.x7));
}
// Per-lane remainder of dividend / divisor.
// Both operands are converted to int8, the quotient is obtained from vdiv_byte_quotient,
// and the remainder is computed as dividend - quotient * divisor before narrowing back to byte8.
internal static byte8 vrem_byte(byte8 dividend, byte8 divisor)
{
    int8 wideDividend = dividend;
    int8 wideDivisor = divisor;

    int8 wideQuotient = (int8)vdiv_byte_quotient(wideDividend, wideDivisor);
    int8 wholePart = wideQuotient * wideDivisor;

    return (byte8)(wideDividend - wholePart);
}
// Per-lane unsigned byte division with remainder, done as bit-by-bit long division on 16-bit lanes.
//
// dividend  - eight byte numerators.
// divisor   - eight byte denominators; every lane is asserted to be non-zero.
// remainder - receives dividend % divisor per lane.
// returns   - dividend / divisor per lane.
//
// Throws CPUFeatureCheckException when SSE2 is not available.
//
// Each step shifts the running remainder left, brings in the next dividend bit (most significant
// first), and, where the remainder is at least the divisor, subtracts the divisor and sets the
// corresponding quotient bit. "remainder >= divisor" is tested as min(divisor, remainder) == divisor.
internal static byte8 vdivrem_byte_SSE_FALLBACK(byte8 dividend, byte8 divisor, out byte8 remainder)
{
Assert.AreNotEqual(divisor.x0, 0);
Assert.AreNotEqual(divisor.x1, 0);
Assert.AreNotEqual(divisor.x2, 0);
Assert.AreNotEqual(divisor.x3, 0);
Assert.AreNotEqual(divisor.x4, 0);
Assert.AreNotEqual(divisor.x5, 0);
Assert.AreNotEqual(divisor.x6, 0);
Assert.AreNotEqual(divisor.x7, 0);

    if (Sse2.IsSse2Supported)
    {
        ushort8 quotients = ushort8.zero;
        ushort8 remainders = ushort8.zero;

        ushort8 divisorCast = divisor;
        ushort8 dividendCast = dividend;

        // Bit 7: nothing to shift yet, the accumulators start at zero.
        remainders |= (new ushort8(1) & (dividendCast >> 7));

        v128 subtractDivisorFromRemainder = Sse2.cmpeq_epi16(maxmath.min(divisorCast, remainders), divisorCast);

        remainders -= Mask.BlendV(default(v128), divisorCast, subtractDivisorFromRemainder);
        quotients |= new ushort8(1) & subtractDivisorFromRemainder;

        // Bits 6..1.
        for (int i = 6; i > 0; i--)
        {
            quotients <<= 1;
            remainders <<= 1;

            remainders |= (new ushort8(1) & (dividendCast >> i));

            // The lanes are 16 bits wide, so compare at 16-bit granularity like the first and last
            // steps do. A byte-wise compare would leave 0xFF00 in "false" lanes and only be harmless
            // because the high byte of the divisor and of the constant 1 happens to be zero.
            subtractDivisorFromRemainder = Sse2.cmpeq_epi16(maxmath.min(divisorCast, remainders), divisorCast);

            remainders -= Mask.BlendV(default(v128), divisorCast, subtractDivisorFromRemainder);
            quotients |= new ushort8(1) & subtractDivisorFromRemainder;
        }

        // Bit 0: no shift of the dividend is needed.
        remainders <<= 1;
        quotients <<= 1;

        remainders |= new ushort8(1) & dividendCast;

        subtractDivisorFromRemainder = Sse2.cmpeq_epi16(maxmath.min(divisorCast, remainders), divisorCast);

        remainders -= Mask.BlendV(default(v128), divisorCast, subtractDivisorFromRemainder);
        quotients |= new ushort8(1) & subtractDivisorFromRemainder;

        // Narrow both results at once: remainders land in the low eight bytes, quotients in the high eight.
        byte16 temp = Sse2.packus_epi16(remainders, quotients);

        remainder = temp.v8_0;
        return temp.v8_8;
    }
    else
    {
        throw new CPUFeatureCheckException();
    }
}
// Reverses the bit order inside every byte lane of x in three swap stages:
// adjacent bits, then adjacent bit pairs, then the two nibbles.
public static byte8 reversebits(byte8 x)
{
    byte8 bitsSwapped = ((x >> 1) & 0x55) | ((x & 0x55) << 1);
    byte8 pairsSwapped = ((bitsSwapped >> 2) & 0x33) | ((bitsSwapped & 0x33) << 2);

    return (pairsSwapped >> 4) | (pairsSwapped << 4);
}
// Component-wise remainder of two byte2x3 matrices.
// With SSE2 the three columns of each operand are interleaved into one byte8 so a single
// vector remainder handles all six components; otherwise each column is processed on its own.
public static byte2x3 operator %(byte2x3 left, byte2x3 right)
{
    if (Sse2.IsSse2Supported)
    {
        // In DEBUG builds the two lanes after column c2 are filled with 1 on both sides.
        // NOTE(review): presumably this keeps divide-by-zero checks from firing on padding lanes - confirm.
#if DEBUG
        v128 leftTail = Sse2.unpacklo_epi16(left.c2, new byte2(1));
        v128 rightTail = Sse2.unpacklo_epi16(right.c2, new byte2(1));
#else
        v128 leftTail = left.c2;
        v128 rightTail = right.c2;
#endif
        byte8 numerators = Sse2.unpacklo_epi32(Sse2.unpacklo_epi16(left.c0, left.c1), leftTail);
        byte8 denominators = Sse2.unpacklo_epi32(Sse2.unpacklo_epi16(right.c0, right.c1), rightTail);

        byte8 rem = numerators % denominators;

        return new byte2x3(rem.v2_0, rem.v2_2, rem.v2_4);
    }

    return new byte2x3(left.c0 % right.c0, left.c1 % right.c1, left.c2 % right.c2);
}
// Remainder of every component of a byte2x3 by one scalar byte.
// The packed byte8 path is taken only when SSE2 is available and the divisor is not a
// compile-time constant; in every other case each column is reduced separately.
public static byte2x3 operator %(byte2x3 left, byte right)
{
    if (Sse2.IsSse2Supported && !Constant.IsConstantExpression(right))
    {
        // DEBUG builds fill the two lanes after column c2 with 1 instead of leaving them as-is.
#if DEBUG
        v128 tail = Sse2.unpacklo_epi16(left.c2, new byte2(1));
#else
        v128 tail = left.c2;
#endif
        byte8 packed = Sse2.unpacklo_epi32(Sse2.unpacklo_epi16(left.c0, left.c1), tail);
        byte8 rem = packed % right;

        return new byte2x3(rem.v2_0, rem.v2_2, rem.v2_4);
    }

    return new byte2x3(left.c0 % right, left.c1 % right, left.c2 % right);
}
// Component-wise quotient of two byte2x3 matrices.
// With SSE2 the three columns of each operand are interleaved into one byte8 so a single
// vector division handles all six components; otherwise each column is divided on its own.
public static byte2x3 operator /(byte2x3 left, byte2x3 right)
{
    if (Sse2.IsSse2Supported)
    {
        // In DEBUG builds the two lanes after column c2 are filled with 1 on both sides.
        // NOTE(review): presumably this keeps divide-by-zero checks from firing on padding lanes - confirm.
#if DEBUG
        v128 leftTail = Sse2.unpacklo_epi16(left.c2, new byte2(1));
        v128 rightTail = Sse2.unpacklo_epi16(right.c2, new byte2(1));
#else
        v128 leftTail = left.c2;
        v128 rightTail = right.c2;
#endif
        byte8 numerators = Sse2.unpacklo_epi32(Sse2.unpacklo_epi16(left.c0, left.c1), leftTail);
        byte8 denominators = Sse2.unpacklo_epi32(Sse2.unpacklo_epi16(right.c0, right.c1), rightTail);

        byte8 div = numerators / denominators;

        return new byte2x3(div.v2_0, div.v2_2, div.v2_4);
    }

    return new byte2x3(left.c0 / right.c0, left.c1 / right.c1, left.c2 / right.c2);
}
// Per-lane least common multiple of two signed byte vectors.
// Works on the absolute values and divides by the gcd before multiplying:
// (|x| / gcd(|x|, |y|)) * |y|.
public static byte8 lcm(sbyte8 x, sbyte8 y)
{
    byte8 magnitudeX = (byte8)abs(x);
    byte8 magnitudeY = (byte8)abs(y);

    byte8 commonDivisor = gcd(magnitudeX, magnitudeY);
    byte8 reducedX = magnitudeX / commonDivisor;

    return reducedX * magnitudeY;
}
// Builds, per lane, a byte with numBits consecutive bits set starting at bit position index.
//
// numBits - number of bits to set in each lane; asserted to lie in [0, 8 - index].
// index   - position of the first set bit in each lane; asserted to lie in [0, 8]. Defaults to 0.
// returns - (0xFF << index) & ~((0xFF << index) << numBits) per lane, with lanes where
//           numBits == 8 forced to 0xFF.
public static byte8 bitmask8(byte8 numBits, byte8 index = default(byte8))
{
Assert.IsBetween(index.x0, 0u, 8u);
Assert.IsBetween(index.x1, 0u, 8u);
Assert.IsBetween(index.x2, 0u, 8u);
Assert.IsBetween(index.x3, 0u, 8u);
Assert.IsBetween(index.x4, 0u, 8u);
Assert.IsBetween(index.x5, 0u, 8u);
Assert.IsBetween(index.x6, 0u, 8u);
Assert.IsBetween(index.x7, 0u, 8u);
Assert.IsBetween(numBits.x0, 0u, 8u - index.x0);
Assert.IsBetween(numBits.x1, 0u, 8u - index.x1);
Assert.IsBetween(numBits.x2, 0u, 8u - index.x2);
Assert.IsBetween(numBits.x3, 0u, 8u - index.x3);
Assert.IsBetween(numBits.x4, 0u, 8u - index.x4);
Assert.IsBetween(numBits.x5, 0u, 8u - index.x5);
Assert.IsBetween(numBits.x6, 0u, 8u - index.x6);
Assert.IsBetween(numBits.x7, 0u, 8u - index.x7);

    // From here on "index" holds the mask of all bits at or above the start position.
    // Both branches below need it, so it is computed before the feature check.
    index = shl(byte.MaxValue, index);

    if (Sse2.IsSse2Supported)
    {
        // A full-width request is handled explicitly instead of relying on a shift by 8.
        v128 isMaxBitsMask = Sse2.cmpeq_epi8(numBits, new byte8(8));

        return isMaxBitsMask | andnot(index, shl(index, numBits));
    }
    else
    {
        // The lanes are 8 bits wide, so the full-width case is numBits == 8 (matching the SSE2 branch).
        return (byte8)(-toint16(numBits == 8)) | andnot(index, shl(index, numBits));
    }
}
// Per-lane: copies the highest set bit into every lower position, then removes everything
// below it again, leaving only the highest set bit of the input. A zero lane stays zero.
public static byte8 floorpow2(byte8 x)
{
    byte8 smeared = x | (x >> 1);
    smeared = smeared | (smeared >> 2);
    smeared = smeared | (smeared >> 4);

    return smeared - (smeared >> 1);
}
// Per-lane: subtracts one, copies the highest set bit into every lower position, then adds one.
// All arithmetic is on byte lanes.
public static byte8 ceilpow2(byte8 x)
{
    byte8 filled = x - 1;
    filled = filled | (filled >> 1);
    filled = filled | (filled >> 2);
    filled = filled | (filled >> 4);

    return filled + 1;
}
// Per-lane byte division that also yields the remainder.
// Operands are converted to int8, the quotient comes from vdiv_byte_quotient, and the
// remainder is derived as dividend - quotient * divisor. Both results are narrowed to byte8.
//
// remainder - receives the per-lane remainder.
// returns   - the per-lane quotient.
internal static byte8 vdivrem_byte(byte8 dividend, byte8 divisor, out byte8 remainder)
{
    int8 wideDividend = dividend;
    int8 wideDivisor = divisor;

    int8 wideQuotient = (int8)vdiv_byte_quotient(wideDividend, wideDivisor);
    int8 wideRemainder = wideDividend - wideQuotient * wideDivisor;

    remainder = (byte8)wideRemainder;
    return (byte8)wideQuotient;
}
// Per-lane greatest common divisor of two byte vectors.
// The SSE2 path runs a shift-and-subtract (binary) GCD on all lanes at once and keeps looping
// until every lane is flagged as finished; lanes where either input is zero are flagged up
// front and receive the other operand as their result. Without SSE2 each lane is delegated
// to the scalar uint overload.
public static byte8 gcd(byte8 x, byte8 y)
{
    if (Sse2.IsSse2Supported)
    {
        v128 zero = default(v128);

        v128 accumulated = zero;
        v128 zeroInputResult = zero;

        v128 xIsZero = Sse2.cmpeq_epi8(x, zero);
        v128 yIsZero = Sse2.cmpeq_epi8(y, zero);
        v128 eitherIsZero = Sse2.or_si128(xIsZero, yIsZero);

        // Where one operand is zero the answer is the other operand.
        zeroInputResult = Mask.BlendV(zeroInputResult, y, xIsZero);
        zeroInputResult = Mask.BlendV(zeroInputResult, x, yIsZero);

        v128 finished = eitherIsZero;

        // Power of two shared by both inputs; applied to the result after the loop.
        byte8 commonShift = tzcnt(x | y);

        x = shrl(x, tzcnt(x));

        do
        {
            y = shrl(y, tzcnt(y));

            // Order the pair so that x holds the smaller value, then reduce y by x.
            v128 previousX = x;
            x = Sse2.min_epu8(x, y);
            y = Sse2.max_epu8(y, previousX);
            y -= x;

            // Lanes whose difference just reached zero, and were not finished before, latch x.
            v128 newlyFinished = Sse2.andnot_si128(finished, Sse2.cmpeq_epi8(y, zero));
            accumulated = Mask.BlendV(accumulated, x, newlyFinished);
            finished = Sse2.or_si128(finished, newlyFinished);
        }
        while (-1 != finished.SLong0); // low 64 bits all set means all eight lanes are finished

        accumulated = shl(accumulated, commonShift);
        accumulated = Mask.BlendV(accumulated, zeroInputResult, eitherIsZero);

        return accumulated;
    }

    return new byte8((byte)gcd((uint)x.x0, (uint)y.x0),
                     (byte)gcd((uint)x.x1, (uint)y.x1),
                     (byte)gcd((uint)x.x2, (uint)y.x2),
                     (byte)gcd((uint)x.x3, (uint)y.x3),
                     (byte)gcd((uint)x.x4, (uint)y.x4),
                     (byte)gcd((uint)x.x5, (uint)y.x5),
                     (byte)gcd((uint)x.x6, (uint)y.x6),
                     (byte)gcd((uint)x.x7, (uint)y.x7));
}
// Returns left & ~right for every lane.
// Note the swapped argument order in the intrinsic call: andnot_si128 complements its first operand.
public static byte8 andnot(byte8 left, byte8 right)
{
    if (Sse2.IsSse2Supported)
    {
        v128 cleared = Sse2.andnot_si128(right, left);
        return cleared;
    }

    return left & ~right;
}
// Copies the eight components of v into this proxy's x0..x7 members.
public DebuggerProxy(byte8 v)
{
    this.x0 = v.x0;
    this.x1 = v.x1;
    this.x2 = v.x2;
    this.x3 = v.x3;
    this.x4 = v.x4;
    this.x5 = v.x5;
    this.x6 = v.x6;
    this.x7 = v.x7;
}
// Sum of absolute differences between the eight lanes of a and b.
// Uses the sad_epu8 instruction when SSE2 is available, otherwise adds the eight
// absolute differences pairwise.
public static uint sad(byte8 a, byte8 b)
{
    if (Sse2.IsSse2Supported)
    {
        return Sse2.sad_epu8(a, b).UShort0;
    }

    int lowerHalf = (math.abs(a.x0 - b.x0) + math.abs(a.x1 - b.x1)) + (math.abs(a.x2 - b.x2) + math.abs(a.x3 - b.x3));
    int upperHalf = (math.abs(a.x4 - b.x4) + math.abs(a.x5 - b.x5)) + (math.abs(a.x6 - b.x6) + math.abs(a.x7 - b.x7));

    return (uint)(lowerHalf + upperHalf);
}
// True when at least one lane of x is non-zero.
// The SSE2 path checks the eight bytes at once as a single 64-bit value.
public static bool any(byte8 x)
{
    if (Sse2.IsSse2Supported)
    {
        ulong allLanes = ((v128)x).ULong0;
        return allLanes != 0;
    }

    return any(x != 0);
}
// True when all eight lanes of c hold the same value.
// The SSSE3 path broadcasts lane 0 with a zero shuffle mask and compares the result with c;
// the fallback compares lane 0 against each of the other seven lanes without short-circuiting.
public static bool all_eq(byte8 c)
{
    if (Ssse3.IsSsse3Supported)
    {
        byte8 firstLaneEverywhere = Ssse3.shuffle_epi8(c, default(v128));
        return firstLaneEverywhere.Equals(c);
    }

    bool lanes1To4 = (c.x0 == c.x1 & c.x0 == c.x2) & (c.x0 == c.x3 & c.x0 == c.x4);
    bool lanes5To7 = (c.x0 == c.x5 & c.x0 == c.x6) & c.x0 == c.x7;

    return lanes1To4 & lanes5To7;
}
// Per-lane average of x and y, rounded up: (x + y + 1) >> 1.
// Uses avg_epu8 when SSE2 is available; the fallback evaluates the same formula per component.
public static byte8 avg(byte8 x, byte8 y)
{
    if (Sse2.IsSse2Supported)
    {
        return Sse2.avg_epu8(x, y);
    }

    return new byte8((byte)((x.x0 + y.x0 + 1) >> 1),
                     (byte)((x.x1 + y.x1 + 1) >> 1),
                     (byte)((x.x2 + y.x2 + 1) >> 1),
                     (byte)((x.x3 + y.x3 + 1) >> 1),
                     (byte)((x.x4 + y.x4 + 1) >> 1),
                     (byte)((x.x5 + y.x5 + 1) >> 1),
                     (byte)((x.x6 + y.x6 + 1) >> 1),
                     (byte)((x.x7 + y.x7 + 1) >> 1));
}
// Reinterprets the bits of x as a quarter8 without changing them.
// With SSE the value is routed through v128; otherwise the storage of x is read through a quarter8 pointer.
public static quarter8 asquarter(byte8 x)
{
    if (Sse.IsSseSupported)
    {
        return (v128)x;
    }

    return *(quarter8*)&x;
}
// Per-lane unsigned maximum of a and b.
// Uses max_epu8 when SSE2 is available; the fallback takes math.max on each component widened to uint.
public static byte8 max(byte8 a, byte8 b)
{
    if (Sse2.IsSse2Supported)
    {
        return Sse2.max_epu8(a, b);
    }

    return new byte8((byte)math.max((uint)a.x0, (uint)b.x0),
                     (byte)math.max((uint)a.x1, (uint)b.x1),
                     (byte)math.max((uint)a.x2, (uint)b.x2),
                     (byte)math.max((uint)a.x3, (uint)b.x3),
                     (byte)math.max((uint)a.x4, (uint)b.x4),
                     (byte)math.max((uint)a.x5, (uint)b.x5),
                     (byte)math.max((uint)a.x6, (uint)b.x6),
                     (byte)math.max((uint)a.x7, (uint)b.x7));
}
// Sum of the eight lanes of x.
// With SSE2 this is the sum of absolute differences against the zero vector;
// the fallback adds the components pairwise.
public static uint csum(byte8 x)
{
    if (Sse2.IsSse2Supported)
    {
        return sad(x, byte8.zero);
    }

    int lowerHalf = (x.x0 + x.x1) + (x.x2 + x.x3);
    int upperHalf = (x.x4 + x.x5) + (x.x6 + x.x7);

    return (uint)(lowerHalf + upperHalf);
}
// Alternating subtract/add: lanes 0, 2, 4, 6 receive a - b and lanes 1, 3, 5, 7 receive a + b.
// The SSSE3 path negates b in the even lanes (sign operand 255, i.e. -1 as a signed byte) and adds;
// the fallback negates b in the lanes flagged true and subtracts.
public static byte8 subadd(byte8 a, byte8 b)
{
    if (Ssse3.IsSsse3Supported)
    {
        byte8 laneSigns = new byte8(255, 1, 255, 1, 255, 1, 255, 1);
        return a + Ssse3.sign_epi8(b, laneSigns);
    }

    bool8 negateLane = new bool8(false, true, false, true, false, true, false, true);
    byte8 negatedB = (byte8)(-(sbyte8)b);

    return a - select(b, negatedB, negateLane);
}
// Number of set bits in each lane of x.
// With SSSE3 the value is reinterpreted as a byte16 and handed to the byte16 overload;
// otherwise math.countbits is applied to each component widened to uint.
public static byte8 countbits(byte8 x)
{
    if (Ssse3.IsSsse3Supported)
    {
        byte16 asSixteenLanes = (byte16)(v128)x;
        return (v128)countbits(asSixteenLanes);
    }

    return new byte8((byte)math.countbits((uint)x.x0),
                     (byte)math.countbits((uint)x.x1),
                     (byte)math.countbits((uint)x.x2),
                     (byte)math.countbits((uint)x.x3),
                     (byte)math.countbits((uint)x.x4),
                     (byte)math.countbits((uint)x.x5),
                     (byte)math.countbits((uint)x.x6),
                     (byte)math.countbits((uint)x.x7));
}
// True when every lane of x is non-zero.
// The SSE2 path marks the lanes that equal zero and checks that none of the eight were marked.
public static bool all(byte8 x)
{
    if (Sse2.IsSse2Supported)
    {
        v128 zeroLanes = Sse2.cmpeq_epi8(x, default(v128));
        return zeroLanes.ULong0 == 0;
    }

    return all(x != 0);
}
// Divides every lane of dividend by one scalar divisor and also reports the remainder.
// A divisor that is not a compile-time constant is broadcast to a byte8 and forwarded to the
// vector overload; a constant divisor goes through the % and / operators directly.
//
// remainder - receives the per-lane remainder.
// returns   - the per-lane quotient.
public static byte8 divrem(byte8 dividend, byte divisor, out byte8 remainder)
{
    if (!Constant.IsConstantExpression(divisor))
    {
        return divrem(dividend, (byte8)divisor, out remainder);
    }

    remainder = dividend % divisor;
    return dividend / divisor;
}
// Component-wise remainder of two byte2x4 matrices.
// With SSE2 the four columns of each operand are gathered into one byte8 and reduced in a
// single vector operation; otherwise each column is reduced separately.
public static byte2x4 operator %(byte2x4 left, byte2x4 right)
{
    if (Sse2.IsSse2Supported)
    {
        byte8 numerators = new byte8(left.c0, left.c1, left.c2, left.c3);
        byte8 denominators = new byte8(right.c0, right.c1, right.c2, right.c3);

        byte8 rem = numerators % denominators;

        return new byte2x4(rem.v2_0, rem.v2_2, rem.v2_4, rem.v2_6);
    }

    return new byte2x4(left.c0 % right.c0, left.c1 % right.c1, left.c2 % right.c2, left.c3 % right.c3);
}
// Component-wise quotient of two byte2x4 matrices.
// With SSE2 the four columns of each operand are gathered into one byte8 and divided in a
// single vector operation; otherwise each column is divided separately.
public static byte2x4 operator /(byte2x4 left, byte2x4 right)
{
    if (Sse2.IsSse2Supported)
    {
        byte8 numerators = new byte8(left.c0, left.c1, left.c2, left.c3);
        byte8 denominators = new byte8(right.c0, right.c1, right.c2, right.c3);

        byte8 div = numerators / denominators;

        return new byte2x4(div.v2_0, div.v2_2, div.v2_4, div.v2_6);
    }

    return new byte2x4(left.c0 / right.c0, left.c1 / right.c1, left.c2 / right.c2, left.c3 / right.c3);
}
// Per-lane test for "exactly one bit set".
// The SSE2 path requires x > 0 and x & (x - 1) == 0, then reduces the all-ones compare result
// to the value 1 per lane; the fallback calls math.ispow2 on each component widened to uint.
public static bool8 ispow2(byte8 x)
{
    if (Sse2.IsSse2Supported)
    {
        v128 isPositive = Operator.greater_mask_byte(x, byte8.zero);
        v128 hasSingleBit = Sse2.cmpeq_epi8(default(v128), x & (x - 1));
        v128 isPowerOfTwo = Sse2.and_si128(isPositive, hasSingleBit);

        return Sse2.and_si128(isPowerOfTwo, new byte16(1));
    }

    return new bool8(math.ispow2((uint)x.x0),
                     math.ispow2((uint)x.x1),
                     math.ispow2((uint)x.x2),
                     math.ispow2((uint)x.x3),
                     math.ispow2((uint)x.x4),
                     math.ispow2((uint)x.x5),
                     math.ispow2((uint)x.x6),
                     math.ispow2((uint)x.x7));
}
// Draws eight values from NextState(), multiplies each by the matching lane of max in
// 16-bit lanes, and returns the upper byte of every product.
// The SSSE3 path gathers the odd-numbered bytes with a shuffle; the fallback shifts right by 8 and narrows.
public byte8 NextByte8(byte8 max)
{
    // NextState() is called eight times, in lane order, regardless of which path is taken.
    short8 products = (short8)max * new short8(NextState(), NextState(), NextState(), NextState(), NextState(), NextState(), NextState(), NextState());

    if (Ssse3.IsSsse3Supported)
    {
        return Ssse3.shuffle_epi8(products, new byte8(1, 3, 5, 7, 9, 11, 13, 15));
    }

    return (byte8)(products >> 8);
}