/// <summary>
/// Builds a byte8 whose components 0-2 come from <paramref name="x012"/>, components 3-4 from
/// <paramref name="x34"/> and components 5-7 from <paramref name="x567"/>.
/// </summary>
public byte8(byte3 x012, byte2 x34, byte3 x567)
{
    if (!Sse2.IsSse2Supported)
    {
        // No SSE2: write every component individually.
        this.x0 = x012.x;
        this.x1 = x012.y;
        this.x2 = x012.z;
        this.x3 = x34.x;
        this.x4 = x34.y;
        this.x5 = x567.x;
        this.x6 = x567.y;
        this.x7 = x567.z;

        return;
    }

    // Shift x567 up by two bytes so that it starts directly behind the two bytes of x34.
    v128 tail = Sse2.bslli_si128(x567, 2 * sizeof(byte));

    // 16-bit lane mask 0b0110: lanes 1 and 2 come from 'tail', the remaining lanes from x34.
    if (Sse4_1.IsSse41Supported)
    {
        tail = Sse4_1.blend_epi16(x34, tail, 0b0110);
    }
    else
    {
        // NOTE(review): presumably an SSE2 emulation of blend_epi16 - the helper is defined elsewhere.
        tail = Mask.BlendEpi16_SSE2(x34, tail, 0b0110);
    }

    // Move the packed x3..x7 up by three bytes, then merge with x012:
    // the 255 entries of the mask mark the bytes (3-7) taken from 'tail'.
    tail = Sse2.bslli_si128(tail, 3 * sizeof(byte));
    this = Mask.BlendV(x012, tail, new byte8(0, 0, 0, 255, 255, 255, 255, 255));
}
/// <summary>Creates a byte2x4 in which all four columns are initialised from the single value <paramref name="v"/>.</summary>
public byte2x4(byte v)
{
    // Convert once, then reuse the same column value for c0..c3.
    byte2 column = v;

    this.c0 = column;
    this.c1 = column;
    this.c2 = column;
    this.c3 = column;
}
/// <summary>
/// Counts the trailing zero bits of each component of <paramref name="x"/>.
/// A component equal to zero yields 8.
/// </summary>
public static byte2 tzcnt(byte2 x)
{
    if (Ssse3.IsSsse3Supported)
    {
        v128 nibbleMask = new v128(0x0F0F_0F0F);

        // Lookup tables indexed by a 4-bit value. Entry 0 is 8 ("no bit set in this nibble");
        // the high-nibble table already includes the +4 offset of the upper half.
        v128 lowNibbleTable = new v128(8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0);
        v128 highNibbleTable = new v128(8, 4, 5, 4, 6, 4, 5, 4, 7, 4, 5, 4, 6, 4, 5, 4);

        v128 lowNibbles = Sse2.and_si128(nibbleMask, x);
        v128 highNibbles = Sse2.and_si128(nibbleMask, Sse2.srli_epi16(x, 4));

        v128 lowCounts = Ssse3.shuffle_epi8(lowNibbleTable, lowNibbles);
        v128 highCounts = Ssse3.shuffle_epi8(highNibbleTable, highNibbles);

        // The smaller of the two candidates is the index of the lowest set bit.
        return Sse2.min_epu8(lowCounts, highCounts);
    }
    else if (Sse2.IsSse2Supported)
    {
        // x & -x isolates the lowest set bit of every component.
        v128 lowestBit = x & (byte2)(-(sbyte2)x);

        // Each term contributes its weight when the isolated bit is NOT inside the tested bit pattern;
        // 'ifZero' adds the extra 1 that turns 7 into 8 for a zero input.
        // NOTE(review): the constants are built as byte4 although the operands are byte2 - kept as in the original.
        byte2 ifZero = Mask.BlendV(default(v128), new byte4(1), Sse2.cmpeq_epi8(lowestBit, default(v128)));
        byte2 weight4 = Mask.BlendV(default(v128), new byte4(4), Sse2.cmpeq_epi8(lowestBit & (byte4)0x0F, default(v128)));
        byte2 weight2 = Mask.BlendV(default(v128), new byte4(2), Sse2.cmpeq_epi8(lowestBit & (byte4)0x33, default(v128)));
        byte2 weight1 = Mask.BlendV(default(v128), new byte4(1), Sse2.cmpeq_epi8(lowestBit & (byte4)0x55, default(v128)));

        return (ifZero + weight4) + (weight2 + weight1);
    }
    else
    {
        // Scalar fallback: delegate to the single-value overload per component.
        return new byte2(tzcnt(x.x), tzcnt(x.y));
    }
}
/// <summary>Reverses the bit order within each component of <paramref name="x"/>.</summary>
public static byte2 reversebits(byte2 x)
{
    // Step 1: swap neighbouring bits.
    byte2 oddBitsDown = (x >> 1) & 0x55;
    byte2 evenBitsUp = (x & 0x55) << 1;
    x = oddBitsDown | evenBitsUp;

    // Step 2: swap neighbouring bit pairs.
    byte2 highPairsDown = (x >> 2) & 0x33;
    byte2 lowPairsUp = (x & 0x33) << 2;
    x = highPairsDown | lowPairsUp;

    // Step 3: swap the two nibbles.
    return (x >> 4) | (x << 4);
}
/// <summary>
/// Creates a byte2x3 from six scalars given row by row (m&lt;row&gt;&lt;column&gt;);
/// each column is assembled from its row-0 and row-1 value.
/// </summary>
public byte2x3(byte m00, byte m01, byte m02,
               byte m10, byte m11, byte m12)
{
    byte2 column0 = new byte2(m00, m10);
    byte2 column1 = new byte2(m01, m11);
    byte2 column2 = new byte2(m02, m12);

    this.c0 = column0;
    this.c1 = column1;
    this.c2 = column2;
}
/// <summary>
/// Builds a byte8 whose components 0-1 come from <paramref name="x01"/>, components 2-4 from
/// <paramref name="x234"/> and components 5-7 from <paramref name="x567"/>.
/// </summary>
public byte8(byte2 x01, byte3 x234, byte3 x567)
{
    if (!Sse2.IsSse2Supported)
    {
        // No SSE2: write every component individually.
        this.x0 = x01.x;
        this.x1 = x01.y;
        this.x2 = x234.x;
        this.x3 = x234.y;
        this.x4 = x234.z;
        this.x5 = x567.x;
        this.x6 = x567.y;
        this.x7 = x567.z;

        return;
    }

    // Position each part at its final byte offset (2 for x234, 5 for x567).
    v128 middle = Sse2.bslli_si128(x234, 2 * sizeof(byte));
    v128 upper = Sse2.bslli_si128(x567, 5 * sizeof(byte));

    // Merge both: the 255 entries mark bytes 5-7, which are taken from 'upper'.
    upper = Mask.BlendV(middle, upper, new byte8(0, 0, 0, 0, 0, 255, 255, 255));

    // 16-bit lane mask 0b1110: lane 0 (bytes 0-1) from x01, lanes 1-3 from the merged vector.
    if (Sse4_1.IsSse41Supported)
    {
        this = Sse4_1.blend_epi16(x01, upper, 0b1110);
    }
    else
    {
        // NOTE(review): presumably an SSE2 emulation of blend_epi16 - the helper is defined elsewhere.
        this = Mask.BlendEpi16_SSE2(x01, upper, 0b1110);
    }
}
/// <summary>Creates a byte2x4 from its four columns.</summary>
/// <param name="c0">Value stored in column 0.</param>
/// <param name="c1">Value stored in column 1.</param>
/// <param name="c2">Value stored in column 2.</param>
/// <param name="c3">Value stored in column 3.</param>
public byte2x4(byte2 c0, byte2 c1, byte2 c2, byte2 c3)
{
    this.c0 = c0;
    this.c1 = c1;
    this.c2 = c2;
    this.c3 = c3;
}
/// <summary>
/// Computes, per component, (|x| / gcd(|x|, |y|)) * |y| - the least common multiple of the
/// absolute values of <paramref name="x"/> and <paramref name="y"/>.
/// </summary>
/// <remarks>
/// NOTE(review): there is no guard for a zero gcd here (both inputs zero in a component);
/// what the division does in that case depends on the byte2 division operator - confirm.
/// </remarks>
public static byte2 lcm(sbyte2 x, sbyte2 y)
{
    byte2 magnitudeX = (byte2)abs(x);
    byte2 magnitudeY = (byte2)abs(y);

    // Divide before multiplying, exactly as the formula above states.
    byte2 commonDivisor = gcd(magnitudeX, magnitudeY);
    byte2 reducedX = magnitudeX / commonDivisor;

    return reducedX * magnitudeY;
}
/// <summary>
/// Builds a byte8 whose components 0-2 come from <paramref name="x012"/>, components 3-5 from
/// <paramref name="x345"/> and components 6-7 from <paramref name="x67"/>.
/// </summary>
public byte8(byte3 x012, byte3 x345, byte2 x67)
{
    if (!Sse2.IsSse2Supported)
    {
        // No SSE2: write every component individually.
        this.x0 = x012.x;
        this.x1 = x012.y;
        this.x2 = x012.z;
        this.x3 = x345.x;
        this.x4 = x345.y;
        this.x5 = x345.z;
        this.x6 = x67.x;
        this.x7 = x67.y;

        return;
    }

    // Position each part at its final byte offset (3 for x345, 6 for x67).
    v128 middle = Sse2.bslli_si128(x345, 3 * sizeof(byte));
    v128 upper = Sse2.bslli_si128(x67, 6 * sizeof(byte));

    // Merge x012 with the shifted x345: the 255 entries mark bytes 3-5, taken from 'middle'.
    v128 lower = Mask.BlendV(x012, middle, new byte8(0, 0, 0, 255, 255, 255, 0, 0));

    // 16-bit lane mask 0b1000: only lane 3 (bytes 6-7) is taken from 'upper'.
    if (Sse4_1.IsSse41Supported)
    {
        this = Sse4_1.blend_epi16(lower, upper, 0b1000);
    }
    else
    {
        // NOTE(review): presumably an SSE2 emulation of blend_epi16 - the helper is defined elsewhere.
        this = Mask.BlendEpi16_SSE2(lower, upper, 0b1000);
    }
}
/// <summary>
/// Per component: smears the highest set bit of <paramref name="x"/> into all lower bit positions
/// and then removes everything below it, leaving only that highest set bit.
/// </summary>
public static byte2 floorpow2(byte2 x)
{
    // Propagate the top set bit downwards (1, 2, then 4 positions cover all 8 bits).
    byte2 smeared = x;
    smeared = smeared | (smeared >> 1);
    smeared = smeared | (smeared >> 2);
    smeared = smeared | (smeared >> 4);

    // Subtracting the half-shifted value clears every bit except the top one.
    return smeared - (smeared >> 1);
}
/// <summary>
/// Creates a byte2x4 from eight scalars given row by row (m&lt;row&gt;&lt;column&gt;);
/// each column is assembled from its row-0 and row-1 value.
/// </summary>
public byte2x4(byte m00, byte m01, byte m02, byte m03,
               byte m10, byte m11, byte m12, byte m13)
{
    byte2 column0 = new byte2(m00, m10);
    byte2 column1 = new byte2(m01, m11);
    byte2 column2 = new byte2(m02, m12);
    byte2 column3 = new byte2(m03, m13);

    this.c0 = column0;
    this.c1 = column1;
    this.c2 = column2;
    this.c3 = column3;
}
/// <summary>
/// Per component: subtracts one, smears the highest set bit into all lower bit positions and adds one again,
/// i.e. the usual "round up to a power of two" bit trick applied to <paramref name="x"/>.
/// </summary>
public static byte2 ceilpow2(byte2 x)
{
    // Start from x - 1 so that exact powers of two map onto themselves.
    byte2 smeared = x - 1;

    // Propagate the top set bit downwards (1, 2, then 4 positions cover all 8 bits).
    smeared = smeared | (smeared >> 1);
    smeared = smeared | (smeared >> 2);
    smeared = smeared | (smeared >> 4);

    return smeared + 1;
}
/// <summary>
/// Vectorised byte2 division helper: asserts that no divisor component is zero, obtains the quotient
/// from <c>vdiv_byte_quotient</c> and converts it back to byte2.
/// </summary>
internal static byte2 vdiv_byte(byte2 dividend, byte2 divisor)
{
    Assert.AreNotEqual(divisor.x, 0);
    Assert.AreNotEqual(divisor.y, 0);

    // NOTE(review): vdiv_byte_quotient is defined elsewhere; its v128 result is reinterpreted as float2 here.
    v128 rawQuotient = vdiv_byte_quotient((int2)dividend, (int2)divisor);
    float2 quotient = *(float2*)&rawQuotient;

    return (byte2)quotient;
}
/// <summary>Returns <paramref name="left"/> AND (NOT <paramref name="right"/>), bit by bit.</summary>
public static byte2 andnot(byte2 left, byte2 right)
{
    if (!Sse2.IsSse2Supported)
    {
        return left & ~right;
    }

    // andnot_si128 inverts its FIRST operand, hence the swapped argument order.
    return Sse2.andnot_si128(right, left);
}
/// <summary>Returns true if at least one component of <paramref name="x"/> is non-zero.</summary>
public static bool any(byte2 x)
{
    if (!Sse2.IsSse2Supported)
    {
        return math.any(x != 0);
    }

    // Both components live in the lowest 16-bit lane, so a single lane test covers them.
    return 0 != Sse2.extract_epi16(x, 0);
}
/// <summary>Converts <paramref name="x"/> to a quarter2 without a numeric conversion: via a v128 cast on the SSE path, via a pointer reinterpretation otherwise.</summary>
public static quarter2 asquarter(byte2 x)
{
    if (!Sse.IsSseSupported)
    {
        // Reinterpret the storage of x directly.
        return *(quarter2*)&x;
    }

    // Route through v128; the conversions to and from v128 are defined on the vector types elsewhere.
    return (v128)x;
}
/// <summary>Componentwise average of <paramref name="x"/> and <paramref name="y"/>, computed as (x + y + 1) &gt;&gt; 1.</summary>
public static byte2 avg(byte2 x, byte2 y)
{
    if (!Sse2.IsSse2Supported)
    {
        byte averageX = (byte)((x.x + y.x + 1) >> 1);
        byte averageY = (byte)((x.y + y.y + 1) >> 1);

        return new byte2(averageX, averageY);
    }

    return Sse2.avg_epu8(x, y);
}
/// <summary>Average of the two components of <paramref name="c"/>, computed as (1 + sum of the components) / 2.</summary>
public static byte avg(byte2 c)
{
    if (!Sse2.IsSse2Supported)
    {
        return (byte)((1u + csum(c)) / 2u);
    }

    // Bring c.y down to byte 0 so that one byte-wise average pairs it with c.x.
    v128 shiftedDown = Sse2.bsrli_si128(c, 1 * sizeof(byte));
    v128 averaged = Sse2.avg_epu8(c, shiftedDown);

    return averaged.Byte0;
}
/// <summary>Componentwise unsigned maximum of <paramref name="a"/> and <paramref name="b"/>.</summary>
public static byte2 max(byte2 a, byte2 b)
{
    if (!Sse2.IsSse2Supported)
    {
        byte largerX = (byte)math.max((uint)a.x, (uint)b.x);
        byte largerY = (byte)math.max((uint)a.y, (uint)b.y);

        return new byte2(largerX, largerY);
    }

    return Sse2.max_epu8(a, b);
}
/// <summary>Returns the smaller of the two components of <paramref name="x"/>.</summary>
public static byte cmin(byte2 x)
{
    if (!Ssse3.IsSsse3Supported)
    {
        return (byte)math.min((uint)x.x, (uint)x.y);
    }

    // Compare x against a copy holding y in both slots; slot 0 then contains min(x.x, x.y).
    byte2 folded = min(x, x.yy);

    return folded.x;
}
/// <summary>Returns true if every component of <paramref name="x"/> is non-zero.</summary>
public static bool all(byte2 x)
{
    if (!Sse2.IsSse2Supported)
    {
        return math.all(x != 0);
    }

    // A byte-wise compare against zero sets a byte only where a component IS zero;
    // the lowest 16-bit lane (both components) must therefore stay completely clear.
    v128 isZero = Sse2.cmpeq_epi8(x, default(v128));

    return 0 == Sse2.extract_epi16(isZero, 0);
}
/// <summary>
/// Subtracts in the first component and adds in the second: the SSSE3 path negates b.x
/// (sign byte 255) and keeps b.y (sign byte 1) before adding to <paramref name="a"/>.
/// </summary>
public static byte2 subadd(byte2 a, byte2 b)
{
    if (Ssse3.IsSsse3Supported)
    {
        v128 signAdjustedB = Ssse3.sign_epi8(b, new byte2(255, 1));

        return a + signAdjustedB;
    }
    else
    {
        byte2 negatedB = (byte2)(-(sbyte2)b);
        bool2 useNegated = new bool2(false, true);

        // NOTE(review): 'select' is defined elsewhere; presumably it returns its second argument where the mask is true.
        return a - select(b, negatedB, useNegated);
    }
}
/// <summary>
/// Vectorised byte2 remainder helper: asserts that no divisor component is zero, then returns
/// dividend - quotient * divisor with the quotient taken from <c>vdiv_byte_quotient</c>.
/// </summary>
internal static byte2 vrem_byte(byte2 dividend, byte2 divisor)
{
    Assert.AreNotEqual(divisor.x, 0);
    Assert.AreNotEqual(divisor.y, 0);

    // Widen to int2 for the quotient helper and the multiply/subtract below.
    int2 wideDividend = dividend;
    int2 wideDivisor = divisor;

    // NOTE(review): vdiv_byte_quotient is defined elsewhere; its v128 result is reinterpreted as float2 here.
    v128 rawQuotient = vdiv_byte_quotient(wideDividend, wideDivisor);
    int2 quotient = (int2)(*(float2*)&rawQuotient);

    int2 wholePart = quotient * wideDivisor;

    return (byte2)(wideDividend - wholePart);
}
/// <summary>
/// Divides every component of <paramref name="dividend"/> by the scalar <paramref name="divisor"/>,
/// returning the quotient and writing the remainder to <paramref name="remainder"/>.
/// </summary>
public static byte2 divrem(byte2 dividend, byte divisor, out byte2 remainder)
{
    if (Constant.IsConstantExpression(divisor))
    {
        // Divisor reported as a constant expression: use the plain operators directly.
        remainder = dividend % divisor;

        return dividend / divisor;
    }
    else
    {
        // Otherwise broadcast the divisor and forward to the vector overload.
        byte2 broadcastDivisor = (byte2)divisor;

        return divrem(dividend, broadcastDivisor, out remainder);
    }
}
/// <summary>
/// Componentwise division of <paramref name="dividend"/> by <paramref name="divisor"/>,
/// returning the quotient and writing the remainder to <paramref name="remainder"/>.
/// </summary>
public static byte2 divrem(byte2 dividend, byte2 divisor, out byte2 remainder)
{
    if (!Sse2.IsSse2Supported)
    {
        remainder = dividend % divisor;

        return dividend / divisor;
    }

    // SSE2: a single helper call produces quotient and remainder together.
    return Operator.vdivrem_byte(dividend, divisor, out remainder);
}
/// <summary>
/// Vectorised byte2 division-with-remainder helper: asserts that no divisor component is zero,
/// returns the quotient and writes dividend - quotient * divisor to <paramref name="remainder"/>.
/// </summary>
internal static byte2 vdivrem_byte(byte2 dividend, byte2 divisor, out byte2 remainder)
{
    Assert.AreNotEqual(divisor.x, 0);
    Assert.AreNotEqual(divisor.y, 0);

    // Widen to int2 for the quotient helper and the multiply/subtract below.
    int2 wideDividend = dividend;
    int2 wideDivisor = divisor;

    // NOTE(review): vdiv_byte_quotient is defined elsewhere; its v128 result is reinterpreted as float2 here.
    v128 rawQuotient = vdiv_byte_quotient(wideDividend, wideDivisor);
    int2 quotient = (int2)(*(float2*)&rawQuotient);

    remainder = (byte2)(wideDividend - quotient * wideDivisor);

    return (byte2)quotient;
}
/// <summary>
/// Multiplies each component of <paramref name="max"/> (widened to 16 bits) by a fresh
/// <c>NextState()</c> value and returns the high byte of each 16-bit product.
/// </summary>
public byte2 NextByte(byte2 max)
{
    // NextState() is called twice, once for x and then once for y.
    short2 scaled = (short2)max * new short2(NextState(), NextState());

    if (Ssse3.IsSsse3Supported)
    {
        // Byte indices 1 and 3 are the high bytes of the two 16-bit products.
        return Ssse3.shuffle_epi8(scaled, new byte4(1, 3, 0, 0));
    }
    else
    {
        return (byte2)(scaled >> 8);
    }
}
/// <summary>Tests, per component, whether <paramref name="x"/> is a power of two (greater than zero with exactly one bit set).</summary>
public static bool2 ispow2(byte2 x)
{
    if (!Sse2.IsSse2Supported)
    {
        return new bool2(math.ispow2((uint)x.x), math.ispow2((uint)x.y));
    }

    // Condition 1: x > 0. NOTE(review): greater_mask_byte is defined elsewhere - presumably a byte-wise compare mask.
    v128 isPositive = Operator.greater_mask_byte(x, default(v128));

    // Condition 2: clearing the lowest set bit (x & (x - 1)) leaves nothing behind.
    v128 hasSingleBit = Sse2.cmpeq_epi8(default(v128), x & (x - 1));

    // Combine both masks and reduce each byte to 0 or 1 so the bytes can be read back as bools.
    v128 combined = Sse2.and_si128(isPositive, hasSingleBit);
    v128 asBooleans = Sse2.and_si128(combined, new byte16(1));

    return *(bool2*)&asBooleans;
}
/// <summary>Sum of absolute differences: |a.x - b.x| + |a.y - b.y|.</summary>
public static uint sad(byte2 a, byte2 b)
{
    if (!Sse2.IsSse2Supported)
    {
        return (uint)(math.abs(a.x - b.x) + math.abs(a.y - b.y));
    }

    // Keep only the first 32-bit element's masked bits so bytes outside the two components do not contribute.
    // NOTE(review): bitmask32(16) is defined elsewhere - presumably a mask of the low 16 bits.
    v128 componentMask = new v128(maxmath.bitmask32(16), 0, 0, 0);

    v128 cleanA = Sse2.and_si128(a, componentMask);
    v128 cleanB = Sse2.and_si128(b, componentMask);

    return Sse2.sad_epu8(cleanA, cleanB).UShort0;
}
/// <summary>
/// Componentwise greatest common divisor of <paramref name="x"/> and <paramref name="y"/>.
/// Where one operand of a component is zero, the other operand is returned for that component.
/// </summary>
public static byte2 gcd(byte2 x, byte2 y)
{
    if (!Sse2.IsSse2Supported)
    {
        // Scalar fallback: use the uint overload per component.
        return new byte2((byte)gcd((uint)x.x, (uint)y.x), (byte)gcd((uint)x.y, (uint)y.y));
    }

    v128 zero = default(v128);

    // Components with a zero operand are resolved up front and excluded from the loop.
    v128 xIsZero = Sse2.cmpeq_epi8(x, zero);
    v128 yIsZero = Sse2.cmpeq_epi8(y, zero);
    v128 eitherIsZero = Sse2.or_si128(xIsZero, yIsZero);

    v128 zeroCaseResult = zero;
    zeroCaseResult = Mask.BlendV(zeroCaseResult, y, xIsZero);
    zeroCaseResult = Mask.BlendV(zeroCaseResult, x, yIsZero);

    v128 finished = eitherIsZero;
    v128 answer = zero;

    // Shared factor of two of both operands; it is re-applied after the loop.
    byte2 commonShift = tzcnt(x | y);

    // Strip the trailing zero bits from x once; y is stripped at the top of every iteration.
    x = shrl(x, tzcnt(x));

    do
    {
        y = shrl(y, tzcnt(y));

        // Order the pair so that x <= y, then replace y by the difference.
        v128 previousX = x;
        x = Sse2.min_epu8(x, y);
        y = Sse2.max_epu8(y, previousX);
        y -= x;

        // A component is done the first time its difference reaches zero; capture x exactly once.
        v128 newlyFinished = Sse2.andnot_si128(finished, Sse2.cmpeq_epi8(y, zero));
        answer = Mask.BlendV(answer, x, newlyFinished);
        finished = Sse2.or_si128(finished, newlyFinished);
    }
    while (-1 != finished.SShort0); // continue until both component bytes of the mask are fully set

    answer = shl(answer, commonShift);

    // Overwrite the zero-operand components with their precomputed result.
    answer = Mask.BlendV(answer, zeroCaseResult, eitherIsZero);

    return answer;
}