/// <summary>
/// Builds a ushort8 whose lanes 0-1 come from <paramref name="x01"/>, lanes 2-4 from
/// <paramref name="x234"/> and lanes 5-7 from <paramref name="x567"/>.
/// </summary>
public ushort8(ushort2 x01, ushort3 x234, ushort3 x567)
{
    if (Sse2.IsSse2Supported)
    {
        // Byte-shift each trailing part up to the lane where it has to end up.
        v128 middle = Sse2.bslli_si128(x234, 2 * sizeof(ushort));
        v128 upper = Sse2.bslli_si128(x567, 5 * sizeof(ushort));

        if (Sse4_1.IsSse41Supported)
        {
            // Lanes 5-7 from 'upper', then lanes 2-7 from the merged tail.
            v128 tail = Sse4_1.blend_epi16(middle, upper, 0b1110_0000);
            this = Sse4_1.blend_epi16(x01, tail, 0b1111_1100);
        }
        else
        {
            v128 tail = Mask.BlendEpi16_SSE2(middle, upper, 0b1110_0000);
            this = Mask.BlendEpi16_SSE2(x01, tail, 0b1111_1100);
        }
    }
    else
    {
        this.x0 = x01.x;
        this.x1 = x01.y;

        this.x2 = x234.x;
        this.x3 = x234.y;
        this.x4 = x234.z;

        this.x5 = x567.x;
        this.x6 = x567.y;
        this.x7 = x567.z;
    }
}
/// <summary>
/// Builds a ushort2x3 from six scalars listed row by row; column cN receives (m0N, m1N).
/// </summary>
public ushort2x3(ushort m00, ushort m01, ushort m02,
                 ushort m10, ushort m11, ushort m12)
{
    c0 = new ushort2(m00, m10);
    c1 = new ushort2(m01, m11);
    c2 = new ushort2(m02, m12);
}
/// <summary>
/// Component-wise least common multiple of two short2 vectors, computed as
/// (|x| / gcd(|x|, |y|)) * |y|.
/// </summary>
public static ushort2 lcm(short2 x, short2 y)
{
    ushort2 magnitudeX = (ushort2)abs(x);
    ushort2 magnitudeY = (ushort2)abs(y);

    // Divide before multiplying so the intermediate value stays as small as possible.
    ushort2 commonDivisor = gcd(magnitudeX, magnitudeY);
    ushort2 reducedX = magnitudeX / commonDivisor;

    return reducedX * magnitudeY;
}
/// <summary>
/// Builds a ushort8 whose lanes 0-2 come from <paramref name="x012"/>, lanes 3-4 from
/// <paramref name="x34"/> and lanes 5-7 from <paramref name="x567"/>.
/// </summary>
public ushort8(ushort3 x012, ushort2 x34, ushort3 x567)
{
    if (Sse2.IsSse2Supported)
    {
        // Move x567 two lanes up so it sits directly behind x34.
        v128 shifted567 = Sse2.bslli_si128(x567, 2 * sizeof(short));

        if (Sse4_1.IsSse41Supported)
        {
            // Pack x34 and x567 into lanes 0-4, then move the pack up behind x012.
            v128 packed = Sse4_1.blend_epi16(x34, shifted567, 0b0001_1100);
            v128 aligned = Sse2.bslli_si128(packed, 3 * sizeof(short));

            this = Sse4_1.blend_epi16(x012, aligned, 0b1111_1000);
        }
        else
        {
            v128 packed = Mask.BlendEpi16_SSE2(x34, shifted567, 0b0001_1100);
            v128 aligned = Sse2.bslli_si128(packed, 3 * sizeof(short));

            this = Mask.BlendEpi16_SSE2(x012, aligned, 0b1111_1000);
        }
    }
    else
    {
        this.x0 = x012.x;
        this.x1 = x012.y;
        this.x2 = x012.z;

        this.x3 = x34.x;
        this.x4 = x34.y;

        this.x5 = x567.x;
        this.x6 = x567.y;
        this.x7 = x567.z;
    }
}
/// <summary>
/// Builds a ushort8 whose lanes 0-2 come from <paramref name="x012"/>, lanes 3-5 from
/// <paramref name="x345"/> and lanes 6-7 from <paramref name="x67"/>.
/// </summary>
public ushort8(ushort3 x012, ushort3 x345, ushort2 x67)
{
    if (Sse2.IsSse2Supported)
    {
        // Byte-shift each trailing part up to the lane where it has to end up.
        v128 middle = Sse2.bslli_si128(x345, 3 * sizeof(ushort));
        v128 upper = Sse2.bslli_si128(x67, 6 * sizeof(ushort));

        if (Sse4_1.IsSse41Supported)
        {
            // Lanes 3-5 from 'middle', then lanes 6-7 from 'upper'.
            v128 head = Sse4_1.blend_epi16(x012, middle, 0b0011_1000);
            this = Sse4_1.blend_epi16(head, upper, 0b1100_0000);
        }
        else
        {
            v128 head = Mask.BlendEpi16_SSE2(x012, middle, 0b0011_1000);
            this = Mask.BlendEpi16_SSE2(head, upper, 0b1100_0000);
        }
    }
    else
    {
        this.x0 = x012.x;
        this.x1 = x012.y;
        this.x2 = x012.z;

        this.x3 = x345.x;
        this.x4 = x345.y;
        this.x5 = x345.z;

        this.x6 = x67.x;
        this.x7 = x67.y;
    }
}
/// <summary>
/// Builds a ushort2x4 in which every column is assigned the single scalar <paramref name="v"/>.
/// </summary>
public ushort2x4(ushort v)
{
    // Convert the scalar to a column once and reuse it for all four columns.
    ushort2 column = v;

    c0 = column;
    c1 = column;
    c2 = column;
    c3 = column;
}
/// <summary>
/// Builds a ushort2x4 from its four column vectors.
/// </summary>
public ushort2x4(ushort2 c0,
                 ushort2 c1,
                 ushort2 c2,
                 ushort2 c3)
{
    // 'this.' is required: the parameters shadow the fields of the same name.
    this.c0 = c0;
    this.c1 = c1;
    this.c2 = c2;
    this.c3 = c3;
}
/// <summary>
/// Component-wise count of trailing zero bits of a ushort2.
/// On the SIMD paths a zero component produces 16.
/// </summary>
public static ushort2 tzcnt(ushort2 x)
{
    if (Ssse3.IsSsse3Supported)
    {
        v128 nibbleMask = new v128(0x0F0F_0F0F);
        // Lookup tables indexed by nibble value; entry 0 is 16 so an empty nibble never wins the minimum.
        v128 lowNibbleTable = new v128(16, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0);
        v128 highNibbleTable = new v128(16, 4, 5, 4, 6, 4, 5, 4, 7, 4, 5, 4, 6, 4, 5, 4);

        v128 lowNibbles = Sse2.and_si128(nibbleMask, x);
        v128 highNibbles = Sse2.and_si128(nibbleMask, Sse2.srli_epi16(x, 4));

        // Trailing zero count of every individual byte.
        v128 perByte = Sse2.min_epu8(Ssse3.shuffle_epi8(lowNibbleTable, lowNibbles),
                                     Ssse3.shuffle_epi8(highNibbleTable, highNibbles));

        // Count contributed by the high byte of each 16-bit lane, offset by 8 and moved into the low byte.
        v128 fromHighByte = Sse2.srli_epi16(Sse2.add_epi8(perByte, Sse2.set1_epi8(8)), 8);

        return Sse2.min_epu8(perByte, fromHighByte);
    }
    else if (Sse2.IsSse2Supported)
    {
        // x AND -x isolates the lowest set bit of every lane.
        v128 lowestBit = x & (ushort2)(-((short2)x));

        // Each term contributes its weight when the isolated bit is absent from the tested bit positions.
        ushort2 whenZero = Mask.BlendV(default(v128), new ushort2(1), Sse2.cmpeq_epi16(lowestBit, default(v128)));
        ushort2 add8 = Mask.BlendV(default(v128), new ushort2(8), Sse2.cmpeq_epi16(lowestBit & (ushort2)0x00FF, default(v128)));
        ushort2 add4 = Mask.BlendV(default(v128), new ushort2(4), Sse2.cmpeq_epi16(lowestBit & (ushort2)0x0F0F, default(v128)));
        ushort2 add2 = Mask.BlendV(default(v128), new ushort2(2), Sse2.cmpeq_epi16(lowestBit & (ushort2)0x3333, default(v128)));
        ushort2 add1 = Mask.BlendV(default(v128), new ushort2(1), Sse2.cmpeq_epi16(lowestBit & (ushort2)0x5555, default(v128)));

        return (whenZero + add8) + ((add4 + add2) + add1);
    }
    else
    {
        return new ushort2(tzcnt(x.x), tzcnt(x.y));
    }
}
/// <summary>
/// Component-wise greatest common divisor of two ushort2 vectors.
/// </summary>
/// <remarks>
/// SSE2 path: a branch-free binary GCD that processes both lanes at once.
///  - Lanes where x or y is zero are flagged as done up front; their final result is the other operand.
///  - 'shift' is tzcnt(x | y) and is applied to the result with shl at the end; x and y have their
///    trailing zeros removed with shrl (presumably per-lane logical shifts - confirm against their definitions).
///  - Each iteration orders the pair so that x holds the minimum and y the maximum (min_epu16/max_epu16 with
///    SSE4.1, otherwise a blend on an unsigned greater-than mask), then subtracts x from y.
///  - A lane that is not yet done and whose y becomes zero latches the current x into 'result' and is marked done.
///  - The loop ends once the low 32 bits of the done mask (the two 16-bit lanes) are all ones.
/// Fallback path: calls the scalar gcd on each component widened to uint.
/// </remarks>
public static ushort2 gcd(ushort2 x, ushort2 y) { if (Sse2.IsSse2Supported) { v128 ZERO = default(v128); v128 result = ZERO; v128 result_if_zero_any = ZERO; v128 x_is_zero = Sse2.cmpeq_epi16(x, ZERO); v128 y_is_zero = Sse2.cmpeq_epi16(y, ZERO); v128 any_zero = Sse2.or_si128(x_is_zero, y_is_zero); result_if_zero_any = Mask.BlendV(result_if_zero_any, y, x_is_zero); result_if_zero_any = Mask.BlendV(result_if_zero_any, x, y_is_zero); v128 doneMask = any_zero; ushort2 shift = tzcnt(x | y); x = shrl(x, tzcnt(x)); do { y = shrl(y, tzcnt(y)); if (Sse4_1.IsSse41Supported) { v128 tempX = x; x = Sse4_1.min_epu16(x, y); y = Sse4_1.max_epu16(y, tempX); } else { v128 tempX = x; v128 x_greater_y = Operator.greater_mask_ushort(x, y); x = Mask.BlendV(x, y, x_greater_y); y = Mask.BlendV(y, tempX, x_greater_y); } y -= x; v128 loopCheck = Sse2.andnot_si128(doneMask, Sse2.cmpeq_epi16(y, ZERO)); result = Mask.BlendV(result, x, loopCheck); doneMask = Sse2.or_si128(doneMask, loopCheck); } while (-1 != doneMask.SInt0); result = shl(result, shift); result = Mask.BlendV(result, result_if_zero_any, any_zero); return(result); } else { return(new ushort2((ushort)gcd((uint)x.x, (uint)y.x), (ushort)gcd((uint)x.y, (uint)y.y))); } }
/// <summary>
/// Builds a ushort2x4 from eight scalars listed row by row; column cN receives (m0N, m1N).
/// </summary>
public ushort2x4(ushort m00, ushort m01, ushort m02, ushort m03,
                 ushort m10, ushort m11, ushort m12, ushort m13)
{
    c0 = new ushort2(m00, m10);
    c1 = new ushort2(m01, m11);
    c2 = new ushort2(m02, m12);
    c3 = new ushort2(m03, m13);
}
/// <summary>
/// Reverses the bit order of each 16-bit component by swapping progressively larger groups:
/// single bits, bit pairs, nibbles and finally the two bytes.
/// </summary>
public static ushort2 reversebits(ushort2 x)
{
    // Swap neighbouring bits.
    ushort2 movedDown = (x >> 1) & 0x5555;
    ushort2 movedUp = (x & 0x5555) << 1;
    x = movedDown | movedUp;

    // Swap neighbouring bit pairs.
    movedDown = (x >> 2) & 0x3333;
    movedUp = (x & 0x3333) << 2;
    x = movedDown | movedUp;

    // Swap the nibbles inside each byte.
    movedDown = (x >> 4) & 0x0F0F;
    movedUp = (x & 0x0F0F) << 4;
    x = movedDown | movedUp;

    // Swap the two bytes.
    return (x >> 8) | (x << 8);
}
/// <summary>
/// Component-wise: smears the highest set bit into every lower bit position and then removes
/// everything below it, leaving only the highest set bit of each component.
/// </summary>
public static ushort2 floorpow2(ushort2 x)
{
    x = x | (x >> 1);
    x = x | (x >> 2);
    x = x | (x >> 4);
    x = x | (x >> 8);

    // x is now all ones from the top set bit downwards; subtracting its lower half keeps only that top bit.
    ushort2 lowerOnes = x >> 1;

    return x - lowerOnes;
}
/// <summary>
/// Component-wise: decrements, smears the highest set bit into every lower bit position,
/// then increments - the usual bit trick for rounding up to a power of two.
/// </summary>
public static ushort2 ceilpow2(ushort2 x)
{
    // Decrement first so that values that already are a power of two map to themselves.
    x = x - 1;

    x = x | (x >> 1);
    x = x | (x >> 2);
    x = x | (x >> 4);
    x = x | (x >> 8);

    return x + 1;
}
/// <summary>
/// Average of the two components of <paramref name="c"/>, computed as (c.x + c.y + 1) / 2.
/// </summary>
public static ushort avg(ushort2 c)
{
    if (Sse2.IsSse2Supported)
    {
        // Bring the second component down into lane 0 and average it with the first.
        v128 secondComponent = Sse2.bsrli_si128(c, sizeof(ushort));
        v128 averaged = Sse2.avg_epu16(c, secondComponent);

        return averaged.UShort0;
    }
    else
    {
        var biasedSum = 1u + csum(c);

        return (ushort)(biasedSum / 2u);
    }
}
/// <summary>
/// Component-wise average of two ushort2 vectors, computed as (x + y + 1) >> 1.
/// </summary>
public static ushort2 avg(ushort2 x, ushort2 y)
{
    if (Sse2.IsSse2Supported)
    {
        return Sse2.avg_epu16(x, y);
    }

    // Scalar fallback: the sums are formed in int, so they cannot overflow 16 bits.
    int averageX = (x.x + y.x + 1) >> 1;
    int averageY = (x.y + y.y + 1) >> 1;

    return new ushort2((ushort)averageX, (ushort)averageY);
}
/// <summary>
/// Returns the smaller of the two components of <paramref name="x"/>.
/// </summary>
public static ushort cmin(ushort2 x)
{
    if (Sse2.IsSse2Supported)
    {
        // Compare the vector against a copy holding y in both lanes; lane 0 is then min(x.x, x.y).
        ushort2 broadcastY = x.yy;

        return min(x, broadcastY).x;
    }

    return (ushort)math.min((uint)x.x, (uint)x.y);
}
/// <summary>
/// Component-wise <paramref name="left"/> AND (NOT <paramref name="right"/>).
/// </summary>
public static ushort2 andnot(ushort2 left, ushort2 right)
{
    if (Sse2.IsSse2Supported)
    {
        // The intrinsic inverts its FIRST operand, hence the swapped argument order.
        return Sse2.andnot_si128(right, left);
    }

    return left & ~right;
}
/// <summary>
/// Subtracts in the first component and adds in the second: (a.x - b.x, a.y + b.y).
/// </summary>
public static ushort2 subadd(ushort2 a, ushort2 b)
{
    if (Ssse3.IsSsse3Supported)
    {
        // A negative control lane negates b, a positive one passes it through unchanged.
        ushort2 signControl = new ushort2(ushort.MaxValue, 1);

        return a + Ssse3.sign_epi16(b, signControl);
    }
    else
    {
        ushort2 negatedB = (ushort2)(-(short2)b);

        // Negate only the second lane of b, then subtract the whole vector.
        return a - select(b, negatedB, new bool2(false, true));
    }
}
/// <summary>
/// Divides every component of <paramref name="dividend"/> by the scalar <paramref name="divisor"/>,
/// returning the quotient and writing the remainder to <paramref name="remainder"/>.
/// </summary>
public static ushort2 divrem(ushort2 dividend, ushort divisor, out ushort2 remainder)
{
    if (Constant.IsConstantExpression(divisor))
    {
        // For a compile-time constant divisor, use the scalar-divisor operators directly.
        remainder = dividend % divisor;

        return dividend / divisor;
    }

    // Otherwise defer to the vector-divisor overload.
    return divrem(dividend, (ushort2)divisor, out remainder);
}
/// <summary>
/// Component-wise test for "exactly one bit set": true where x is non-zero and x AND (x - 1) is zero.
/// </summary>
public static bool2 ispow2(ushort2 x)
{
    if (Sse2.IsSse2Supported)
    {
        v128 isNonZero = Operator.greater_mask_ushort(x, default(v128));
        v128 hasNoOtherBit = Sse2.cmpeq_epi16(default(v128), x & (x - 1));

        // Reduce the all-ones lane masks to 0/1 and narrow to bytes so the bits can be reinterpreted as bool2.
        v128 result = (byte2)(new ushort2(1) & Sse2.and_si128(isNonZero, hasNoOtherBit));

        return *(bool2*)&result;
    }
    else
    {
        return new bool2(math.ispow2((uint)x.x), math.ispow2((uint)x.y));
    }
}
/// <summary>
/// Produces a per-lane mask for the unsigned comparison left greater-than right, built from the signed
/// 16-bit compare intrinsic. Throws <see cref="CPUFeatureCheckException"/> when SSE2 is unavailable.
/// </summary>
internal static v128 greater_mask_ushort(ushort2 left, ushort2 right)
{
    if (Sse2.IsSse2Supported)
    {
        // Flipping the top bit of both operands maps unsigned order onto signed order.
        ushort2 signBit = 1 << 15;

        v128 biasedLeft = Sse2.xor_si128(left, signBit);
        v128 biasedRight = Sse2.xor_si128(right, signBit);

        return Sse2.cmpgt_epi16(biasedLeft, biasedRight);
    }
    else
    {
        throw new CPUFeatureCheckException();
    }
}
/// <summary>
/// Returns a random ushort2 in which each component is the high 16 bits of the product of the matching
/// component of <paramref name="max"/> and a fresh value from NextState(), i.e. (max * r) >> 16.
/// Assuming NextState() yields a 16-bit value, every component lies in [0, max).
/// </summary>
/// <param name="max"> Exclusive upper bound per component; each component is asserted to be positive. </param>
public ushort2 NextUShort2(ushort2 max)
{
    Assert.IsPositive(max.x);
    Assert.IsPositive(max.y);

    if (Sse2.IsSse2Supported)
    {
        // The multiply-high has to be UNSIGNED to match the scalar fallback below. The signed variant
        // (mulhi_epi16) treats any operand with bit 15 set as negative and can return values >= max.
        return Sse2.mulhi_epu16(max, new ushort2((ushort)NextState(), (ushort)NextState()));
    }
    else
    {
        return (ushort2)(((uint2)max * new uint2(NextState(), NextState())) >> 16);
    }
}
/// <summary>
/// Returns a random ushort2 in which each component is min + (((max - min) * r) >> 16), where r is a
/// fresh value from NextState(). Assuming NextState() yields a 16-bit value, every component lies in [min, max).
/// </summary>
/// <param name="min"> Inclusive lower bound per component. </param>
/// <param name="max"> Exclusive upper bound per component; asserted to be not smaller than <paramref name="min"/>. </param>
public ushort2 NextUShort2(ushort2 min, ushort2 max)
{
    Assert.IsNotSmaller(max.x, min.x);
    Assert.IsNotSmaller(max.y, min.y);

    if (Sse2.IsSse2Supported)
    {
        // The multiply-high has to be UNSIGNED to match the scalar fallback below. The signed variant
        // (mulhi_epi16) treats any operand with bit 15 set as negative and can push the result out of range.
        return min + Sse2.mulhi_epu16(max - min, new ushort2((ushort)NextState(), (ushort)NextState()));
    }
    else
    {
        return min + (ushort2)(((uint2)(max - min) * new uint2(NextState(), NextState())) >> 16);
    }
}
/// <summary>
/// Component-wise unsigned maximum of two ushort2 vectors.
/// </summary>
public static ushort2 max(ushort2 a, ushort2 b)
{
    if (Sse4_1.IsSse41Supported)
    {
        return Sse4_1.max_epu16(a, b);
    }
    else if (Sse2.IsSse2Supported)
    {
        // No unsigned 16-bit max is used on this path: pick b in the lanes where b is greater than a.
        v128 bIsGreater = Operator.greater_mask_ushort(b, a);

        return Mask.BlendV(a, b, bIsGreater);
    }
    else
    {
        ushort largerX = (ushort)math.max((uint)a.x, (uint)b.x);
        ushort largerY = (ushort)math.max((uint)a.y, (uint)b.y);

        return new ushort2(largerX, largerY);
    }
}
/// <summary>
/// Component-wise three-way comparison. On the SSE2 path a component is 1 where x is greater than y,
/// -1 where y is greater than x, and 0 otherwise.
/// </summary>
public static short2 compareto(ushort2 x, ushort2 y)
{
    if (Sse2.IsSse2Supported)
    {
        // Both masks hold -1 (all ones) in the lanes where their comparison is true and 0 elsewhere.
        short2 xIsGreater = Operator.greater_mask_ushort(x, y);
        short2 yIsGreater = Operator.greater_mask_ushort(y, x);

        short2 positivePart = 0 - xIsGreater;

        return positivePart + yIsGreater;
    }
    else
    {
        short resultX = (short)compareto(x.x, y.x);
        short resultY = (short)compareto(x.y, y.y);

        return new short2(resultX, resultY);
    }
}
/// <summary>
/// Component-wise test of whether <paramref name="dividend"/> is a multiple of <paramref name="divisor"/>.
/// Each divisor component is asserted to be non-zero.
/// </summary>
public static bool2 isdivisible(ushort2 dividend, ushort2 divisor)
{
    Assert.AreNotEqual(0, divisor.x);
    Assert.AreNotEqual(0, divisor.y);

    if (Constant.IsConstantExpression(divisor))
    {
        // Division-free test for constant divisors: multiply by a precomputed 32-bit factor and compare.
        uint2 factor = (new uint2(uint.MaxValue) / divisor) + 1;

        return dividend * factor <= factor - 1;
    }

    return dividend % divisor == 0;
}
/// <summary>
/// Returns a random sbyte2 computed per component as min + (((max - min) * r) >> 8), where r is a
/// fresh value from NextState().
/// </summary>
/// <param name="min"> Lower bound per component. </param>
/// <param name="max"> Upper bound per component; asserted to be not smaller than <paramref name="min"/>. </param>
public sbyte2 NextSByte2(sbyte2 min, sbyte2 max)
{
    Assert.IsNotSmaller(max.x, min.x);
    Assert.IsNotSmaller(max.y, min.y);

    // Both code paths start from the same 16-bit product of the range and the random values.
    ushort2 scaled = (ushort2)(max - min) * new ushort2(NextState(), NextState());

    if (Ssse3.IsSsse3Supported)
    {
        // Select bytes 1 and 3, i.e. the high byte of each 16-bit product.
        v128 highBytes = Ssse3.shuffle_epi8(scaled, new byte4(1, 3, 0, 0));

        return min + highBytes;
    }
    else
    {
        return min + (sbyte2)(scaled >> 8);
    }
}
/// <summary>
/// Component-wise division that returns the quotient and writes the remainder to
/// <paramref name="remainder"/>.
/// </summary>
public static ushort2 divrem(ushort2 dividend, ushort2 divisor, out ushort2 remainder)
{
    if (Sse2.IsSse2Supported)
    {
        // Derive the remainder from the quotient rather than evaluating a separate modulo.
        ushort2 result = dividend / divisor;
        ushort2 wholePart = result * divisor;

        remainder = dividend - wholePart;

        return result;
    }

    remainder = dividend % divisor;

    return dividend / divisor;
}
/// <summary>
/// Component-wise count of leading zero bits of a ushort2.
/// On the SIMD paths a zero component produces 16.
/// </summary>
public static ushort2 lzcnt(ushort2 x)
{
    if (Ssse3.IsSsse3Supported)
    {
        v128 nibbleMask = new v128(0x0F0F_0F0F);
        // Lookup tables indexed by nibble value; entry 0 is 16 so an empty nibble never wins the minimum.
        v128 lowNibbleTable = new v128(16, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4);
        v128 highNibbleTable = new v128(16, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0);

        v128 lowNibbles = Sse2.and_si128(nibbleMask, x);
        v128 highNibbles = Sse2.and_si128(nibbleMask, Sse2.srli_epi16(x, 4));

        // Leading zero count of every individual byte.
        v128 perByte = Sse2.min_epu8(Ssse3.shuffle_epi8(lowNibbleTable, lowNibbles),
                                     Ssse3.shuffle_epi8(highNibbleTable, highNibbles));

        // The low byte only matters after the 8 bits of the high byte, so it is offset by 8;
        // the high byte's own count is moved down into the low byte position.
        v128 fromLowByte = Sse2.add_epi8(perByte, Sse2.set1_epi16(8));
        v128 fromHighByte = Sse2.srli_epi16(perByte, 8);

        return Sse2.min_epu8(fromLowByte, fromHighByte);
    }
    else if (Sse2.IsSse2Supported)
    {
        // Branch-free binary search: at each step, lanes whose upper part is non-zero
        // continue with that upper part and lower their running count.
        ushort2 count = 16;

        ushort2 upper = x >> 8;
        ushort2 upperIsZero = Sse2.cmpeq_epi16(upper, default(v128));
        count = Mask.BlendV(count - 8, count, upperIsZero);
        x = Mask.BlendV(upper, x, upperIsZero);

        upper = x >> 4;
        upperIsZero = Sse2.cmpeq_epi16(upper, default(v128));
        count = Mask.BlendV(count - 4, count, upperIsZero);
        x = Mask.BlendV(upper, x, upperIsZero);

        upper = x >> 2;
        upperIsZero = Sse2.cmpeq_epi16(upper, default(v128));
        count = Mask.BlendV(count - 2, count, upperIsZero);
        x = Mask.BlendV(upper, x, upperIsZero);

        upper = x >> 1;
        upperIsZero = Sse2.cmpeq_epi16(upper, default(v128));

        return Mask.BlendV(count - 2, count - x, upperIsZero);
    }
    else
    {
        return new ushort2(lzcnt(x.x), lzcnt(x.y));
    }
}
/// <summary>
/// Builds a ushort8 from four ushort2 vectors that supply lanes 0-1, 2-3, 4-5 and 6-7 respectively.
/// </summary>
public ushort8(ushort2 x01, ushort2 x23, ushort2 x45, ushort2 x67)
{
    if (Sse2.IsSse2Supported)
    {
        // Join the pairs into two halves and delegate to the (ushort4, ushort4) constructor.
        ushort4 lowerHalf = new ushort4(x01, x23);
        ushort4 upperHalf = new ushort4(x45, x67);

        this = new ushort8(lowerHalf, upperHalf);
    }
    else
    {
        this.x0 = x01.x;
        this.x1 = x01.y;

        this.x2 = x23.x;
        this.x3 = x23.y;

        this.x4 = x45.x;
        this.x5 = x45.y;

        this.x6 = x67.x;
        this.x7 = x67.y;
    }
}