// Counts trailing zero bits in each of the three 16-bit components of x.
//
// Three code paths, selected by the available instruction set:
//  * SSSE3: each byte is split into its low and high nibble; both nibbles are looked up with
//    shuffle_epi8 in 16-entry tables (entry 0 holds 16, the other entries hold the trailing-zero
//    count of the nibble, offset by 4 in the high-nibble table). The byte-wise minimum of the two
//    lookups is the per-byte count. The high byte's count plus 8 is then shifted down into the low
//    byte position and a second byte-wise minimum merges both bytes of every 16-bit lane.
//  * SSE2: `x & -x` isolates the lowest set bit of every lane; 1, 8, 4, 2 and 1 are then summed for
//    every test (lane == 0, and lane & 0x00FF / 0x0F0F / 0x3333 / 0x5555 == 0) that compares equal to zero.
//  * Otherwise: the scalar tzcnt overload is called once per component.
//
// In both SIMD paths an all-zero component evaluates to 16 (min(16, 16 + 8) resp. 1 + 8 + 4 + 2 + 1).
// NOTE(review): Mask.BlendV is assumed to select its second argument where the mask is set - confirm.
public static ushort3 tzcnt(ushort3 x) { if (Ssse3.IsSsse3Supported) { v128 NIBBLE_MASK = new v128(0x0F0F_0F0F); v128 SHUFFLE_MASK_LO = new v128(16, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0); v128 SHUFFLE_MASK_HI = new v128(16, 4, 5, 4, 6, 4, 5, 4, 7, 4, 5, 4, 6, 4, 5, 4); v128 tzcnt_bytes = Sse2.min_epu8(Ssse3.shuffle_epi8(SHUFFLE_MASK_LO, Sse2.and_si128(NIBBLE_MASK, x)), Ssse3.shuffle_epi8(SHUFFLE_MASK_HI, Sse2.and_si128(NIBBLE_MASK, Sse2.srli_epi16(x, 4)))); return(Sse2.min_epu8(tzcnt_bytes, Sse2.srli_epi16(Sse2.add_epi8(tzcnt_bytes, Sse2.set1_epi8(8)), 8))); } else if (Sse2.IsSse2Supported) { v128 compareMask = x & (ushort3)(-((short3)x)); ushort3 first = Mask.BlendV(default(v128), new ushort4(1), Sse2.cmpeq_epi16(compareMask, default(v128))); ushort3 second = Mask.BlendV(default(v128), new ushort4(8), Sse2.cmpeq_epi16(compareMask & (ushort4)0x00FF, default(v128))); ushort3 third = Mask.BlendV(default(v128), new ushort4(4), Sse2.cmpeq_epi16(compareMask & (ushort4)0x0F0F, default(v128))); ushort3 fourth = Mask.BlendV(default(v128), new ushort4(2), Sse2.cmpeq_epi16(compareMask & (ushort4)0x3333, default(v128))); ushort3 fifth = Mask.BlendV(default(v128), new ushort4(1), Sse2.cmpeq_epi16(compareMask & (ushort4)0x5555, default(v128))); return((first + second) + ((third + fourth) + fifth)); } else { return(new ushort3(tzcnt(x.x), tzcnt(x.y), tzcnt(x.z))); } }
/// <summary>Constructs a ushort3x4 whose four columns are all initialised from the single value <paramref name="v"/>.</summary>
public ushort3x4(ushort v)
{
    // Convert the scalar to a column once and reuse it for every column.
    ushort3 column = v;

    c0 = column;
    c1 = column;
    c2 = column;
    c3 = column;
}
/// <summary>
/// Component-wise least common multiple of the absolute values of <paramref name="x"/> and <paramref name="y"/>.
/// </summary>
/// <remarks>
/// Computed as (|x| / gcd(|x|, |y|)) * |y|; dividing before multiplying keeps the intermediate value small.
/// NOTE(review): when both inputs of a component are zero the gcd is zero and the division has a zero divisor -
/// confirm how ushort3 division treats that case.
/// </remarks>
public static ushort3 lcm(short3 x, short3 y)
{
    ushort3 magnitudeX = (ushort3)abs(x);
    ushort3 magnitudeY = (ushort3)abs(y);
    ushort3 commonDivisor = gcd(magnitudeX, magnitudeY);

    return (magnitudeX / commonDivisor) * magnitudeY;
}
/// <summary>Constructs a ushort3x4 from four column vectors.</summary>
/// <param name="c0">Value stored in column 0.</param>
/// <param name="c1">Value stored in column 1.</param>
/// <param name="c2">Value stored in column 2.</param>
/// <param name="c3">Value stored in column 3.</param>
public ushort3x4(ushort3 c0, ushort3 c1, ushort3 c2, ushort3 c3)
{
    // `this.` is required: the parameters shadow the fields of the same name.
    this.c0 = c0;
    this.c1 = c1;
    this.c2 = c2;
    this.c3 = c3;
}
/// <summary>
/// Constructs a ushort8 from a ushort3 (elements 0-2), a ushort2 (elements 3-4) and a ushort3 (elements 5-7).
/// </summary>
public ushort8(ushort3 x012, ushort2 x34, ushort3 x567)
{
    if (!Sse2.IsSse2Supported)
    {
        // Scalar path: copy every element into its slot.
        this.x0 = x012.x;
        this.x1 = x012.y;
        this.x2 = x012.z;
        this.x3 = x34.x;
        this.x4 = x34.y;
        this.x5 = x567.x;
        this.x6 = x567.y;
        this.x7 = x567.z;

        return;
    }

    // Move x567 up by two elements so it sits directly behind the two elements of x34.
    v128 tail = Sse2.bslli_si128(x567, 2 * sizeof(short));

    if (Sse4_1.IsSse41Supported)
    {
        // Elements 0-1 come from x34, elements 2-4 from the shifted x567.
        v128 upperFive = Sse4_1.blend_epi16(x34, tail, 0b0001_1100);
        // Move the five elements up by three so they occupy elements 3-7.
        v128 positioned = Sse2.bslli_si128(upperFive, 3 * sizeof(short));

        this = Sse4_1.blend_epi16(x012, positioned, 0b1111_1000);
    }
    else
    {
        // Same sequence using the library's SSE2 blend helper instead of the SSE4.1 instruction.
        v128 upperFive = Mask.BlendEpi16_SSE2(x34, tail, 0b0001_1100);
        v128 positioned = Sse2.bslli_si128(upperFive, 3 * sizeof(short));

        this = Mask.BlendEpi16_SSE2(x012, positioned, 0b1111_1000);
    }
}
/// <summary>
/// Constructs a ushort8 from a ushort2 (elements 0-1), a ushort3 (elements 2-4) and a ushort3 (elements 5-7).
/// </summary>
public ushort8(ushort2 x01, ushort3 x234, ushort3 x567)
{
    if (!Sse2.IsSse2Supported)
    {
        // Scalar path: copy every element into its slot.
        this.x0 = x01.x;
        this.x1 = x01.y;
        this.x2 = x234.x;
        this.x3 = x234.y;
        this.x4 = x234.z;
        this.x5 = x567.x;
        this.x6 = x567.y;
        this.x7 = x567.z;

        return;
    }

    // Byte-shift each trailing vector up to its final element position.
    v128 middle = Sse2.bslli_si128(x234, 2 * sizeof(ushort));
    v128 top = Sse2.bslli_si128(x567, 5 * sizeof(ushort));

    if (Sse4_1.IsSse41Supported)
    {
        // Elements 5-7 come from `top`, everything below from `middle`.
        v128 upperSix = Sse4_1.blend_epi16(middle, top, 0b1110_0000);

        // Elements 0-1 come from x01, elements 2-7 from the combined vector.
        this = Sse4_1.blend_epi16(x01, upperSix, 0b1111_1100);
    }
    else
    {
        // Same sequence using the library's SSE2 blend helper instead of the SSE4.1 instruction.
        v128 upperSix = Mask.BlendEpi16_SSE2(middle, top, 0b1110_0000);

        this = Mask.BlendEpi16_SSE2(x01, upperSix, 0b1111_1100);
    }
}
/// <summary>
/// Finds the smallest component of <paramref name="x"/>, writes it to <paramref name="min"/> and returns its index (0, 1 or 2).
/// </summary>
/// <param name="x">Vector to search.</param>
/// <param name="min">Receives the smallest component value.</param>
/// <returns>Index of the smallest component; in the scalar path ties resolve to the lowest index.</returns>
public static int cminpos(ushort3 x, out ushort min)
{
    if (Sse4_1.IsSse41Supported)
    {
        // Force the five unused 16-bit lanes to 0xFFFF so they cannot undercut the three real components.
        v128 padded = Sse2.or_si128(x, new v128(0u, 0xFFFF_0000u, uint.MaxValue, uint.MaxValue));
        v128 valueAndIndex = Sse4_1.minpos_epu16(padded);

        // The result is read as: lane 0 = minimum value, lane 1 = its index.
        min = valueAndIndex.UShort0;
        return valueAndIndex.UShort1;
    }

    min = cmin(x);

    if (min == x.x)
    {
        return 0;
    }

    return (min == x.y) ? 1 : 2;
}
/// <summary>
/// Constructs a ushort8 from a ushort3 (elements 0-2), a ushort3 (elements 3-5) and a ushort2 (elements 6-7).
/// </summary>
public ushort8(ushort3 x012, ushort3 x345, ushort2 x67)
{
    if (!Sse2.IsSse2Supported)
    {
        // Scalar path: copy every element into its slot.
        this.x0 = x012.x;
        this.x1 = x012.y;
        this.x2 = x012.z;
        this.x3 = x345.x;
        this.x4 = x345.y;
        this.x5 = x345.z;
        this.x6 = x67.x;
        this.x7 = x67.y;

        return;
    }

    // Byte-shift each trailing vector up to its final element position.
    v128 middle = Sse2.bslli_si128(x345, 3 * sizeof(ushort));
    v128 top = Sse2.bslli_si128(x67, 6 * sizeof(ushort));

    if (Sse4_1.IsSse41Supported)
    {
        // Elements 3-5 come from `middle`, the rest from x012.
        v128 lowerSix = Sse4_1.blend_epi16(x012, middle, 0b0011_1000);

        // Elements 6-7 come from `top`.
        this = Sse4_1.blend_epi16(lowerSix, top, 0b1100_0000);
    }
    else
    {
        // Same sequence using the library's SSE2 blend helper instead of the SSE4.1 instruction.
        v128 lowerSix = Mask.BlendEpi16_SSE2(x012, middle, 0b0011_1000);

        this = Mask.BlendEpi16_SSE2(lowerSix, top, 0b1100_0000);
    }
}
/// <summary>
/// Constructs a ushort3x2 from six scalars given in row-major order (mRC = row R, column C).
/// The values are regrouped into the two column vectors c0 and c1.
/// </summary>
public ushort3x2(ushort m00, ushort m01,
                 ushort m10, ushort m11,
                 ushort m20, ushort m21)
{
    c0 = new ushort3(m00, m10, m20);
    c1 = new ushort3(m01, m11, m21);
}
// Component-wise greatest common divisor of x and y.
//
// SSE2 path - binary GCD run on all lanes at once:
//  * Lanes where x or y is zero are resolved up front: result_if_zero_any holds y where x == 0 and x where
//    y == 0 (so 0 when both are zero). Those lanes start out marked as done and are blended in at the very end.
//  * shift = tzcnt(x | y) is the power of two shared by both operands; it is re-applied after the loop.
//  * Each iteration strips the trailing zeros of y, orders the pair so that x <= y (min/max on SSE4.1,
//    compare + blend otherwise) and subtracts x from y. A lane whose y reaches zero for the first time
//    stores its current x into result and is added to doneMask.
//  * The loop ends once the movemask bits selected by bitmask32(3 * sizeof(ushort)) are all set in doneMask.
//    NOTE(review): presumably bitmask32(6) yields the six low bits, i.e. the bytes of the three used lanes - confirm.
//
// Without SSE2 the scalar gcd(uint, uint) overload is called per component and narrowed back to ushort.
public static ushort3 gcd(ushort3 x, ushort3 y) { if (Sse2.IsSse2Supported) { v128 ZERO = default(v128); v128 result = ZERO; v128 result_if_zero_any = ZERO; v128 x_is_zero = Sse2.cmpeq_epi16(x, ZERO); v128 y_is_zero = Sse2.cmpeq_epi16(y, ZERO); v128 any_zero = Sse2.or_si128(x_is_zero, y_is_zero); result_if_zero_any = Mask.BlendV(result_if_zero_any, y, x_is_zero); result_if_zero_any = Mask.BlendV(result_if_zero_any, x, y_is_zero); v128 doneMask = any_zero; ushort3 shift = tzcnt(x | y); x = shrl(x, tzcnt(x)); do { y = shrl(y, tzcnt(y)); if (Sse4_1.IsSse41Supported) { v128 tempX = x; x = Sse4_1.min_epu16(x, y); y = Sse4_1.max_epu16(y, tempX); } else { v128 tempX = x; v128 x_greater_y = Operator.greater_mask_ushort(x, y); x = Mask.BlendV(x, y, x_greater_y); y = Mask.BlendV(y, tempX, x_greater_y); } y -= x; v128 loopCheck = Sse2.andnot_si128(doneMask, Sse2.cmpeq_epi16(y, ZERO)); result = Mask.BlendV(result, x, loopCheck); doneMask = Sse2.or_si128(doneMask, loopCheck); } while (bitmask32(3 * sizeof(ushort)) != (bitmask32(3 * sizeof(ushort)) & Sse2.movemask_epi8(doneMask))); result = shl(result, shift); result = Mask.BlendV(result, result_if_zero_any, any_zero); return(result); } else { return(new ushort3((ushort)gcd((uint)x.x, (uint)y.x), (ushort)gcd((uint)x.y, (uint)y.y), (ushort)gcd((uint)x.z, (uint)y.z))); } }
/// <summary>Reverses the bit order of each 16-bit component of <paramref name="x"/>.</summary>
/// <remarks>Swaps progressively larger groups: adjacent bits, bit pairs, nibbles and finally the two bytes.</remarks>
public static ushort3 reversebits(ushort3 x)
{
    ushort3 bitsSwapped = ((x >> 1) & 0x5555) | ((x & 0x5555) << 1);
    ushort3 pairsSwapped = ((bitsSwapped >> 2) & 0x3333) | ((bitsSwapped & 0x3333) << 2);
    ushort3 nibblesSwapped = ((pairsSwapped >> 4) & 0x0F0F) | ((pairsSwapped & 0x0F0F) << 4);

    return (nibblesSwapped >> 8) | (nibblesSwapped << 8);
}
/// <summary>
/// Verifies that the ushort3(ushort, ushort2) constructor places the scalar in x and the two
/// components of the ushort2 in y and z.
/// </summary>
public static void Constructor_UShort_UShort2()
{
    ushort3 x = new ushort3(TestData_LHS[0].x, new ushort2(TestData_LHS[0].y, TestData_LHS[0].z));

    // Expected value goes first (NUnit convention, and the order used by the sibling operator tests),
    // so that a failure is reported as "expected True, but was False" rather than the reverse.
    Assert.AreEqual(true, x.x == TestData_LHS[0].x & x.y == TestData_LHS[0].y & x.z == TestData_LHS[0].z);
}
/// <summary>
/// Constructs a ushort3x3 from nine scalars given in row-major order (mRC = row R, column C).
/// The values are regrouped into the three column vectors c0, c1 and c2.
/// </summary>
public ushort3x3(ushort m00, ushort m01, ushort m02,
                 ushort m10, ushort m11, ushort m12,
                 ushort m20, ushort m21, ushort m22)
{
    c0 = new ushort3(m00, m10, m20);
    c1 = new ushort3(m01, m11, m21);
    c2 = new ushort3(m02, m12, m22);
}
/// <summary>
/// Constructs a ushort3x4 from twelve scalars given in row-major order (mRC = row R, column C).
/// The values are regrouped into the four column vectors c0 through c3.
/// </summary>
public ushort3x4(ushort m00, ushort m01, ushort m02, ushort m03,
                 ushort m10, ushort m11, ushort m12, ushort m13,
                 ushort m20, ushort m21, ushort m22, ushort m23)
{
    c0 = new ushort3(m00, m10, m20);
    c1 = new ushort3(m01, m11, m21);
    c2 = new ushort3(m02, m12, m22);
    c3 = new ushort3(m03, m13, m23);
}
/// <summary>
/// For each component, returns the value with only its highest set bit kept,
/// i.e. the largest power of two that is less than or equal to the component (0 stays 0).
/// </summary>
public static ushort3 floorpow2(ushort3 x)
{
    // Smear the highest set bit into every lower bit position.
    ushort3 smeared = x | (x >> 1);
    smeared |= smeared >> 2;
    smeared |= smeared >> 4;
    smeared |= smeared >> 8;

    // Dropping the lower half of the smeared run leaves just the top bit.
    return smeared - (smeared >> 1);
}
/// <summary>
/// For each component, rounds up to a power of two: decrement, smear the highest set bit
/// into all lower positions, then increment.
/// </summary>
/// <remarks>Arithmetic is 16-bit per component, so the decrement and increment wrap around at the range limits.</remarks>
public static ushort3 ceilpow2(ushort3 x)
{
    // Decrement first so that exact powers of two map to themselves.
    ushort3 smeared = x - 1;

    smeared |= smeared >> 1;
    smeared |= smeared >> 2;
    smeared |= smeared >> 4;
    smeared |= smeared >> 8;

    return smeared + 1;
}
/// <summary>Component-wise average of <paramref name="x"/> and <paramref name="y"/>, rounded up: (x + y + 1) >> 1.</summary>
public static ushort3 avg(ushort3 x, ushort3 y)
{
    if (!Sse2.IsSse2Supported)
    {
        // The sums are evaluated as int, so they cannot overflow before the shift.
        ushort averageX = (ushort)((x.x + y.x + 1) >> 1);
        ushort averageY = (ushort)((x.y + y.y + 1) >> 1);
        ushort averageZ = (ushort)((x.z + y.z + 1) >> 1);

        return new ushort3(averageX, averageY, averageZ);
    }

    return Sse2.avg_epu16(x, y);
}
/// <summary>
/// Checks maxmath.intsqrt(ushort3) against the scalar reference _intsqrt applied per component,
/// using 64 random vectors from a fixed seed.
/// </summary>
public static void ushort3()
{
    Random16 rng = new Random16(135);

    for (int iteration = 0; iteration < 64; iteration++)
    {
        ushort3 x = rng.NextUShort3();

        ushort3 expected = new ushort3((ushort)_intsqrt(x.x),
                                       (ushort)_intsqrt(x.y),
                                       (ushort)_intsqrt(x.z));

        Assert.AreEqual(expected, maxmath.intsqrt(x));
    }
}
/// <summary>
/// Alternating subtract/add: returns (a.x - b.x, a.y + b.y, a.z - b.z).
/// </summary>
public static ushort3 subadd(ushort3 a, ushort3 b)
{
    if (!Ssse3.IsSsse3Supported)
    {
        // Negate only the y component of b, then subtract, so that y ends up being added.
        ushort3 negatedB = (ushort3)(-(short3)b);
        bool3 negateLane = new bool3(false, true, false);

        return a - select(b, negatedB, negateLane);
    }

    // sign_epi16 negates the lanes of b whose control lane is negative (0xFFFF as a signed short)
    // and keeps those whose control lane is positive (1); the sum then subtracts in x/z and adds in y.
    v128 signedB = Ssse3.sign_epi16(b, new ushort4(ushort.MaxValue, 1, ushort.MaxValue, 1));

    return a + signedB;
}
/// <summary>Returns <paramref name="left"/> AND (NOT <paramref name="right"/>), component-wise.</summary>
public static ushort3 andnot(ushort3 left, ushort3 right)
{
    if (!Sse2.IsSse2Supported)
    {
        return left & ~right;
    }

    // The intrinsic complements its FIRST operand, hence the swapped argument order.
    return Sse2.andnot_si128(right, left);
}
/// <summary>
/// Divides every component of <paramref name="dividend"/> by the scalar <paramref name="divisor"/>,
/// returning the quotient and writing the remainder to <paramref name="remainder"/>.
/// </summary>
/// <remarks>
/// When the divisor is not a compile-time constant the call is forwarded to the vector-divisor overload;
/// otherwise the % and / operators are applied directly.
/// </remarks>
public static ushort3 divrem(ushort3 dividend, ushort divisor, out ushort3 remainder)
{
    if (!Constant.IsConstantExpression(divisor))
    {
        return divrem(dividend, (ushort3)divisor, out remainder);
    }

    remainder = dividend % divisor;

    return dividend / divisor;
}
/// <summary>
/// Checks maxmath.gcd(ushort3, ushort3) against the scalar reference _gcd applied per component,
/// using 64 random vector pairs from a fixed seed.
/// </summary>
public static void ushort3()
{
    Random16 rng = new Random16(135);

    for (int iteration = 0; iteration < 64; iteration++)
    {
        ushort3 x = rng.NextUShort3();
        ushort3 y = rng.NextUShort3();

        ushort3 expected = new ushort3((ushort)_gcd(x.x, y.x),
                                       (ushort)_gcd(x.y, y.y),
                                       (ushort)_gcd(x.z, y.z));

        Assert.AreEqual(expected, maxmath.gcd(x, y));
    }
}
/// <summary>
/// Checks maxmath.intpow(short3, ushort3) against the scalar reference _intpow applied per component,
/// using 64 random base/exponent pairs from a fixed seed.
/// </summary>
public static void short3()
{
    Random16 rng = new Random16(135);

    for (int iteration = 0; iteration < 64; iteration++)
    {
        short3 x = rng.NextShort3();
        ushort3 n = rng.NextUShort3();

        short3 expected = new short3((short)_intpow(x.x, n.x),
                                     (short)_intpow(x.y, n.y),
                                     (short)_intpow(x.z, n.z));

        Assert.AreEqual(expected, maxmath.intpow(x, n));
    }
}
/// <summary>Tests, per component, whether the value is a power of two (non-zero with exactly one bit set).</summary>
public static bool3 ispow2(ushort3 x)
{
    if (!Sse2.IsSse2Supported)
    {
        return new bool3(math.ispow2((uint)x.x),
                         math.ispow2((uint)x.y),
                         math.ispow2((uint)x.z));
    }

    // Lane mask: value is greater than zero (unsigned comparison).
    v128 isNonZero = Operator.greater_mask_ushort(x, default(v128));
    // Lane mask: clearing the lowest set bit leaves nothing behind.
    v128 hasAtMostOneBit = Sse2.cmpeq_epi16(default(v128), x & (x - 1));

    // Reduce the combined mask to 0/1 per lane and narrow to bytes so it can be reinterpreted as a bool3.
    v128 result = (byte3)(new ushort3(1) & Sse2.and_si128(isNonZero, hasAtMostOneBit));

    return *(bool3*)&result;
}
/// <summary>Returns the smallest of the three components of <paramref name="x"/>.</summary>
public static ushort cmin(ushort3 x)
{
    if (!Sse2.IsSse2Supported)
    {
        return (ushort)math.min((uint)x.x, math.min((uint)x.y, (uint)x.z));
    }

    // First pass: the x lane becomes min(x, z).
    ushort3 partial = min(x, x.zyz);

    // Second pass: the x lane becomes min(min(x, z), y).
    return min(partial, partial.yyy).x;
}
/// <summary>
/// Builds a per-lane mask for the unsigned comparison <paramref name="left"/> &gt; <paramref name="right"/> on 16-bit lanes.
/// </summary>
/// <remarks>
/// Only a signed 16-bit greater-than compare is used here, so the top bit of both operands is flipped first;
/// that maps unsigned order onto signed order.
/// </remarks>
/// <exception cref="CPUFeatureCheckException">Thrown when SSE2 is not supported.</exception>
internal static v128 greater_mask_ushort(ushort3 left, ushort3 right)
{
    if (!Sse2.IsSse2Supported)
    {
        throw new CPUFeatureCheckException();
    }

    ushort4 signBit = 1 << 15;

    v128 biasedLeft = Sse2.xor_si128(left, signBit);
    v128 biasedRight = Sse2.xor_si128(right, signBit);

    return Sse2.cmpgt_epi16(biasedLeft, biasedRight);
}
/// <summary>
/// Verifies that the ushort3 bitwise-NOT operator matches the scalar ~ applied to each component,
/// for every entry of the left-hand test data.
/// </summary>
public static void NOT()
{
    bool allMatch = true;

    for (int test = 0; test < NUM_TESTS; test++)
    {
        ushort3 actual = ~TestData_LHS[test];

        bool xMatches = actual.x == (ushort)(~TestData_LHS[test].x);
        bool yMatches = actual.y == (ushort)(~TestData_LHS[test].y);
        bool zMatches = actual.z == (ushort)(~TestData_LHS[test].z);

        allMatch &= xMatches & yMatches & zMatches;
    }

    Assert.AreEqual(true, allMatch);
}
/// <summary>
/// Verifies that the ushort3 % operator matches the scalar % applied to each component,
/// for every pair of left-hand/right-hand test data.
/// </summary>
public static void Remainder()
{
    bool allMatch = true;

    for (int test = 0; test < NUM_TESTS; test++)
    {
        ushort3 actual = TestData_LHS[test] % TestData_RHS[test];

        bool xMatches = actual.x == (ushort)(TestData_LHS[test].x % TestData_RHS[test].x);
        bool yMatches = actual.y == (ushort)(TestData_LHS[test].y % TestData_RHS[test].y);
        bool zMatches = actual.z == (ushort)(TestData_LHS[test].z % TestData_RHS[test].z);

        allMatch &= xMatches & yMatches & zMatches;
    }

    Assert.AreEqual(true, allMatch);
}
/// <summary>
/// Verifies that the ushort3 * operator matches the scalar product of each component truncated to 16 bits,
/// for every pair of left-hand/right-hand test data.
/// </summary>
public static void Multiply()
{
    bool allMatch = true;

    for (int test = 0; test < NUM_TESTS; test++)
    {
        ushort3 actual = TestData_LHS[test] * TestData_RHS[test];

        bool xMatches = actual.x == (ushort)(TestData_LHS[test].x * TestData_RHS[test].x);
        bool yMatches = actual.y == (ushort)(TestData_LHS[test].y * TestData_RHS[test].y);
        bool zMatches = actual.z == (ushort)(TestData_LHS[test].z * TestData_RHS[test].z);

        allMatch &= xMatches & yMatches & zMatches;
    }

    Assert.AreEqual(true, allMatch);
}
/// <summary>
/// Verifies that the ushort3 - operator matches the scalar difference of each component truncated to 16 bits,
/// for every pair of left-hand/right-hand test data.
/// </summary>
public static void Subtract()
{
    bool allMatch = true;

    for (int test = 0; test < NUM_TESTS; test++)
    {
        ushort3 actual = TestData_LHS[test] - TestData_RHS[test];

        bool xMatches = actual.x == (ushort)(TestData_LHS[test].x - TestData_RHS[test].x);
        bool yMatches = actual.y == (ushort)(TestData_LHS[test].y - TestData_RHS[test].y);
        bool zMatches = actual.z == (ushort)(TestData_LHS[test].z - TestData_RHS[test].z);

        allMatch &= xMatches & yMatches & zMatches;
    }

    Assert.AreEqual(true, allMatch);
}
// External (native) function declaration; the implementation lives outside managed code.
// Parameters: dstHost is a ushort3 passed by reference, srcDevice is a CUdeviceptr and
// ByteCount is a SizeT; the call returns a CUResult.
// NOTE(review): presumably binds the CUDA driver API entry point of the same name, copying ByteCount
// bytes from device memory at srcDevice into dstHost - confirm against the import attribute, which is
// not visible here, and make sure callers pass a ByteCount that fits a single ushort3.
public static extern CUResult cuMemcpyDtoH_v2(ref ushort3 dstHost, CUdeviceptr srcDevice, SizeT ByteCount);