/// <summary>
/// Computes the greatest common divisor of each pair of corresponding components of
/// <paramref name="x"/> and <paramref name="y"/>.
/// </summary>
/// <remarks>
/// With SSE2 available, all lanes are processed together by a shift-and-subtract (binary) GCD loop;
/// lanes in which either input is zero take the other input as their result. Without SSE2, each
/// component is widened to <c>uint</c> and forwarded to the scalar <c>gcd</c> overload.
/// </remarks>
/// <param name="x">First operand.</param>
/// <param name="y">Second operand.</param>
/// <returns>A vector holding the per-component results.</returns>
public static ushort4 gcd(ushort4 x, ushort4 y)
{
    if (Sse2.IsSse2Supported)
    {
        v128 zero = default(v128);

        // Lanes with a zero input never enter the subtract loop; their answer is prepared up front.
        v128 xIsZero = Sse2.cmpeq_epi16(x, zero);
        v128 yIsZero = Sse2.cmpeq_epi16(y, zero);
        v128 eitherIsZero = Sse2.or_si128(xIsZero, yIsZero);

        v128 zeroInputResult = Mask.BlendV(zero, y, xIsZero);
        zeroInputResult = Mask.BlendV(zeroInputResult, x, yIsZero);

        v128 accumulated = zero;
        v128 finished = eitherIsZero;

        // Trailing-zero count of (x | y); the result is shifted left by this amount after the loop.
        ushort4 commonShift = tzcnt(x | y);

        x = shrl(x, tzcnt(x));

        do
        {
            y = shrl(y, tzcnt(y));

            // Order the pair so that x holds the smaller and y the larger value of each lane.
            v128 previousX = x;

            if (Sse4_1.IsSse41Supported)
            {
                x = Sse4_1.min_epu16(previousX, y);
                y = Sse4_1.max_epu16(y, previousX);
            }
            else
            {
                v128 xWasGreater = Operator.greater_mask_ushort(previousX, y);

                x = Mask.BlendV(previousX, y, xWasGreater);
                y = Mask.BlendV(y, previousX, xWasGreater);
            }

            y -= x;

            // Lanes whose difference just reached zero (and were not already finished) latch x.
            v128 differenceIsZero = Sse2.cmpeq_epi16(y, zero);
            v128 newlyFinished = Sse2.andnot_si128(finished, differenceIsZero);

            accumulated = Mask.BlendV(accumulated, x, newlyFinished);
            finished = Sse2.or_si128(finished, newlyFinished);
        }
        while (finished.SLong0 != -1); // only the low 64 bits (four 16-bit lanes) are inspected

        accumulated = shl(accumulated, commonShift);
        accumulated = Mask.BlendV(accumulated, zeroInputResult, eitherIsZero);

        return accumulated;
    }

    return new ushort4(
        (ushort)gcd((uint)x.x, (uint)y.x),
        (ushort)gcd((uint)x.y, (uint)y.y),
        (ushort)gcd((uint)x.z, (uint)y.z),
        (ushort)gcd((uint)x.w, (uint)y.w));
}
/// <summary>
/// Component-wise "less than or equal" comparison of two <see cref="ushort8"/> vectors.
/// </summary>
/// <param name="left">Left-hand operand.</param>
/// <param name="right">Right-hand operand.</param>
/// <returns>A <see cref="bool8"/> with one comparison result per component.</returns>
public static bool8 operator <=(ushort8 left, ushort8 right)
{
    if (Sse4_1.IsSse41Supported)
    {
        // left <= right holds exactly when min(left, right) == left.
        v128 smaller = Sse4_1.min_epu16(left, right);
        v128 leftIsSmaller = Sse2.cmpeq_epi16(smaller, left);

        return TestIsTrue(leftIsSmaller);
    }

    if (Sse2.IsSse2Supported)
    {
        // No unsigned 16-bit min before SSE4.1: use the "greater than" mask and invert its meaning.
        var leftIsGreater = Operator.greater_mask_ushort(left, right);

        return TestIsFalse(leftIsGreater);
    }

    // Scalar fallback.
    return new bool8(
        left.x0 <= right.x0,
        left.x1 <= right.x1,
        left.x2 <= right.x2,
        left.x3 <= right.x3,
        left.x4 <= right.x4,
        left.x5 <= right.x5,
        left.x6 <= right.x6,
        left.x7 <= right.x7);
}
/// <summary>
/// Component-wise "less than or equal" comparison of two <see cref="ushort2"/> vectors.
/// </summary>
/// <param name="left">Left-hand operand.</param>
/// <param name="right">Right-hand operand.</param>
/// <returns>A <see cref="bool2"/> with one comparison result per component.</returns>
public static bool2 operator <=(ushort2 left, ushort2 right)
{
    if (Sse4_1.IsSse41Supported)
    {
        // left <= right holds exactly when min(left, right) == left.
        v128 smaller = Sse4_1.min_epu16(left, right);
        v128 leftIsSmaller = Sse2.cmpeq_epi16(smaller, left);

        return TestIsTrue(leftIsSmaller);
    }

    if (Sse2.IsSse2Supported)
    {
        // No unsigned 16-bit min before SSE4.1: use the "greater than" mask and invert its meaning.
        var leftIsGreater = Operator.greater_mask_ushort(left, right);

        return TestIsFalse(leftIsGreater);
    }

    // Scalar fallback.
    return new bool2(
        left.x <= right.x,
        left.y <= right.y);
}
/// <summary>
/// Returns the component-wise minimum of two <see cref="ushort8"/> vectors.
/// </summary>
/// <param name="a">First operand.</param>
/// <param name="b">Second operand.</param>
/// <returns>A vector holding the smaller value of each component pair.</returns>
public static ushort8 min(ushort8 a, ushort8 b)
{
    if (Sse4_1.IsSse41Supported)
    {
        return Sse4_1.min_epu16(a, b);
    }

    if (Sse2.IsSse2Supported)
    {
        // Blend between a and b, driven by the per-lane "a > b" mask.
        var aIsGreater = Operator.greater_mask_ushort(a, b);

        return Mask.BlendV(a, b, aIsGreater);
    }

    // Scalar fallback: each component goes through math.min on uint and is narrowed back.
    return new ushort8(
        (ushort)math.min((uint)a.x0, (uint)b.x0),
        (ushort)math.min((uint)a.x1, (uint)b.x1),
        (ushort)math.min((uint)a.x2, (uint)b.x2),
        (ushort)math.min((uint)a.x3, (uint)b.x3),
        (ushort)math.min((uint)a.x4, (uint)b.x4),
        (ushort)math.min((uint)a.x5, (uint)b.x5),
        (ushort)math.min((uint)a.x6, (uint)b.x6),
        (ushort)math.min((uint)a.x7, (uint)b.x7));
}
/// <summary>
/// Returns the component-wise minimum of two <see cref="ushort4"/> vectors.
/// </summary>
/// <param name="a">First operand.</param>
/// <param name="b">Second operand.</param>
/// <returns>A vector holding the smaller value of each component pair.</returns>
public static ushort4 min(ushort4 a, ushort4 b)
{
    if (Sse4_1.IsSse41Supported)
    {
        return Sse4_1.min_epu16(a, b);
    }

    if (Sse2.IsSse2Supported)
    {
        // Blend between a and b, driven by the per-lane "a > b" mask.
        var aIsGreater = Operator.greater_mask_ushort(a, b);

        return Mask.BlendV(a, b, aIsGreater);
    }

    // Scalar fallback: each component goes through math.min on uint and is narrowed back.
    return new ushort4(
        (ushort)math.min((uint)a.x, (uint)b.x),
        (ushort)math.min((uint)a.y, (uint)b.y),
        (ushort)math.min((uint)a.z, (uint)b.z),
        (ushort)math.min((uint)a.w, (uint)b.w));
}
/// <summary>
/// Computes the greatest common divisor of each pair of corresponding components of
/// <paramref name="x"/> and <paramref name="y"/>.
/// </summary>
/// <remarks>
/// With SSE2 available, all lanes are processed together by a shift-and-subtract (binary) GCD loop;
/// lanes in which either input is zero take the other input as their result. Without SSE2, each
/// component is widened to <c>uint</c> and forwarded to the scalar <c>gcd</c> overload.
/// </remarks>
/// <param name="x">First operand.</param>
/// <param name="y">Second operand.</param>
/// <returns>A vector holding the per-component results.</returns>
public static ushort8 gcd(ushort8 x, ushort8 y)
{
    if (Sse2.IsSse2Supported)
    {
        v128 zero = default(v128);

        // Lanes with a zero input never enter the subtract loop; their answer is prepared up front.
        v128 xIsZero = Sse2.cmpeq_epi16(x, zero);
        v128 yIsZero = Sse2.cmpeq_epi16(y, zero);
        v128 eitherIsZero = Sse2.or_si128(xIsZero, yIsZero);

        v128 zeroInputResult = Mask.BlendV(zero, y, xIsZero);
        zeroInputResult = Mask.BlendV(zeroInputResult, x, yIsZero);

        v128 accumulated = zero;
        v128 finished = eitherIsZero;

        // Trailing-zero count of (x | y); the result is shifted left by this amount after the loop.
        ushort8 commonShift = tzcnt(x | y);

        x = shrl(x, tzcnt(x));

        do
        {
            y = shrl(y, tzcnt(y));

            // Order the pair so that x holds the smaller and y the larger value of each lane.
            v128 previousX = x;

            if (Sse4_1.IsSse41Supported)
            {
                x = Sse4_1.min_epu16(previousX, y);
                y = Sse4_1.max_epu16(y, previousX);
            }
            else
            {
                v128 xWasGreater = Operator.greater_mask_ushort(previousX, y);

                x = Mask.BlendV(previousX, y, xWasGreater);
                y = Mask.BlendV(y, previousX, xWasGreater);
            }

            y -= x;

            // Lanes whose difference just reached zero (and were not already finished) latch x.
            v128 differenceIsZero = Sse2.cmpeq_epi16(y, zero);
            v128 newlyFinished = Sse2.andnot_si128(finished, differenceIsZero);

            accumulated = Mask.BlendV(accumulated, x, newlyFinished);
            finished = Sse2.or_si128(finished, newlyFinished);
        }
        while (Sse2.movemask_epi8(finished) != bitmask32(8 * sizeof(ushort))); // loop until every byte of the mask is set

        accumulated = shl(accumulated, commonShift);
        accumulated = Mask.BlendV(accumulated, zeroInputResult, eitherIsZero);

        return accumulated;
    }

    return new ushort8(
        (ushort)gcd((uint)x.x0, (uint)y.x0),
        (ushort)gcd((uint)x.x1, (uint)y.x1),
        (ushort)gcd((uint)x.x2, (uint)y.x2),
        (ushort)gcd((uint)x.x3, (uint)y.x3),
        (ushort)gcd((uint)x.x4, (uint)y.x4),
        (ushort)gcd((uint)x.x5, (uint)y.x5),
        (ushort)gcd((uint)x.x6, (uint)y.x6),
        (ushort)gcd((uint)x.x7, (uint)y.x7));
}