/// <summary>
/// Compares the two 64-bit lanes of <paramref name="x"/> and <paramref name="y"/> for equality
/// and returns a per-lane mask: a lane is all ones when the operands match, all zeros otherwise.
/// </summary>
/// <param name="x">Left-hand operand, treated as two 64-bit integers.</param>
/// <param name="y">Right-hand operand, treated as two 64-bit integers.</param>
/// <returns>The 64-bit-lane equality mask.</returns>
/// <exception cref="CPUFeatureCheckException">Thrown when neither SSE4.1 nor SSE2 is reported as supported.</exception>
internal static v128 equals_mask_long(v128 x, v128 y)
{
    // SSE4.1 has a native 64-bit compare.
    if (Sse4_1.IsSse41Supported)
    {
        return Sse4_1.cmpeq_epi64(x, y);
    }

    if (!Sse2.IsSse2Supported)
    {
        throw new CPUFeatureCheckException();
    }

    // SSE2 emulation: compare the 32-bit halves individually...
    v128 halvesEqual = Sse2.cmpeq_epi32(x, y);

    // ...bring each lane's upper-half result down next to the lower-half result...
    v128 upperHalvesEqual = Sse2.srli_epi64(halvesEqual, 32);

    // ...so the low dword of every 64-bit lane now holds (low == low) & (high == high)...
    v128 bothHalvesEqual = Sse2.and_si128(halvesEqual, upperHalvesEqual);

    // ...and finally duplicate that low dword across the whole 64-bit lane.
    return Sse2.shuffle_epi32(bothHalvesEqual, Sse.SHUFFLE(2, 2, 0, 0));
}
/// <summary>
/// Raises each component of <paramref name="x"/> to the corresponding non-negative integer
/// power in <paramref name="n"/> using square-and-multiply (one exponent bit per iteration).
/// </summary>
/// <param name="x">The bases.</param>
/// <param name="n">The exponents. An exponent of 0 yields 1 for that component.</param>
/// <returns>The component-wise powers.</returns>
/// <remarks>
/// The vector path runs until every lane's exponent has been shifted down to zero, so the
/// iteration count is governed by the larger of the two exponents.
/// NOTE(review): overflow behaviour depends on <c>Operator.mul_long</c>, which is not visible
/// here - presumably wrapping 64-bit multiplication; confirm.
/// </remarks>
public static long2 intpow(long2 x, ulong2 n)
{
    if (Sse2.IsSse2Supported)
    {
        v128 ZERO = long2.zero;
        v128 ONE = new long2(1);

        // Lanes whose exponent was already exhausted in a previous iteration.
        v128 doneMask = ZERO;
        v128 result = ZERO;

        // p holds x^(2^i) on iteration i; y accumulates the product of the selected powers.
        v128 p = x;
        v128 y = ONE;

    Loop:
        // Multiply the accumulator by the current power only in lanes whose lowest exponent bit is set.
        v128 y_times_p = Operator.mul_long(y, p);
        y = Mask.BlendV(y, y_times_p, Operator.equals_mask_long(ONE, ONE & n));

        n >>= 1;

        // Use the helper rather than Sse4_1.cmpeq_epi64 directly: this branch is only guarded
        // by SSE2, and the helper provides an SSE2 fallback for the 64-bit compare.
        v128 n_is_zero = Operator.equals_mask_long(ZERO, n);

        // Latch the accumulator into the result for lanes that finished on this iteration only,
        // so that later iterations (driven by the other lane) cannot disturb them.
        result = Mask.BlendV(result, y, Sse2.andnot_si128(doneMask, n_is_zero));
        doneMask = n_is_zero;

        // Continue while at least one lane still has exponent bits left.
        // NOTE(review): bitmask32(16) is presumably the 16-bit all-ones byte mask - confirm.
        if (bitmask32(2 * sizeof(long)) != Sse2.movemask_epi8(doneMask))
        {
            p = Operator.mul_long(p, p);

            goto Loop;
        }
        else
        {
            return result;
        }
    }
    else
    {
        // Scalar fallback: defer to the per-component overload.
        return new long2(intpow(x.x, n.x), intpow(x.y, n.y));
    }
}