예제 #1
0
        /// <summary>
        /// Compares the two 64-bit lanes of <paramref name="x"/> and <paramref name="y"/> for equality
        /// and returns the comparison mask as a vector.
        /// </summary>
        /// <param name="x">First vector, treated as two 64-bit lanes.</param>
        /// <param name="y">Second vector, treated as two 64-bit lanes.</param>
        /// <returns>
        /// The result of the native 64-bit compare when SSE4.1 is available; otherwise an SSE2
        /// emulation built from 32-bit compares.
        /// </returns>
        /// <exception cref="CPUFeatureCheckException">Thrown when neither SSE4.1 nor SSE2 is reported as supported.</exception>
        internal static v128 equals_mask_long(v128 x, v128 y)
        {
            // Preferred path: a single native 64-bit compare.
            if (Sse4_1.IsSse41Supported)
            {
                return Sse4_1.cmpeq_epi64(x, y);
            }

            if (Sse2.IsSse2Supported)
            {
                // Per-dword equality: each 64-bit lane now holds two independent 32-bit results.
                v128 dwordsEqual = Sse2.cmpeq_epi32(x, y);

                // Bring each lane's upper dword result down next to the lower one and AND them,
                // so the low dword of every lane is set only if both halves matched.
                v128 upperResults = Sse2.srli_epi64(dwordsEqual, 32);
                v128 lowDwordMask = Sse2.and_si128(dwordsEqual, upperResults);

                // Duplicate dword 0 into dwords 0-1 and dword 2 into dwords 2-3, widening the
                // combined result to cover the whole 64-bit lane.
                return Sse2.shuffle_epi32(lowDwordMask, Sse.SHUFFLE(2, 2, 0, 0));
            }

            throw new CPUFeatureCheckException();
        }
예제 #2
0
        /// <summary>
        /// Raises each lane of <paramref name="x"/> to the power given by the matching lane of
        /// <paramref name="n"/> using square-and-multiply (binary exponentiation).
        /// </summary>
        /// <param name="x">Per-lane bases.</param>
        /// <param name="n">Per-lane unsigned exponents.</param>
        /// <returns>
        /// The per-lane powers. Without SSE2 the result is assembled from the scalar
        /// <c>intpow</c> overload applied to each component.
        /// </returns>
        public static long2 intpow(long2 x, ulong2 n)
        {
            if (Sse2.IsSse2Supported)
            {
                v128 ZERO = long2.zero;
                v128 ONE  = new long2(1);

                v128 doneMask = ZERO;
                v128 result   = ZERO;

                v128 p = x;
                v128 y = ONE;

                while (true)
                {
                    // Fold the current power into the accumulator only for lanes whose lowest
                    // exponent bit is set (the mask is true where (1 & n) == 1).
                    v128 y_times_p = Operator.mul_long(y, p);
                    y = Mask.BlendV(y, y_times_p, Operator.equals_mask_long(ONE, ONE & n));

                    n >>= 1;

                    // This branch is only guarded by SSE2, so the 64-bit compare must go through
                    // equals_mask_long (which selects SSE4.1 or its SSE2 emulation) rather than
                    // calling the SSE4.1 intrinsic directly.
                    v128 n_is_zero = Operator.equals_mask_long(ZERO, n);

                    // Latch the accumulator into the result for lanes that ran out of exponent
                    // bits on this iteration (zero now, not yet flagged as done).
                    result   = Mask.BlendV(result, y, Sse2.andnot_si128(doneMask, n_is_zero));
                    doneMask = n_is_zero;

                    // Every byte of the done mask set means both lanes have finished.
                    if (bitmask32(2 * sizeof(long)) == Sse2.movemask_epi8(doneMask))
                    {
                        return(result);
                    }

                    p = Operator.mul_long(p, p);
                }
            }
            else
            {
                return(new long2(intpow(x.x, n.x), intpow(x.y, n.y)));
            }
        }