Пример #1
0
        /// <summary>
        /// Computes the greatest common divisor of each pair of corresponding
        /// components of <paramref name="x"/> and <paramref name="y"/>.
        /// </summary>
        /// <remarks>
        /// In a lane where one operand is zero the other operand is returned
        /// (so a lane with both operands zero yields zero). Without SSE2 each
        /// component is forwarded to the scalar <c>gcd(uint, uint)</c> overload.
        /// </remarks>
        public static ushort4 gcd(ushort4 x, ushort4 y)
        {
            if (Sse2.IsSse2Supported)
            {
                v128 zero = default(v128);

                // Lanes with a zero operand are answered up front: take y where x == 0,
                // then x where y == 0. They are also treated as finished from the start.
                v128 xZero          = Sse2.cmpeq_epi16(x, zero);
                v128 yZero          = Sse2.cmpeq_epi16(y, zero);
                v128 eitherZero     = Sse2.or_si128(xZero, yZero);
                v128 zeroLaneAnswer = Mask.BlendV(Mask.BlendV(zero, y, xZero), x, yZero);

                // Must be taken from the untouched inputs, before x is shifted below;
                // it is applied again to the result after the loop.
                ushort4 commonShift = tzcnt(x | y);

                v128 finished = eitherZero;
                v128 answer   = zero;

                x = shrl(x, tzcnt(x));

                do
                {
                    y = shrl(y, tzcnt(y));

                    // Order every lane so that x holds the smaller and y the larger value.
                    v128 smaller;
                    v128 larger;

                    if (Sse4_1.IsSse41Supported)
                    {
                        smaller = Sse4_1.min_epu16(x, y);
                        larger  = Sse4_1.max_epu16(y, x);
                    }
                    else
                    {
                        v128 xAboveY = Operator.greater_mask_ushort(x, y);

                        smaller = Mask.BlendV(x, y, xAboveY);
                        larger  = Mask.BlendV(y, x, xAboveY);
                    }

                    x = smaller;
                    y = larger;
                    y = y - x;

                    // A lane finishes the first time its difference reaches zero; only
                    // then is x captured, so later iterations cannot overwrite it.
                    v128 newlyDone = Sse2.andnot_si128(finished, Sse2.cmpeq_epi16(y, zero));

                    answer   = Mask.BlendV(answer, x, newlyDone);
                    finished = Sse2.or_si128(finished, newlyDone);
                }
                while (finished.SLong0 != -1); // the low 64 bits cover all four 16-bit lanes

                answer = shl(answer, commonShift);

                return Mask.BlendV(answer, zeroLaneAnswer, eitherZero);
            }
            else
            {
                return new ushort4((ushort)gcd((uint)x.x, (uint)y.x),
                                   (ushort)gcd((uint)x.y, (uint)y.y),
                                   (ushort)gcd((uint)x.z, (uint)y.z),
                                   (ushort)gcd((uint)x.w, (uint)y.w));
            }
        }
Пример #2
0
 /// <summary>
 /// Component-wise "greater than or equal" comparison of two
 /// <see cref="ushort8"/> vectors.
 /// </summary>
 /// <returns>
 /// On the scalar path, a <see cref="bool8"/> whose components are
 /// <c>left.xN &gt;= right.xN</c>; the SIMD paths build the equivalent
 /// lane mask and convert it via <c>TestIsTrue</c> / <c>TestIsFalse</c>.
 /// </returns>
 public static bool8 operator >=(ushort8 left, ushort8 right)
 {
     if (Sse4_1.IsSse41Supported)
     {
         // left >= right exactly when the unsigned maximum of the pair is left.
         v128 pairMax   = Sse4_1.max_epu16(left, right);
         v128 leftIsMax = Sse2.cmpeq_epi16(pairMax, left);

         return TestIsTrue(leftIsMax);
     }
     else if (Sse2.IsSse2Supported)
     {
         // No unsigned max before SSE4.1: test that right is NOT greater than left.
         v128 rightIsGreater = Operator.greater_mask_ushort(right, left);

         return TestIsFalse(rightIsGreater);
     }
     else
     {
         return new bool8(left.x0 >= right.x0,
                          left.x1 >= right.x1,
                          left.x2 >= right.x2,
                          left.x3 >= right.x3,
                          left.x4 >= right.x4,
                          left.x5 >= right.x5,
                          left.x6 >= right.x6,
                          left.x7 >= right.x7);
     }
 }
Пример #3
0
 /// <summary>
 /// Component-wise "greater than or equal" comparison of two
 /// <see cref="ushort2"/> vectors.
 /// </summary>
 /// <returns>
 /// On the scalar path, a <see cref="bool2"/> holding
 /// <c>left.x &gt;= right.x</c> and <c>left.y &gt;= right.y</c>; the SIMD
 /// paths build the equivalent lane mask and convert it via
 /// <c>TestIsTrue</c> / <c>TestIsFalse</c>.
 /// </returns>
 public static bool2 operator >=(ushort2 left, ushort2 right)
 {
     if (Sse4_1.IsSse41Supported)
     {
         // left >= right exactly when the unsigned maximum of the pair is left.
         v128 pairMax   = Sse4_1.max_epu16(left, right);
         v128 leftIsMax = Sse2.cmpeq_epi16(pairMax, left);

         return TestIsTrue(leftIsMax);
     }
     else if (Sse2.IsSse2Supported)
     {
         // No unsigned max before SSE4.1: test that right is NOT greater than left.
         v128 rightIsGreater = Operator.greater_mask_ushort(right, left);

         return TestIsFalse(rightIsGreater);
     }
     else
     {
         bool xResult = left.x >= right.x;
         bool yResult = left.y >= right.y;

         return new bool2(xResult, yResult);
     }
 }
Пример #4
0
 /// <summary>
 /// Returns the component-wise maximum of two <see cref="ushort8"/> vectors.
 /// </summary>
 public static ushort8 max(ushort8 a, ushort8 b)
 {
     if (Sse4_1.IsSse41Supported)
     {
         return Sse4_1.max_epu16(a, b);
     }
     else if (Sse2.IsSse2Supported)
     {
         // Start from a and take b in every lane where b is the greater value.
         v128 bIsGreater = Operator.greater_mask_ushort(b, a);

         return Mask.BlendV(a, b, bIsGreater);
     }
     else
     {
         return new ushort8((ushort)math.max((uint)a.x0, (uint)b.x0),
                            (ushort)math.max((uint)a.x1, (uint)b.x1),
                            (ushort)math.max((uint)a.x2, (uint)b.x2),
                            (ushort)math.max((uint)a.x3, (uint)b.x3),
                            (ushort)math.max((uint)a.x4, (uint)b.x4),
                            (ushort)math.max((uint)a.x5, (uint)b.x5),
                            (ushort)math.max((uint)a.x6, (uint)b.x6),
                            (ushort)math.max((uint)a.x7, (uint)b.x7));
     }
 }
Пример #5
0
 /// <summary>
 /// Returns the component-wise maximum of two <see cref="ushort4"/> vectors.
 /// </summary>
 public static ushort4 max(ushort4 a, ushort4 b)
 {
     if (Sse4_1.IsSse41Supported)
     {
         return Sse4_1.max_epu16(a, b);
     }
     else if (Sse2.IsSse2Supported)
     {
         // Start from a and take b in every lane where b is the greater value.
         v128 bIsGreater = Operator.greater_mask_ushort(b, a);

         return Mask.BlendV(a, b, bIsGreater);
     }
     else
     {
         return new ushort4((ushort)math.max((uint)a.x, (uint)b.x),
                            (ushort)math.max((uint)a.y, (uint)b.y),
                            (ushort)math.max((uint)a.z, (uint)b.z),
                            (ushort)math.max((uint)a.w, (uint)b.w));
     }
 }
Пример #6
0
        /// <summary>
        /// Computes the greatest common divisor of each pair of corresponding
        /// components of <paramref name="x"/> and <paramref name="y"/>.
        /// </summary>
        /// <remarks>
        /// In a lane where one operand is zero the other operand is returned
        /// (so a lane with both operands zero yields zero). Without SSE2 each
        /// component is forwarded to the scalar <c>gcd(uint, uint)</c> overload.
        /// </remarks>
        public static ushort8 gcd(ushort8 x, ushort8 y)
        {
            if (Sse2.IsSse2Supported)
            {
                v128 zero = default(v128);

                // Lanes with a zero operand are answered up front: take y where x == 0,
                // then x where y == 0. They are also treated as finished from the start.
                v128 xZero          = Sse2.cmpeq_epi16(x, zero);
                v128 yZero          = Sse2.cmpeq_epi16(y, zero);
                v128 eitherZero     = Sse2.or_si128(xZero, yZero);
                v128 zeroLaneAnswer = Mask.BlendV(Mask.BlendV(zero, y, xZero), x, yZero);

                // Must be taken from the untouched inputs, before x is shifted below;
                // it is applied again to the result after the loop.
                ushort8 commonShift = tzcnt(x | y);

                v128 finished = eitherZero;
                v128 answer   = zero;

                x = shrl(x, tzcnt(x));

                do
                {
                    y = shrl(y, tzcnt(y));

                    // Order every lane so that x holds the smaller and y the larger value.
                    v128 smaller;
                    v128 larger;

                    if (Sse4_1.IsSse41Supported)
                    {
                        smaller = Sse4_1.min_epu16(x, y);
                        larger  = Sse4_1.max_epu16(y, x);
                    }
                    else
                    {
                        v128 xAboveY = Operator.greater_mask_ushort(x, y);

                        smaller = Mask.BlendV(x, y, xAboveY);
                        larger  = Mask.BlendV(y, x, xAboveY);
                    }

                    x = smaller;
                    y = larger;
                    y = y - x;

                    // A lane finishes the first time its difference reaches zero; only
                    // then is x captured, so later iterations cannot overwrite it.
                    v128 newlyDone = Sse2.andnot_si128(finished, Sse2.cmpeq_epi16(y, zero));

                    answer   = Mask.BlendV(answer, x, newlyDone);
                    finished = Sse2.or_si128(finished, newlyDone);
                }
                while (Sse2.movemask_epi8(finished) != bitmask32(8 * sizeof(ushort))); // loop until the byte mask of all eight lanes is fully set

                answer = shl(answer, commonShift);

                return Mask.BlendV(answer, zeroLaneAnswer, eitherZero);
            }
            else
            {
                ushort g0 = (ushort)gcd((uint)x.x0, (uint)y.x0);
                ushort g1 = (ushort)gcd((uint)x.x1, (uint)y.x1);
                ushort g2 = (ushort)gcd((uint)x.x2, (uint)y.x2);
                ushort g3 = (ushort)gcd((uint)x.x3, (uint)y.x3);
                ushort g4 = (ushort)gcd((uint)x.x4, (uint)y.x4);
                ushort g5 = (ushort)gcd((uint)x.x5, (uint)y.x5);
                ushort g6 = (ushort)gcd((uint)x.x6, (uint)y.x6);
                ushort g7 = (ushort)gcd((uint)x.x7, (uint)y.x7);

                return new ushort8(g0, g1, g2, g3, g4, g5, g6, g7);
            }
        }