예제 #1
0
        /// <summary>
        /// Creates a <see cref="ushort8"/> from a 2-component vector followed by two 3-component vectors.
        /// </summary>
        /// <param name="x01">Supplies components 0 and 1.</param>
        /// <param name="x234">Supplies components 2, 3 and 4.</param>
        /// <param name="x567">Supplies components 5, 6 and 7.</param>
        public ushort8(ushort2 x01, ushort3 x234, ushort3 x567)
        {
            if (Sse2.IsSse2Supported)
            {
                // One bit per 16-bit lane; a set bit takes that lane from the second blend operand.
                const int LANES_5_TO_7 = 0b1110_0000;
                const int LANES_2_TO_7 = 0b1111_1100;

                // Byte-shift each 3-vector upwards so its lanes sit at their destination indices.
                v128 at2 = Sse2.bslli_si128(x234, 2 * sizeof(ushort));
                v128 at5 = Sse2.bslli_si128(x567, 5 * sizeof(ushort));
                v128 tail;

                if (Sse4_1.IsSse41Supported)
                {
                    tail = Sse4_1.blend_epi16(at2, at5, LANES_5_TO_7);
                    this = Sse4_1.blend_epi16(x01, tail, LANES_2_TO_7);
                }
                else
                {
                    // Same two blends through the project's SSE2 helper.
                    tail = Mask.BlendEpi16_SSE2(at2, at5, LANES_5_TO_7);
                    this = Mask.BlendEpi16_SSE2(x01, tail, LANES_2_TO_7);
                }
            }
            else
            {
                // Scalar path: copy every component individually.
                x0 = x01.x;
                x1 = x01.y;
                x2 = x234.x;
                x3 = x234.y;
                x4 = x234.z;
                x5 = x567.x;
                x6 = x567.y;
                x7 = x567.z;
            }
        }
예제 #2
0
 /// <summary>
 /// Creates a <see cref="ushort2x3"/> from six scalars named mRC, where R picks the
 /// component inside a vector (0 = x, 1 = y) and C picks which of c0..c2 receives it.
 /// </summary>
 public ushort2x3(ushort m00, ushort m01, ushort m02,
                  ushort m10, ushort m11, ushort m12)
 {
     ushort2 first  = new ushort2(m00, m10);
     ushort2 second = new ushort2(m01, m11);
     ushort2 third  = new ushort2(m02, m12);

     c0 = first;
     c1 = second;
     c2 = third;
 }
예제 #3
0
        /// <summary>
        /// Computes, per component, the least common multiple of the absolute values of
        /// <paramref name="x"/> and <paramref name="y"/>.
        /// </summary>
        /// <returns>(|x| / gcd(|x|, |y|)) * |y| for each component.</returns>
        public static ushort2 lcm(short2 x, short2 y)
        {
            ushort2 magnitudeX = (ushort2)abs(x);
            ushort2 magnitudeY = (ushort2)abs(y);
            ushort2 commonDivisor = gcd(magnitudeX, magnitudeY);

            // Divide before multiplying so the intermediate value stays as small as possible.
            return (magnitudeX / commonDivisor) * magnitudeY;
        }
예제 #4
0
        /// <summary>
        /// Creates a <see cref="ushort8"/> from a 3-component vector, a 2-component vector
        /// and another 3-component vector, in that order.
        /// </summary>
        /// <param name="x012">Supplies components 0, 1 and 2.</param>
        /// <param name="x34">Supplies components 3 and 4.</param>
        /// <param name="x567">Supplies components 5, 6 and 7.</param>
        public ushort8(ushort3 x012, ushort2 x34, ushort3 x567)
        {
            if (Sse2.IsSse2Supported)
            {
                // One bit per 16-bit lane; a set bit takes that lane from the second blend operand.
                const int LANES_2_TO_4 = 0b0001_1100;
                const int LANES_3_TO_7 = 0b1111_1000;

                // Place x567 directly behind the two lanes of x34 ...
                v128 upper = Sse2.bslli_si128(x567, 2 * sizeof(short));

                if (Sse4_1.IsSse41Supported)
                {
                    // ... merge them into one 5-lane run, then move that run up to lane 3.
                    upper = Sse4_1.blend_epi16(x34, upper, LANES_2_TO_4);
                    upper = Sse2.bslli_si128(upper, 3 * sizeof(short));

                    this = Sse4_1.blend_epi16(x012, upper, LANES_3_TO_7);
                }
                else
                {
                    // Same sequence through the project's SSE2 blend helper.
                    upper = Mask.BlendEpi16_SSE2(x34, upper, LANES_2_TO_4);
                    upper = Sse2.bslli_si128(upper, 3 * sizeof(short));

                    this = Mask.BlendEpi16_SSE2(x012, upper, LANES_3_TO_7);
                }
            }
            else
            {
                // Scalar path: copy every component individually.
                x0 = x012.x;
                x1 = x012.y;
                x2 = x012.z;
                x3 = x34.x;
                x4 = x34.y;
                x5 = x567.x;
                x6 = x567.y;
                x7 = x567.z;
            }
        }
예제 #5
0
        /// <summary>
        /// Creates a <see cref="ushort8"/> from two 3-component vectors followed by a 2-component vector.
        /// </summary>
        /// <param name="x012">Supplies components 0, 1 and 2.</param>
        /// <param name="x345">Supplies components 3, 4 and 5.</param>
        /// <param name="x67">Supplies components 6 and 7.</param>
        public ushort8(ushort3 x012, ushort3 x345, ushort2 x67)
        {
            if (Sse2.IsSse2Supported)
            {
                // One bit per 16-bit lane; a set bit takes that lane from the second blend operand.
                const int LANES_3_TO_5 = 0b0011_1000;
                const int LANES_6_TO_7 = 0b1100_0000;

                // Byte-shift the later vectors upwards so their lanes sit at their destination indices.
                v128 at3 = Sse2.bslli_si128(x345, 3 * sizeof(ushort));
                v128 at6 = Sse2.bslli_si128(x67, 6 * sizeof(ushort));
                v128 lowerSix;

                if (Sse4_1.IsSse41Supported)
                {
                    lowerSix = Sse4_1.blend_epi16(x012, at3, LANES_3_TO_5);
                    this     = Sse4_1.blend_epi16(lowerSix, at6, LANES_6_TO_7);
                }
                else
                {
                    // Same two blends through the project's SSE2 helper.
                    lowerSix = Mask.BlendEpi16_SSE2(x012, at3, LANES_3_TO_5);
                    this     = Mask.BlendEpi16_SSE2(lowerSix, at6, LANES_6_TO_7);
                }
            }
            else
            {
                // Scalar path: copy every component individually.
                x0 = x012.x;
                x1 = x012.y;
                x2 = x012.z;
                x3 = x345.x;
                x4 = x345.y;
                x5 = x345.z;
                x6 = x67.x;
                x7 = x67.y;
            }
        }
예제 #6
0
 /// <summary>
 /// Creates a <see cref="ushort2x4"/> in which c0, c1, c2 and c3 are all set from the
 /// single scalar <paramref name="v"/>.
 /// </summary>
 public ushort2x4(ushort v)
 {
     // Convert the scalar to a vector once and reuse it for every field.
     ushort2 splat = v;

     c0 = splat;
     c1 = splat;
     c2 = splat;
     c3 = splat;
 }
예제 #7
0
 /// <summary>
 /// Creates a <see cref="ushort2x4"/> from four 2-component vectors.
 /// </summary>
 /// <param name="c0">Stored in the c0 field.</param>
 /// <param name="c1">Stored in the c1 field.</param>
 /// <param name="c2">Stored in the c2 field.</param>
 /// <param name="c3">Stored in the c3 field.</param>
 public ushort2x4(ushort2 c0, ushort2 c1, ushort2 c2, ushort2 c3)
 {
     // The parameters shadow the fields, hence the explicit 'this.' qualifier.
     this.c0 = c0;
     this.c1 = c1;
     this.c2 = c2;
     this.c3 = c3;
 }
예제 #8
0
        /// <summary>
        /// Counts the trailing zero bits of each 16-bit component of <paramref name="x"/>.
        /// </summary>
        /// <param name="x">The vector whose components are inspected.</param>
        /// <returns>
        /// The per-component trailing zero count. In both SIMD paths a component equal to 0 yields 16;
        /// the scalar path defers to the scalar tzcnt overload.
        /// </returns>
        public static ushort2 tzcnt(ushort2 x)
        {
            if (Ssse3.IsSsse3Supported)
            {
                // Nibble-indexed lookup tables: entry i is the trailing zero count of nibble value i.
                // Entry 0 holds 16 as an "empty nibble" sentinel; the HI table adds 4 because it
                // describes the upper nibble of a byte.
                // NOTE(review): v128(int) presumably broadcasts the value to all lanes - confirm.
                v128 NIBBLE_MASK     = new v128(0x0F0F_0F0F);
                v128 SHUFFLE_MASK_LO = new v128(16, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0);
                v128 SHUFFLE_MASK_HI = new v128(16, 4, 5, 4, 6, 4, 5, 4, 7, 4, 5, 4, 6, 4, 5, 4);

                // Per byte: look up both nibbles and keep the smaller count (16 if the whole byte is 0).
                v128 tzcnt_bytes = Sse2.min_epu8(Ssse3.shuffle_epi8(SHUFFLE_MASK_LO, Sse2.and_si128(NIBBLE_MASK, x)),
                                                 Ssse3.shuffle_epi8(SHUFFLE_MASK_HI, Sse2.and_si128(NIBBLE_MASK, Sse2.srli_epi16(x, 4))));

                // Per 16-bit lane: low byte becomes min(count(lowByte), count(highByte) + 8); the
                // high byte of the result becomes 0 because the shifted operand has a zero high byte.
                return(Sse2.min_epu8(tzcnt_bytes,
                                     Sse2.srli_epi16(Sse2.add_epi8(tzcnt_bytes, Sse2.set1_epi8(8)), 8)));
            }
            else if (Sse2.IsSse2Supported)
            {
                // x & -x isolates the lowest set bit of every component (0 stays 0).
                v128 compareMask = x & (ushort2)(-((short2)x));

                // Each term contributes its weight when the isolated bit is absent from the tested
                // bit pattern; 'first' adds 1 only for a zero input so that the total becomes 16.
                // NOTE(review): Mask.BlendV presumably picks its second argument where the mask is set - confirm.
                ushort2 first  = Mask.BlendV(default(v128), new ushort2(1), Sse2.cmpeq_epi16(compareMask, default(v128)));
                ushort2 second = Mask.BlendV(default(v128), new ushort2(8), Sse2.cmpeq_epi16(compareMask & (ushort2)0x00FF, default(v128)));
                ushort2 third  = Mask.BlendV(default(v128), new ushort2(4), Sse2.cmpeq_epi16(compareMask & (ushort2)0x0F0F, default(v128)));
                ushort2 fourth = Mask.BlendV(default(v128), new ushort2(2), Sse2.cmpeq_epi16(compareMask & (ushort2)0x3333, default(v128)));
                ushort2 fifth  = Mask.BlendV(default(v128), new ushort2(1), Sse2.cmpeq_epi16(compareMask & (ushort2)0x5555, default(v128)));

                return((first + second) + ((third + fourth) + fifth));
            }
            else
            {
                // Scalar path: one scalar tzcnt call per component.
                return(new ushort2(tzcnt(x.x), tzcnt(x.y)));
            }
        }
예제 #9
0
        /// <summary>
        /// Computes the greatest common divisor of <paramref name="x"/> and <paramref name="y"/> per component.
        /// </summary>
        /// <param name="x">First operand.</param>
        /// <param name="y">Second operand.</param>
        /// <returns>
        /// The per-component GCD. In the SIMD path, a component where one operand is 0 returns the
        /// other operand (and 0 when both are 0).
        /// </returns>
        public static ushort2 gcd(ushort2 x, ushort2 y)
        {
            if (Sse2.IsSse2Supported)
            {
                // Binary GCD (shift / subtract) run on both lanes at once, with masks tracking
                // which lanes have already finished.
                v128 ZERO = default(v128);

                v128 result             = ZERO;
                v128 result_if_zero_any = ZERO;

                v128 x_is_zero = Sse2.cmpeq_epi16(x, ZERO);
                v128 y_is_zero = Sse2.cmpeq_epi16(y, ZERO);
                v128 any_zero  = Sse2.or_si128(x_is_zero, y_is_zero);

                // Pre-compute the answer for lanes with a zero operand: the other operand.
                // NOTE(review): Mask.BlendV presumably picks its second argument where the mask is set - confirm.
                result_if_zero_any = Mask.BlendV(result_if_zero_any, y, x_is_zero);
                result_if_zero_any = Mask.BlendV(result_if_zero_any, x, y_is_zero);

                // Lanes with a zero operand count as finished from the start so they cannot stall the loop.
                v128 doneMask = any_zero;

                // Number of factors of two shared by x and y; re-applied to the result after the loop.
                ushort2 shift = tzcnt(x | y);

                // Strip all factors of two from x.
                x = shrl(x, tzcnt(x));

                do
                {
                    // Strip all factors of two from y.
                    y = shrl(y, tzcnt(y));

                    // Order the pair so that x <= y (unsigned) in every lane.
                    if (Sse4_1.IsSse41Supported)
                    {
                        v128 tempX = x;

                        x = Sse4_1.min_epu16(x, y);
                        y = Sse4_1.max_epu16(y, tempX);
                    }
                    else
                    {
                        v128 tempX       = x;
                        v128 x_greater_y = Operator.greater_mask_ushort(x, y);

                        x = Mask.BlendV(x, y, x_greater_y);
                        y = Mask.BlendV(y, tempX, x_greater_y);
                    }

                    y -= x;

                    // Lanes whose y just reached 0 for the first time latch x as their result.
                    v128 loopCheck = Sse2.andnot_si128(doneMask, Sse2.cmpeq_epi16(y, ZERO));
                    result   = Mask.BlendV(result, x, loopCheck);
                    doneMask = Sse2.or_si128(doneMask, loopCheck);
                    // Continue until the low 32 bits of doneMask are all ones.
                    // NOTE(review): assumes the two ushort lanes occupy exactly those 32 bits - confirm.
                } while (-1 != doneMask.SInt0);

                // Restore the shared factors of two.
                result = shl(result, shift);

                // Lanes that had a zero operand take the pre-computed answer instead.
                result = Mask.BlendV(result, result_if_zero_any, any_zero);

                return(result);
            }
            else
            {
                // Scalar path: defer to the uint gcd overload per component.
                return(new ushort2((ushort)gcd((uint)x.x, (uint)y.x), (ushort)gcd((uint)x.y, (uint)y.y)));
            }
        }
예제 #10
0
 /// <summary>
 /// Creates a <see cref="ushort2x4"/> from eight scalars named mRC, where R picks the
 /// component inside a vector (0 = x, 1 = y) and C picks which of c0..c3 receives it.
 /// </summary>
 public ushort2x4(ushort m00, ushort m01, ushort m02, ushort m03,
                  ushort m10, ushort m11, ushort m12, ushort m13)
 {
     ushort2 first  = new ushort2(m00, m10);
     ushort2 second = new ushort2(m01, m11);
     ushort2 third  = new ushort2(m02, m12);
     ushort2 fourth = new ushort2(m03, m13);

     c0 = first;
     c1 = second;
     c2 = third;
     c3 = fourth;
 }
예제 #11
0
        /// <summary>
        /// Reverses the bit order of each 16-bit component of <paramref name="x"/>.
        /// </summary>
        /// <returns>A vector whose components have bit i of the input at bit (15 - i).</returns>
        public static ushort2 reversebits(ushort2 x)
        {
            // Swap progressively larger groups: single bits, bit pairs, nibbles, then bytes.
            ushort2 bitsSwapped    = ((x >> 1) & 0x5555) | ((x & 0x5555) << 1);
            ushort2 pairsSwapped   = ((bitsSwapped >> 2) & 0x3333) | ((bitsSwapped & 0x3333) << 2);
            ushort2 nibblesSwapped = ((pairsSwapped >> 4) & 0x0F0F) | ((pairsSwapped & 0x0F0F) << 4);

            return (nibblesSwapped >> 8) | (nibblesSwapped << 8);
        }
예제 #12
0
        /// <summary>
        /// Rounds each component of <paramref name="x"/> down to a power of two by isolating
        /// its highest set bit.
        /// </summary>
        /// <returns>Per component: the highest set bit of the input (0 stays 0).</returns>
        public static ushort2 floorpow2(ushort2 x)
        {
            // Smear the highest set bit into every lower bit position.
            x = x | (x >> 1);
            x = x | (x >> 2);
            x = x | (x >> 4);
            x = x | (x >> 8);

            // Removing the lower half of the smeared run leaves only the top bit.
            ushort2 lowerBits = x >> 1;

            return x - lowerBits;
        }
예제 #13
0
        /// <summary>
        /// Rounds each component of <paramref name="x"/> up to a power of two using the
        /// decrement / bit-smear / increment technique.
        /// </summary>
        /// <returns>Per component: the smeared value of (x - 1), plus one.</returns>
        public static ushort2 ceilpow2(ushort2 x)
        {
            // Start one below the input so that exact powers of two map to themselves.
            ushort2 filled = x - 1;

            // Smear the highest set bit into every lower bit position.
            filled = filled | (filled >> 1);
            filled = filled | (filled >> 2);
            filled = filled | (filled >> 4);
            filled = filled | (filled >> 8);

            return filled + 1;
        }
예제 #14
0
파일: Average.cs 프로젝트: csritter/MaxMath
 /// <summary>
 /// Returns the average of the two components of <paramref name="c"/>, rounded up.
 /// </summary>
 /// <returns>(c.x + c.y + 1) / 2 as a <see cref="ushort"/>.</returns>
 public static ushort avg(ushort2 c)
 {
     if (Sse2.IsSse2Supported)
     {
         // Bring the second component down to lane 0, then average lane-wise and read lane 0.
         v128 secondInLane0 = Sse2.bsrli_si128(c, 1 * sizeof(ushort));
         v128 averaged      = Sse2.avg_epu16(c, secondInLane0);

         return averaged.UShort0;
     }
     else
     {
         var roundedSum = 1u + csum(c);

         return (ushort)(roundedSum / 2u);
     }
 }
예제 #15
0
파일: Average.cs 프로젝트: csritter/MaxMath
 /// <summary>
 /// Returns the component-wise average of <paramref name="x"/> and <paramref name="y"/>, rounded up.
 /// </summary>
 /// <returns>(x + y + 1) >> 1 for each component.</returns>
 public static ushort2 avg(ushort2 x, ushort2 y)
 {
     if (Sse2.IsSse2Supported)
     {
         v128 averaged = Sse2.avg_epu16(x, y);

         return averaged;
     }
     else
     {
         // The sums are evaluated as int, so the +1 cannot wrap at 16 bits.
         int roundedSumX = x.x + y.x + 1;
         int roundedSumY = x.y + y.y + 1;

         return new ushort2((ushort)(roundedSumX >> 1), (ushort)(roundedSumY >> 1));
     }
 }
예제 #16
0
 /// <summary>
 /// Returns the smaller of the two components of <paramref name="x"/>.
 /// </summary>
 public static ushort cmin(ushort2 x)
 {
     if (Sse2.IsSse2Supported)
     {
         // Compare the vector against a copy holding y in both lanes; lane 0 then holds min(x, y).
         ushort2 yBroadcast = x.yy;
         ushort2 lanewiseMin = min(x, yBroadcast);

         return lanewiseMin.x;
     }
     else
     {
         uint first  = (uint)x.x;
         uint second = (uint)x.y;

         return (ushort)math.min(first, second);
     }
 }
예제 #17
0
파일: AndNot.cs 프로젝트: csritter/MaxMath
 /// <summary>
 /// Returns <paramref name="left"/> with every bit that is set in <paramref name="right"/> cleared.
 /// </summary>
 /// <returns>left &amp; ~right, per component.</returns>
 public static ushort2 andnot(ushort2 left, ushort2 right)
 {
     if (Sse2.IsSse2Supported)
     {
         // The intrinsic inverts its FIRST operand, hence the swapped argument order.
         v128 cleared = Sse2.andnot_si128(right, left);

         return cleared;
     }
     else
     {
         ushort2 keepMask = ~right;

         return left & keepMask;
     }
 }
예제 #18
0
 /// <summary>
 /// Subtracts the x components and adds the y components of the operands.
 /// </summary>
 /// <returns>(a.x - b.x, a.y + b.y).</returns>
 public static ushort2 subadd(ushort2 a, ushort2 b)
 {
     if (Ssse3.IsSsse3Supported)
     {
         // As signed 16-bit values the control lanes are (-1, +1): sign_epi16 negates b.x
         // and leaves b.y untouched, so one addition performs both operations.
         ushort2 signControl = new ushort2(ushort.MaxValue, 1);
         v128 signedB = Ssse3.sign_epi16(b, signControl);

         return a + signedB;
     }
     else
     {
         // Negate only the y lane of b, then subtract: a - (b.x, -b.y).
         ushort2 negatedB = (ushort2)(-(short2)b);
         bool2 negateLane = new bool2(false, true);

         return a - select(b, negatedB, negateLane);
     }
 }
예제 #19
0
 /// <summary>
 /// Divides every component of <paramref name="dividend"/> by the scalar <paramref name="divisor"/>
 /// and also reports the remainders.
 /// </summary>
 /// <param name="dividend">The values to divide.</param>
 /// <param name="divisor">The scalar divisor applied to every component.</param>
 /// <param name="remainder">Receives the per-component remainder.</param>
 /// <returns>The per-component quotient.</returns>
 public static ushort2 divrem(ushort2 dividend, ushort divisor, out ushort2 remainder)
 {
     if (Constant.IsConstantExpression(divisor))
     {
         // With a compile-time constant divisor, use the plain operators directly.
         ushort2 quotient = dividend / divisor;
         remainder = dividend % divisor;

         return quotient;
     }
     else
     {
         // Otherwise forward to the vector-divisor overload.
         ushort2 vectorDivisor = (ushort2)divisor;

         return divrem(dividend, vectorDivisor, out remainder);
     }
 }
예제 #20
0
 /// <summary>
 /// Tests, per component, whether <paramref name="x"/> is a power of two.
 /// </summary>
 /// <returns>true for components that are non-zero and have exactly one bit set.</returns>
 public static bool2 ispow2(ushort2 x)
 {
     if (Sse2.IsSse2Supported)
     {
         // A power of two is greater than zero and loses its only set bit under x & (x - 1).
         v128 isPositive      = Operator.greater_mask_ushort(x, default(v128));
         v128 lowestBitIsOnly = Sse2.cmpeq_epi16(default(v128), x & (x - 1));

         // Reduce the all-ones lane masks to 0/1 and narrow to bytes so the bits can be read as bool2.
         v128 result = (byte2)(new ushort2(1) & Sse2.and_si128(isPositive, lowestBitIsOnly));

         return *(bool2 *)&result;
     }
     else
     {
         bool first  = math.ispow2((uint)x.x);
         bool second = math.ispow2((uint)x.y);

         return new bool2(first, second);
     }
 }
예제 #21
0
        /// <summary>
        /// Builds a per-lane mask for the unsigned comparison left &gt; right.
        /// </summary>
        /// <returns>The result of a signed 16-bit greater-than compare on the sign-flipped operands.</returns>
        /// <exception cref="CPUFeatureCheckException">Thrown when SSE2 is not supported.</exception>
        internal static v128 greater_mask_ushort(ushort2 left, ushort2 right)
        {
            if (Sse2.IsSse2Supported)
            {
                // SSE2 only offers a signed 16-bit compare; flipping the top bit of both operands
                // maps unsigned ordering onto signed ordering.
                ushort2 signBit = 1 << 15;

                v128 biasedLeft  = Sse2.xor_si128(left, signBit);
                v128 biasedRight = Sse2.xor_si128(right, signBit);

                return Sse2.cmpgt_epi16(biasedLeft, biasedRight);
            }
            else
            {
                throw new CPUFeatureCheckException();
            }
        }
예제 #22
0
        /// <summary>
        /// Returns a random <see cref="ushort2"/> obtained by scaling two fresh generator states
        /// by <paramref name="max"/>: each component is the upper 16 bits of the product max * state.
        /// </summary>
        /// <param name="max">Per-component scale; both components are asserted to be positive.</param>
        /// <returns>
        /// Per component (max * state) >> 16.
        /// NOTE(review): this is below <paramref name="max"/> provided NextState() yields a 16-bit value - confirm.
        /// </returns>
        public ushort2 NextUShort2(ushort2 max)
        {
            Assert.IsPositive(max.x);
            Assert.IsPositive(max.y);

            if (Sse2.IsSse2Supported)
            {
                // Both operands are unsigned, so the UNSIGNED high-half multiply is required.
                // The signed variant (mulhi_epi16) treats any operand >= 0x8000 as negative,
                // which gives wrong results (even values above 'max') and disagrees with the
                // scalar path below.
                return(Sse2.mulhi_epu16(max, new ushort2((ushort)NextState(), (ushort)NextState())));
            }
            else
            {
                // Widen to 32 bits, multiply, and keep the upper 16 bits of the product.
                return((ushort2)(((uint2)max * new uint2(NextState(), NextState())) >> 16));
            }
        }
예제 #23
0
        /// <summary>
        /// Returns a random <see cref="ushort2"/> starting at <paramref name="min"/>: each component is
        /// min plus the upper 16 bits of the product (max - min) * state, using two fresh generator states.
        /// </summary>
        /// <param name="min">Per-component lower bound.</param>
        /// <param name="max">Per-component upper bound; asserted to be not smaller than <paramref name="min"/>.</param>
        /// <returns>
        /// Per component min + (((max - min) * state) >> 16).
        /// NOTE(review): this is below <paramref name="max"/> provided NextState() yields a 16-bit value - confirm.
        /// </returns>
        public ushort2 NextUShort2(ushort2 min, ushort2 max)
        {
            Assert.IsNotSmaller(max.x, min.x);
            Assert.IsNotSmaller(max.y, min.y);

            if (Sse2.IsSse2Supported)
            {
                // The range and the state are unsigned, so the UNSIGNED high-half multiply is
                // required. The signed variant (mulhi_epi16) treats any operand >= 0x8000 as
                // negative, which gives wrong results and disagrees with the scalar path below.
                return(min + Sse2.mulhi_epu16(max - min, new ushort2((ushort)NextState(), (ushort)NextState())));
            }
            else
            {
                // Widen to 32 bits, multiply, and keep the upper 16 bits of the product.
                return(min + (ushort2)(((uint2)(max - min) * new uint2(NextState(), NextState())) >> 16));
            }
        }
예제 #24
0
파일: Max.cs 프로젝트: csritter/MaxMath
 /// <summary>
 /// Returns the component-wise unsigned maximum of <paramref name="a"/> and <paramref name="b"/>.
 /// </summary>
 public static ushort2 max(ushort2 a, ushort2 b)
 {
     if (Sse4_1.IsSse41Supported)
     {
         v128 larger = Sse4_1.max_epu16(a, b);

         return larger;
     }
     else if (Sse2.IsSse2Supported)
     {
         // No unsigned 16-bit max is used on this path: blend a and b with a "b > a" lane mask.
         v128 bIsGreater = Operator.greater_mask_ushort(b, a);

         return Mask.BlendV(a, b, bIsGreater);
     }
     else
     {
         ushort largerX = (ushort)math.max((uint)a.x, (uint)b.x);
         ushort largerY = (ushort)math.max((uint)a.y, (uint)b.y);

         return new ushort2(largerX, largerY);
     }
 }
예제 #25
0
        /// <summary>
        /// Compares <paramref name="x"/> with <paramref name="y"/> per component.
        /// </summary>
        /// <returns>
        /// In the SSE2 path: 1 where x &gt; y, -1 where y &gt; x and 0 otherwise. The scalar path
        /// defers to the scalar compareto overload.
        /// </returns>
        public static short2 compareto(ushort2 x, ushort2 y)
        {
            if (Sse2.IsSse2Supported)
            {
                // Compare masks read as short are -1 where the comparison holds and 0 elsewhere.
                short2 xWins = Operator.greater_mask_ushort(x, y);
                short2 yWins = Operator.greater_mask_ushort(y, x);

                // Negating xWins turns its -1 into +1; adding yWins contributes the -1 case.
                return (0 - xWins) + yWins;
            }
            else
            {
                short orderX = (short)compareto(x.x, y.x);
                short orderY = (short)compareto(x.y, y.y);

                return new short2(orderX, orderY);
            }
        }
예제 #26
0
        /// <summary>
        /// Tests, per component, whether <paramref name="dividend"/> is evenly divisible by
        /// <paramref name="divisor"/>.
        /// </summary>
        /// <param name="dividend">The values to test.</param>
        /// <param name="divisor">The divisors; both components are asserted to be non-zero.</param>
        /// <returns>true for components where the division leaves no remainder.</returns>
        public static bool2 isdivisible(ushort2 dividend, ushort2 divisor)
        {
            Assert.AreNotEqual(0, divisor.x);
            Assert.AreNotEqual(0, divisor.y);

            if (Constant.IsConstantExpression(divisor))
            {
                // With a constant divisor, swap the modulo for a multiply-and-compare against
                // a 32-bit magic value derived from the divisor.
                uint2 magic = (new uint2(uint.MaxValue) / divisor) + 1;

                return dividend * magic <= magic - 1;
            }
            else
            {
                return dividend % divisor == 0;
            }
        }
예제 #27
0
        /// <summary>
        /// Returns a random <see cref="sbyte2"/> starting at <paramref name="min"/>: the range
        /// (max - min) is multiplied by two fresh generator states and the second-lowest byte of
        /// each 16-bit product is added to min.
        /// </summary>
        /// <param name="min">Per-component lower bound.</param>
        /// <param name="max">Per-component upper bound; asserted to be not smaller than <paramref name="min"/>.</param>
        public sbyte2 NextSByte2(sbyte2 min, sbyte2 max)
        {
            Assert.IsNotSmaller(max.x, min.x);
            Assert.IsNotSmaller(max.y, min.y);

            if (Ssse3.IsSsse3Supported)
            {
                ushort2 range  = (ushort2)(max - min);
                ushort2 states = new ushort2(NextState(), NextState());
                ushort2 scaled = range * states;

                // Bytes 1 and 3 are the high bytes of the two 16-bit products, i.e. product >> 8.
                return min + Ssse3.shuffle_epi8(scaled, new byte4(1, 3, 0, 0));
            }
            else
            {
                ushort2 range  = (ushort2)(max - min);
                ushort2 states = new ushort2(NextState(), NextState());

                return min + (sbyte2)((range * states) >> 8);
            }
        }
예제 #28
0
        /// <summary>
        /// Divides <paramref name="dividend"/> by <paramref name="divisor"/> per component and also
        /// reports the remainders.
        /// </summary>
        /// <param name="dividend">The values to divide.</param>
        /// <param name="divisor">The per-component divisors.</param>
        /// <param name="remainder">Receives the per-component remainder.</param>
        /// <returns>The per-component quotient.</returns>
        public static ushort2 divrem(ushort2 dividend, ushort2 divisor, out ushort2 remainder)
        {
            ushort2 result = dividend / divisor;

            if (Sse2.IsSse2Supported)
            {
                // Derive the remainder from the quotient instead of using a second '%' operation.
                ushort2 wholePart = result * divisor;

                remainder = dividend - wholePart;
            }
            else
            {
                remainder = dividend % divisor;
            }

            return result;
        }
예제 #29
0
        /// <summary>
        /// Counts the leading zero bits of each 16-bit component of <paramref name="x"/>.
        /// </summary>
        /// <param name="x">The vector whose components are inspected.</param>
        /// <returns>
        /// The per-component leading zero count. In both SIMD paths a component equal to 0 yields 16;
        /// the scalar path defers to the scalar lzcnt overload.
        /// </returns>
        public static ushort2 lzcnt(ushort2 x)
        {
            if (Ssse3.IsSsse3Supported)
            {
                // Nibble-indexed lookup tables: entry i is the leading zero count of a byte whose
                // low (LO) or high (HI) nibble equals i, with 16 as the "empty nibble" sentinel.
                // NOTE(review): v128(int) presumably broadcasts the value to all lanes - confirm.
                v128 NIBBLE_MASK     = new v128(0x0F0F_0F0F);
                v128 SHUFFLE_MASK_LO = new v128(16, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4);
                v128 SHUFFLE_MASK_HI = new v128(16, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0);

                // Per byte: look up both nibbles and keep the smaller count (16 if the whole byte is 0).
                v128 lzcnt_bytes = Sse2.min_epu8(Ssse3.shuffle_epi8(SHUFFLE_MASK_LO, Sse2.and_si128(NIBBLE_MASK, x)),
                                                 Ssse3.shuffle_epi8(SHUFFLE_MASK_HI, Sse2.and_si128(NIBBLE_MASK, Sse2.srli_epi16(x, 4))));

                // Per 16-bit lane: low byte becomes min(count(lowByte) + 8, count(highByte)); the
                // high byte of the result becomes 0 because the shifted operand has a zero high byte.
                return(Sse2.min_epu8(Sse2.add_epi8(lzcnt_bytes, Sse2.set1_epi16(8)),
                                     Sse2.srli_epi16(lzcnt_bytes, 8)));
            }
            else if (Sse2.IsSse2Supported)
            {
                // Branch-free binary search: at every step, if the upper part (x >> k) is non-zero,
                // continue with it and reduce the running count n by k; otherwise keep x and n.
                // NOTE(review): Mask.BlendV presumably picks its second argument where the mask is set - confirm.
                ushort2 y;
                ushort2 n = 16;
                ushort2 mask;

                y    = x >> 8;
                mask = Sse2.cmpeq_epi16(y, default(v128));
                n    = Mask.BlendV(n - 8, n, mask);
                x    = Mask.BlendV(y, x, mask);

                y    = x >> 4;
                mask = Sse2.cmpeq_epi16(y, default(v128));
                n    = Mask.BlendV(n - 4, n, mask);
                x    = Mask.BlendV(y, x, mask);

                y    = x >> 2;
                mask = Sse2.cmpeq_epi16(y, default(v128));
                n    = Mask.BlendV(n - 2, n, mask);
                x    = Mask.BlendV(y, x, mask);

                y    = x >> 1;
                mask = Sse2.cmpeq_epi16(y, default(v128));

                // At this point x is at most 3: if bit 1 is set the answer is n - 2, otherwise
                // x is 0 or 1 and the answer is n - x.
                return(Mask.BlendV(n - 2, n - x, mask));
            }
            else
            {
                // Scalar path: one scalar lzcnt call per component.
                return(new ushort2(lzcnt(x.x), lzcnt(x.y)));
            }
        }
예제 #30
0
 /// <summary>
 /// Creates a <see cref="ushort8"/> from four 2-component vectors.
 /// </summary>
 /// <param name="x01">Supplies components 0 and 1.</param>
 /// <param name="x23">Supplies components 2 and 3.</param>
 /// <param name="x45">Supplies components 4 and 5.</param>
 /// <param name="x67">Supplies components 6 and 7.</param>
 public ushort8(ushort2 x01, ushort2 x23, ushort2 x45, ushort2 x67)
 {
     if (Sse2.IsSse2Supported)
     {
         // Pair the inputs into two 4-vectors and reuse the (ushort4, ushort4) constructor.
         ushort4 lowerHalf = new ushort4(x01, x23);
         ushort4 upperHalf = new ushort4(x45, x67);

         this = new ushort8(lowerHalf, upperHalf);
     }
     else
     {
         // Scalar path: copy every component individually.
         x0 = x01.x;
         x1 = x01.y;
         x2 = x23.x;
         x3 = x23.y;
         x4 = x45.x;
         x5 = x45.y;
         x6 = x67.x;
         x7 = x67.y;
     }
 }