Beispiel #1
0
        /// <summary>
        /// Counts the trailing zero bits of each 16-bit component of <paramref name="x"/>.
        /// </summary>
        /// <param name="x">The vector whose components are examined.</param>
        /// <returns>A vector holding one count per component.</returns>
        /// <remarks>
        /// One of three implementations is chosen by CPU feature checks: an SSSE3 nibble-lookup path,
        /// an SSE2 mask-and-sum path, or a per-component scalar fallback.
        /// </remarks>
        public static ushort3 tzcnt(ushort3 x)
        {
            if (Ssse3.IsSsse3Supported)
            {
                // Lookup tables indexed by a nibble value: entry i of the LO table is the trailing zero
                // count of i; the HI table holds the same counts plus 4. Entry 0 (empty nibble) is 16 in both.
                v128 NIBBLE_MASK     = new v128(0x0F0F_0F0F);
                v128 SHUFFLE_MASK_LO = new v128(16, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0);
                v128 SHUFFLE_MASK_HI = new v128(16, 4, 5, 4, 6, 4, 5, 4, 7, 4, 5, 4, 6, 4, 5, 4);

                // Per byte: look up the low nibble and the high nibble (shifted down by 4) and keep the smaller count.
                v128 tzcnt_bytes = Sse2.min_epu8(Ssse3.shuffle_epi8(SHUFFLE_MASK_LO, Sse2.and_si128(NIBBLE_MASK, x)),
                                                 Ssse3.shuffle_epi8(SHUFFLE_MASK_HI, Sse2.and_si128(NIBBLE_MASK, Sse2.srli_epi16(x, 4))));

                // Per 16-bit lane: combine the low byte's count with the high byte's count + 8
                // (moved into the low byte by the 16-bit right shift) and keep the smaller one.
                return(Sse2.min_epu8(tzcnt_bytes,
                                     Sse2.srli_epi16(Sse2.add_epi8(tzcnt_bytes, Sse2.set1_epi8(8)), 8)));
            }
            else if (Sse2.IsSse2Supported)
            {
                // x & -x keeps only the lowest set bit of each component.
                v128 compareMask = x & (ushort3)(-((short3)x));

                // Each term contributes its constant when the isolated bit is absent from the tested bit positions.
                // NOTE(review): presumably Mask.BlendV selects the second operand where the mask is set - confirm.
                ushort3 first  = Mask.BlendV(default(v128), new ushort4(1), Sse2.cmpeq_epi16(compareMask, default(v128)));
                ushort3 second = Mask.BlendV(default(v128), new ushort4(8), Sse2.cmpeq_epi16(compareMask & (ushort4)0x00FF, default(v128)));
                ushort3 third  = Mask.BlendV(default(v128), new ushort4(4), Sse2.cmpeq_epi16(compareMask & (ushort4)0x0F0F, default(v128)));
                ushort3 fourth = Mask.BlendV(default(v128), new ushort4(2), Sse2.cmpeq_epi16(compareMask & (ushort4)0x3333, default(v128)));
                ushort3 fifth  = Mask.BlendV(default(v128), new ushort4(1), Sse2.cmpeq_epi16(compareMask & (ushort4)0x5555, default(v128)));

                return((first + second) + ((third + fourth) + fifth));
            }
            else
            {
                // Scalar fallback: delegate to the single-value overload for each component.
                return(new ushort3(tzcnt(x.x), tzcnt(x.y), tzcnt(x.z)));
            }
        }
Beispiel #2
0
 /// <summary>
 /// Builds a ushort3x4 in which all four columns are initialised from the same scalar.
 /// </summary>
 /// <param name="v">Scalar that is converted to a ushort3 and stored in every column.</param>
 public ushort3x4(ushort v)
 {
     // Convert once, then reuse the converted column for c0..c3.
     ushort3 column = v;

     c0 = column;
     c1 = column;
     c2 = column;
     c3 = column;
 }
        /// <summary>
        /// Computes, per component, |x| / gcd(|x|, |y|) * |y|, i.e. the least common multiple of the
        /// absolute values of <paramref name="x"/> and <paramref name="y"/>.
        /// </summary>
        /// <param name="x">First operand.</param>
        /// <param name="y">Second operand.</param>
        /// <returns>The unsigned result of the division followed by the multiplication.</returns>
        public static ushort3 lcm(short3 x, short3 y)
        {
            ushort3 magnitudeX    = (ushort3)abs(x);
            ushort3 magnitudeY    = (ushort3)abs(y);
            ushort3 commonDivisor = gcd(magnitudeX, magnitudeY);

            // Divide before multiplying so the intermediate value stays as small as possible.
            // NOTE(review): behaviour when commonDivisor has a zero component depends on the ushort3 '/' operator - confirm.
            ushort3 reducedX = magnitudeX / commonDivisor;

            return(reducedX * magnitudeY);
        }
Beispiel #4
0
 /// <summary>
 /// Builds a ushort3x4 from four explicitly supplied columns.
 /// </summary>
 /// <param name="c0">Value stored in column 0.</param>
 /// <param name="c1">Value stored in column 1.</param>
 /// <param name="c2">Value stored in column 2.</param>
 /// <param name="c3">Value stored in column 3.</param>
 public ushort3x4(ushort3 c0, ushort3 c1, ushort3 c2, ushort3 c3)
 {
     // 'this.' is required because the parameters shadow the fields of the same name.
     this.c0 = c0;
     this.c1 = c1;
     this.c2 = c2;
     this.c3 = c3;
 }
Beispiel #5
0
        /// <summary>
        /// Builds a ushort8 from a ushort3 (elements 0..2), a ushort2 (elements 3..4) and a ushort3 (elements 5..7).
        /// </summary>
        /// <param name="x012">Source of elements 0, 1 and 2.</param>
        /// <param name="x34">Source of elements 3 and 4.</param>
        /// <param name="x567">Source of elements 5, 6 and 7.</param>
        public ushort8(ushort3 x012, ushort2 x34, ushort3 x567)
        {
            if (Sse2.IsSse2Supported)
            {
                // Byte-shift x567 left by two 16-bit elements.
                // NOTE(review): lane comments below assume the vector-to-v128 conversions put x, y, z in lanes 0..2 - confirm.
                v128 hi = Sse2.bslli_si128(x567, 2 * sizeof(short));

                if (Sse4_1.IsSse41Supported)
                {
                    // Lanes 2..4 come from the shifted x567, the remaining lanes from x34.
                    hi = Sse4_1.blend_epi16(x34, hi, 0b0001_1100);
                    // Shift the combined five values up by three lanes so they occupy lanes 3..7.
                    hi = Sse2.bslli_si128(hi, 3 * sizeof(short));

                    // Lanes 3..7 come from hi, lanes 0..2 from x012.
                    this = Sse4_1.blend_epi16(x012, hi, 0b1111_1000);
                }
                else
                {
                    // Same sequence using the project's SSE2 blend helper (presumably an emulation of blend_epi16).
                    hi = Mask.BlendEpi16_SSE2(x34, hi, 0b0001_1100);
                    hi = Sse2.bslli_si128(hi, 3 * sizeof(short));

                    this = Mask.BlendEpi16_SSE2(x012, hi, 0b1111_1000);
                }
            }
            else
            {
                // Scalar fallback: assign each element individually.
                this.x0 = x012.x;
                this.x1 = x012.y;
                this.x2 = x012.z;
                this.x3 = x34.x;
                this.x4 = x34.y;
                this.x5 = x567.x;
                this.x6 = x567.y;
                this.x7 = x567.z;
            }
        }
Beispiel #6
0
        /// <summary>
        /// Builds a ushort8 from a ushort2 (elements 0..1), a ushort3 (elements 2..4) and a ushort3 (elements 5..7).
        /// </summary>
        /// <param name="x01">Source of elements 0 and 1.</param>
        /// <param name="x234">Source of elements 2, 3 and 4.</param>
        /// <param name="x567">Source of elements 5, 6 and 7.</param>
        public ushort8(ushort2 x01, ushort3 x234, ushort3 x567)
        {
            if (Sse2.IsSse2Supported)
            {
                // Byte-shift each source left so it starts at its target element (2 and 5 respectively).
                // NOTE(review): lane comments assume the vector-to-v128 conversions put x, y, z in lanes 0..2 - confirm.
                v128 mid = Sse2.bslli_si128(x234, 2 * sizeof(ushort));
                v128 hi  = Sse2.bslli_si128(x567, 5 * sizeof(ushort));

                if (Sse4_1.IsSse41Supported)
                {
                    // Lanes 5..7 from hi, everything below from mid.
                    hi = Sse4_1.blend_epi16(mid, hi, 0b1110_0000);

                    // Lanes 2..7 from the merged value, lanes 0..1 from x01.
                    this = Sse4_1.blend_epi16(x01, hi, 0b1111_1100);
                }
                else
                {
                    // Same sequence using the project's SSE2 blend helper (presumably an emulation of blend_epi16).
                    hi = Mask.BlendEpi16_SSE2(mid, hi, 0b1110_0000);

                    this = Mask.BlendEpi16_SSE2(x01, hi, 0b1111_1100);
                }
            }
            else
            {
                // Scalar fallback: assign each element individually.
                this.x0 = x01.x;
                this.x1 = x01.y;
                this.x2 = x234.x;
                this.x3 = x234.y;
                this.x4 = x234.z;
                this.x5 = x567.x;
                this.x6 = x567.y;
                this.x7 = x567.z;
            }
        }
        /// <summary>
        /// Finds the smallest component of <paramref name="x"/> and reports its index.
        /// </summary>
        /// <param name="x">The vector to search.</param>
        /// <param name="min">Receives the smallest component value.</param>
        /// <returns>0, 1 or 2: the index of the first component equal to <paramref name="min"/>.</returns>
        public static int cminpos(ushort3 x, out ushort min)
        {
            if (Sse4_1.IsSse41Supported)
            {
                // Force every 16-bit lane above the first three to 0xFFFF before the horizontal minimum.
                v128 unusedLanesSaturated = new v128(0u, 0xFFFF_0000u, uint.MaxValue, uint.MaxValue);
                v128 minAndIndex          = Sse4_1.minpos_epu16(Sse2.or_si128(x, unusedLanesSaturated));

                // Lane 0 carries the minimum, lane 1 its index.
                min = minAndIndex.UShort0;

                return(minAndIndex.UShort1);
            }
            else
            {
                min = cmin(x);

                // The first matching component wins, checked in x, y, z order.
                return(min == x.x ? 0 : (min == x.y ? 1 : 2));
            }
        }
Beispiel #8
0
        /// <summary>
        /// Builds a ushort8 from a ushort3 (elements 0..2), a ushort3 (elements 3..5) and a ushort2 (elements 6..7).
        /// </summary>
        /// <param name="x012">Source of elements 0, 1 and 2.</param>
        /// <param name="x345">Source of elements 3, 4 and 5.</param>
        /// <param name="x67">Source of elements 6 and 7.</param>
        public ushort8(ushort3 x012, ushort3 x345, ushort2 x67)
        {
            if (Sse2.IsSse2Supported)
            {
                // Byte-shift each source left so it starts at its target element (3 and 6 respectively).
                // NOTE(review): lane comments assume the vector-to-v128 conversions put x, y, z in lanes 0..2 - confirm.
                v128 mid = Sse2.bslli_si128(x345, 3 * sizeof(ushort));
                v128 hi  = Sse2.bslli_si128(x67, 6 * sizeof(ushort));

                if (Sse4_1.IsSse41Supported)
                {
                    // Lanes 3..5 from mid, the remaining lanes from x012.
                    mid = Sse4_1.blend_epi16(x012, mid, 0b0011_1000);

                    // Lanes 6..7 from hi, lanes 0..5 from the merged value.
                    this = Sse4_1.blend_epi16(mid, hi, 0b1100_0000);
                }
                else
                {
                    // Same sequence using the project's SSE2 blend helper (presumably an emulation of blend_epi16).
                    mid = Mask.BlendEpi16_SSE2(x012, mid, 0b0011_1000);

                    this = Mask.BlendEpi16_SSE2(mid, hi, 0b1100_0000);
                }
            }
            else
            {
                // Scalar fallback: assign each element individually.
                this.x0 = x012.x;
                this.x1 = x012.y;
                this.x2 = x012.z;
                this.x3 = x345.x;
                this.x4 = x345.y;
                this.x5 = x345.z;
                this.x6 = x67.x;
                this.x7 = x67.y;
            }
        }
Beispiel #9
0
 /// <summary>
 /// Builds a ushort3x2 from six scalars listed row by row (mRC = row R, column C).
 /// </summary>
 /// <remarks>The values are regrouped so that each stored column holds the entries sharing a column index.</remarks>
 public ushort3x2(ushort m00, ushort m01,
                  ushort m10, ushort m11,
                  ushort m20, ushort m21)
 {
     ushort3 firstColumn  = new ushort3(m00, m10, m20);
     ushort3 secondColumn = new ushort3(m01, m11, m21);

     c0 = firstColumn;
     c1 = secondColumn;
 }
Beispiel #10
0
        /// <summary>
        /// Computes the greatest common divisor of corresponding components of <paramref name="x"/> and <paramref name="y"/>.
        /// </summary>
        /// <param name="x">First operand.</param>
        /// <param name="y">Second operand.</param>
        /// <returns>One result per component.</returns>
        /// <remarks>
        /// The SSE2 path follows the binary GCD scheme (strip trailing zeros, order the pair, subtract, repeat),
        /// running all lanes together and latching each lane's result as soon as that lane finishes.
        /// Without SSE2 the scalar overload is called per component.
        /// </remarks>
        public static ushort3 gcd(ushort3 x, ushort3 y)
        {
            if (Sse2.IsSse2Supported)
            {
                v128 ZERO = default(v128);

                v128 result             = ZERO;
                v128 result_if_zero_any = ZERO;

                v128 x_is_zero = Sse2.cmpeq_epi16(x, ZERO);
                v128 y_is_zero = Sse2.cmpeq_epi16(y, ZERO);
                v128 any_zero  = Sse2.or_si128(x_is_zero, y_is_zero);

                // Pre-compute the answer for lanes with a zero operand: the other operand.
                // NOTE(review): presumably Mask.BlendV selects the second operand where the mask is set - confirm.
                result_if_zero_any = Mask.BlendV(result_if_zero_any, y, x_is_zero);
                result_if_zero_any = Mask.BlendV(result_if_zero_any, x, y_is_zero);

                // Lanes with a zero operand are treated as finished from the start.
                v128 doneMask = any_zero;

                // Trailing zeros of (x | y): the power of two shared by both operands, re-applied after the loop.
                ushort3 shift = tzcnt(x | y);

                x = shrl(x, tzcnt(x));

                do
                {
                    y = shrl(y, tzcnt(y));

                    // Order the pair so that x holds the smaller and y the larger value.
                    if (Sse4_1.IsSse41Supported)
                    {
                        v128 tempX = x;

                        x = Sse4_1.min_epu16(x, y);
                        y = Sse4_1.max_epu16(y, tempX);
                    }
                    else
                    {
                        v128 tempX       = x;
                        v128 x_greater_y = Operator.greater_mask_ushort(x, y);

                        x = Mask.BlendV(x, y, x_greater_y);
                        y = Mask.BlendV(y, tempX, x_greater_y);
                    }

                    y -= x;

                    // Lanes whose y just reached zero (and were not finished before) capture x as their result.
                    v128 loopCheck = Sse2.andnot_si128(doneMask, Sse2.cmpeq_epi16(y, ZERO));
                    result   = Mask.BlendV(result, x, loopCheck);
                    doneMask = Sse2.or_si128(doneMask, loopCheck);
                    // Continue until the byte mask of the three 16-bit lanes (3 * sizeof(ushort) bytes) is fully set.
                } while (bitmask32(3 * sizeof(ushort)) != (bitmask32(3 * sizeof(ushort)) & Sse2.movemask_epi8(doneMask)));

                // Restore the shared power-of-two factor.
                result = shl(result, shift);

                // Lanes that started with a zero operand take the pre-computed answer instead.
                result = Mask.BlendV(result, result_if_zero_any, any_zero);

                return(result);
            }
            else
            {
                return(new ushort3((ushort)gcd((uint)x.x, (uint)y.x), (ushort)gcd((uint)x.y, (uint)y.y), (ushort)gcd((uint)x.z, (uint)y.z)));
            }
        }
Beispiel #11
0
        /// <summary>
        /// Reverses the bit order within each 16-bit component of <paramref name="x"/>.
        /// </summary>
        /// <param name="x">The vector whose components are bit-reversed.</param>
        /// <returns>The vector with every component's bits mirrored.</returns>
        public static ushort3 reversebits(ushort3 x)
        {
            // Swap neighbouring bits.
            ushort3 upperToLower = (x >> 1) & 0x5555;
            ushort3 lowerToUpper = (x & 0x5555) << 1;
            x = upperToLower | lowerToUpper;

            // Swap neighbouring bit pairs.
            upperToLower = (x >> 2) & 0x3333;
            lowerToUpper = (x & 0x3333) << 2;
            x = upperToLower | lowerToUpper;

            // Swap neighbouring nibbles.
            upperToLower = (x >> 4) & 0x0F0F;
            lowerToUpper = (x & 0x0F0F) << 4;
            x = upperToLower | lowerToUpper;

            // Swap the two bytes.
            return((x >> 8) | (x << 8));
        }
Beispiel #12
0
        /// <summary>
        /// Verifies that the (ushort, ushort2) constructor of ushort3 stores its arguments in x, y, z order.
        /// </summary>
        public static void Constructor_UShort_UShort2()
        {
            ushort3 x = new ushort3(TestData_LHS[0].x, new ushort2(TestData_LHS[0].y, TestData_LHS[0].z));

            // Expected value goes first, matching Assert.AreEqual(expected, actual) and the other tests in this file,
            // so a failure is reported as "expected True but was False" rather than the reverse.
            Assert.AreEqual(true, x.x == TestData_LHS[0].x &
                                  x.y == TestData_LHS[0].y &
                                  x.z == TestData_LHS[0].z);
        }
Beispiel #13
0
 /// <summary>
 /// Builds a ushort3x3 from nine scalars listed row by row (mRC = row R, column C).
 /// </summary>
 /// <remarks>Each stored column gathers the three entries that share a column index.</remarks>
 public ushort3x3(ushort m00, ushort m01, ushort m02,
                  ushort m10, ushort m11, ushort m12,
                  ushort m20, ushort m21, ushort m22)
 {
     c0 = new ushort3(m00, m10, m20);
     c1 = new ushort3(m01, m11, m21);
     c2 = new ushort3(m02, m12, m22);
 }
Beispiel #14
0
 /// <summary>
 /// Builds a ushort3x4 from twelve scalars listed row by row (mRC = row R, column C).
 /// </summary>
 /// <remarks>Each stored column gathers the three entries that share a column index.</remarks>
 public ushort3x4(ushort m00, ushort m01, ushort m02, ushort m03,
                  ushort m10, ushort m11, ushort m12, ushort m13,
                  ushort m20, ushort m21, ushort m22, ushort m23)
 {
     c0 = new ushort3(m00, m10, m20);
     c1 = new ushort3(m01, m11, m21);
     c2 = new ushort3(m02, m12, m22);
     c3 = new ushort3(m03, m13, m23);
 }
Beispiel #15
0
        /// <summary>
        /// Rounds each component of <paramref name="x"/> down to a power of two by keeping only its highest set bit.
        /// </summary>
        /// <param name="x">The vector to round.</param>
        /// <returns>Per component, the smeared value minus its lower half.</returns>
        public static ushort3 floorpow2(ushort3 x)
        {
            // Smear the highest set bit into every lower bit position.
            x = x | (x >> 1);
            x = x | (x >> 2);
            x = x | (x >> 4);
            x = x | (x >> 8);

            // Removing everything below the top bit leaves just that bit.
            ushort3 belowTopBit = x >> 1;

            return(x - belowTopBit);
        }
Beispiel #16
0
        /// <summary>
        /// Rounds each component of <paramref name="x"/> up to a power of two using the
        /// decrement / bit-smear / increment technique.
        /// </summary>
        /// <param name="x">The vector to round.</param>
        /// <returns>Per component, the smeared value of (x - 1) plus one.</returns>
        public static ushort3 ceilpow2(ushort3 x)
        {
            // Decrement first so that exact powers of two map to themselves.
            x = x - 1;

            // Smear the highest set bit into every lower bit position.
            x = x | (x >> 1);
            x = x | (x >> 2);
            x = x | (x >> 4);
            x = x | (x >> 8);

            return(x + 1);
        }
Beispiel #17
0
 /// <summary>
 /// Computes the rounded-up average, (a + b + 1) >> 1, of corresponding components.
 /// </summary>
 /// <param name="x">First operand.</param>
 /// <param name="y">Second operand.</param>
 /// <returns>One average per component.</returns>
 public static ushort3 avg(ushort3 x, ushort3 y)
 {
     if (Sse2.IsSse2Supported)
     {
         return(Sse2.avg_epu16(x, y));
     }
     else
     {
         // The sums are formed in int, so the +1 cannot overflow before the shift.
         ushort averageX = (ushort)((x.x + y.x + 1) >> 1);
         ushort averageY = (ushort)((x.y + y.y + 1) >> 1);
         ushort averageZ = (ushort)((x.z + y.z + 1) >> 1);

         return(new ushort3(averageX, averageY, averageZ));
     }
 }
Beispiel #18
0
        /// <summary>
        /// Compares maxmath.intsqrt on 64 generated ushort3 values against the scalar reference _intsqrt.
        /// </summary>
        public static void ushort3()
        {
            // Fixed seed keeps the generated inputs reproducible.
            Random16 generator = new Random16(135);

            for (int iteration = 0; iteration < 64; iteration++)
            {
                ushort3 input    = generator.NextUShort3();
                ushort3 expected = new ushort3((ushort)_intsqrt(input.x), (ushort)_intsqrt(input.y), (ushort)_intsqrt(input.z));

                Assert.AreEqual(expected, maxmath.intsqrt(input));
            }
        }
Beispiel #19
0
 /// <summary>
 /// Subtracts <paramref name="b"/> from <paramref name="a"/> in the x and z components and adds it in the y component.
 /// </summary>
 /// <param name="a">Left operand.</param>
 /// <param name="b">Right operand, subtracted or added depending on the component.</param>
 /// <returns>(a.x - b.x, a.y + b.y, a.z - b.z).</returns>
 public static ushort3 subadd(ushort3 a, ushort3 b)
 {
     if (Ssse3.IsSsse3Supported)
     {
         // 0xFFFF reads as -1 for the signed sign operation: b is negated in lanes 0 and 2 and kept in lane 1.
         ushort4 laneSigns = new ushort4(ushort.MaxValue, 1, ushort.MaxValue, 1);

         return(a + Ssse3.sign_epi16(b, laneSigns));
     }
     else
     {
         // Subtracting the negated b in the middle component is the same as adding b there.
         ushort3 negatedB     = (ushort3)(-(short3)b);
         bool3   useNegatedB  = new bool3(false, true, false);

         return(a - select(b, negatedB, useNegatedB));
     }
 }
Beispiel #20
0
 /// <summary>
 /// Computes <paramref name="left"/> AND (NOT <paramref name="right"/>) bitwise.
 /// </summary>
 /// <param name="left">Operand that is kept as is.</param>
 /// <param name="right">Operand that is complemented before the AND.</param>
 /// <returns>left &amp; ~right.</returns>
 public static ushort3 andnot(ushort3 left, ushort3 right)
 {
     if (Sse2.IsSse2Supported)
     {
         // The intrinsic complements its FIRST argument, hence the swapped argument order.
         v128 leftWithoutRight = Sse2.andnot_si128(right, left);

         return(leftWithoutRight);
     }
     else
     {
         ushort3 complementOfRight = ~right;

         return(left & complementOfRight);
     }
 }
 /// <summary>
 /// Divides every component of <paramref name="dividend"/> by the scalar <paramref name="divisor"/>
 /// and also yields the remainder.
 /// </summary>
 /// <param name="dividend">The vector to divide.</param>
 /// <param name="divisor">The scalar divisor applied to every component.</param>
 /// <param name="remainder">Receives the remainder of the division.</param>
 /// <returns>The quotient.</returns>
 public static ushort3 divrem(ushort3 dividend, ushort divisor, out ushort3 remainder)
 {
     if (Constant.IsConstantExpression(divisor))
     {
         // Divisor known at compile time: use the plain operators directly.
         ushort3 quotient = dividend / divisor;
         remainder = dividend % divisor;

         return(quotient);
     }
     else
     {
         // Otherwise widen the divisor and defer to the vector/vector overload.
         ushort3 widenedDivisor = (ushort3)divisor;

         return(divrem(dividend, widenedDivisor, out remainder));
     }
 }
Beispiel #22
0
        /// <summary>
        /// Compares maxmath.gcd on 64 generated ushort3 pairs against the scalar reference _gcd.
        /// </summary>
        public static void ushort3()
        {
            // Fixed seed keeps the generated inputs reproducible.
            Random16 generator = new Random16(135);

            for (int iteration = 0; iteration < 64; iteration++)
            {
                ushort3 lhs      = generator.NextUShort3();
                ushort3 rhs      = generator.NextUShort3();
                ushort3 expected = new ushort3((ushort)_gcd(lhs.x, rhs.x), (ushort)_gcd(lhs.y, rhs.y), (ushort)_gcd(lhs.z, rhs.z));

                Assert.AreEqual(expected, maxmath.gcd(lhs, rhs));
            }
        }
Beispiel #23
0
        /// <summary>
        /// Compares maxmath.intpow on 64 generated (short3 base, ushort3 exponent) pairs against the scalar reference _intpow.
        /// </summary>
        public static void short3()
        {
            // Fixed seed keeps the generated inputs reproducible.
            Random16 generator = new Random16(135);

            for (int iteration = 0; iteration < 64; iteration++)
            {
                short3  bases     = generator.NextShort3();
                ushort3 exponents = generator.NextUShort3();
                short3  expected  = new short3((short)_intpow(bases.x, exponents.x), (short)_intpow(bases.y, exponents.y), (short)_intpow(bases.z, exponents.z));

                Assert.AreEqual(expected, maxmath.intpow(bases, exponents));
            }
        }
Beispiel #24
0
 /// <summary>
 /// Tests, per component, whether <paramref name="x"/> is a power of two.
 /// </summary>
 /// <param name="x">The vector to test.</param>
 /// <returns>One boolean per component.</returns>
 public static bool3 ispow2(ushort3 x)
 {
     if (Sse2.IsSse2Supported)
     {
         // A lane qualifies when x > 0 and x & (x - 1) == 0; the combined mask is reduced to 1/0 by the
         // AND with 1 and then narrowed to bytes.
         v128 result = (byte3)(new ushort3(1) & Sse2.and_si128(Operator.greater_mask_ushort(x, default(v128)),
                                                               Sse2.cmpeq_epi16(default(v128), x & (x - 1))));
         // Reinterpret the leading bytes of the register as a bool3 without converting them.
         return(*(bool3 *)&result);
     }
     else
     {
         // Scalar fallback: widen each component and use the math library's scalar test.
         return(new bool3(math.ispow2((uint)x.x), math.ispow2((uint)x.y), math.ispow2((uint)x.z)));
     }
 }
Beispiel #25
0
        /// <summary>
        /// Returns the smallest of the three components of <paramref name="x"/>.
        /// </summary>
        /// <param name="x">The vector to reduce.</param>
        /// <returns>The minimum of x.x, x.y and x.z.</returns>
        public static ushort cmin(ushort3 x)
        {
            if (Sse2.IsSse2Supported)
            {
                // After the first step component x holds min(x.x, x.z); y is unchanged.
                ushort3 pairwise = min(x, x.zyz);
                // Folding y into component x completes the three-way minimum.
                ushort3 reduced  = min(pairwise, pairwise.yyy);

                return(reduced.x);
            }
            else
            {
                uint smallerOfYZ = math.min((uint)x.y, (uint)x.z);

                return((ushort)math.min((uint)x.x, smallerOfYZ));
            }
        }
Beispiel #26
0
        /// <summary>
        /// Produces a per-lane mask for the unsigned comparison <paramref name="left"/> &gt; <paramref name="right"/>.
        /// </summary>
        /// <param name="left">Left operand of the comparison.</param>
        /// <param name="right">Right operand of the comparison.</param>
        /// <returns>The result of the signed 16-bit greater-than compare on the sign-flipped operands.</returns>
        /// <exception cref="CPUFeatureCheckException">Thrown when SSE2 is not supported.</exception>
        internal static v128 greater_mask_ushort(ushort3 left, ushort3 right)
        {
            if (Sse2.IsSse2Supported)
            {
                // Only a signed 16-bit compare is used here; flipping the top bit of both operands
                // maps unsigned ordering onto signed ordering.
                ushort4 signBit      = 1 << 15;
                v128    flippedLeft  = Sse2.xor_si128(left, signBit);
                v128    flippedRight = Sse2.xor_si128(right, signBit);

                return(Sse2.cmpgt_epi16(flippedLeft, flippedRight));
            }
            else
            {
                throw new CPUFeatureCheckException();
            }
        }
Beispiel #27
0
        /// <summary>
        /// Checks the bitwise complement operator of ushort3 against the scalar complement of each component.
        /// </summary>
        public static void NOT()
        {
            bool allMatch = true;

            for (int i = 0; i < NUM_TESTS; i++)
            {
                var     operand  = TestData_LHS[i];
                ushort3 inverted = ~operand;

                // The scalar ~ yields an int, so the reference is truncated back to 16 bits.
                bool xMatches = inverted.x == (ushort)(~operand.x);
                bool yMatches = inverted.y == (ushort)(~operand.y);
                bool zMatches = inverted.z == (ushort)(~operand.z);

                allMatch = allMatch & xMatches & yMatches & zMatches;
            }

            Assert.AreEqual(true, allMatch);
        }
Beispiel #28
0
        /// <summary>
        /// Checks the remainder operator of ushort3 against the scalar remainder of each component pair.
        /// </summary>
        public static void Remainder()
        {
            bool allMatch = true;

            for (int i = 0; i < NUM_TESTS; i++)
            {
                var     lhs    = TestData_LHS[i];
                var     rhs    = TestData_RHS[i];
                ushort3 actual = lhs % rhs;

                bool xMatches = actual.x == (ushort)(lhs.x % rhs.x);
                bool yMatches = actual.y == (ushort)(lhs.y % rhs.y);
                bool zMatches = actual.z == (ushort)(lhs.z % rhs.z);

                allMatch = allMatch & xMatches & yMatches & zMatches;
            }

            Assert.AreEqual(true, allMatch);
        }
Beispiel #29
0
        /// <summary>
        /// Checks the multiplication operator of ushort3 against the truncated scalar product of each component pair.
        /// </summary>
        public static void Multiply()
        {
            bool allMatch = true;

            for (int i = 0; i < NUM_TESTS; i++)
            {
                var     lhs    = TestData_LHS[i];
                var     rhs    = TestData_RHS[i];
                ushort3 actual = lhs * rhs;

                // The scalar product is formed in int and truncated to its low 16 bits for the reference.
                bool xMatches = actual.x == (ushort)(lhs.x * rhs.x);
                bool yMatches = actual.y == (ushort)(lhs.y * rhs.y);
                bool zMatches = actual.z == (ushort)(lhs.z * rhs.z);

                allMatch = allMatch & xMatches & yMatches & zMatches;
            }

            Assert.AreEqual(true, allMatch);
        }
Beispiel #30
0
        /// <summary>
        /// Checks the subtraction operator of ushort3 against the truncated scalar difference of each component pair.
        /// </summary>
        public static void Subtract()
        {
            bool allMatch = true;

            for (int i = 0; i < NUM_TESTS; i++)
            {
                var     lhs    = TestData_LHS[i];
                var     rhs    = TestData_RHS[i];
                ushort3 actual = lhs - rhs;

                // The scalar difference is formed in int and truncated to 16 bits for the reference.
                bool xMatches = actual.x == (ushort)(lhs.x - rhs.x);
                bool yMatches = actual.y == (ushort)(lhs.y - rhs.y);
                bool zMatches = actual.z == (ushort)(lhs.z - rhs.z);

                allMatch = allMatch & xMatches & yMatches & zMatches;
            }

            Assert.AreEqual(true, allMatch);
        }
Beispiel #31
0
 // Extern declaration: the body is supplied outside this file (presumably through a DllImport attribute not visible here).
 // NOTE(review): the name suggests the CUDA driver device-to-host copy, i.e. ByteCount bytes read from srcDevice
 // into the ushort3 passed by reference as dstHost - confirm against the native library's documentation.
 public static extern CUResult cuMemcpyDtoH_v2(ref ushort3 dstHost, CUdeviceptr srcDevice, SizeT ByteCount);