// Computes the least common multiple of each pair of lanes of x and y, as an unsigned result.
public static ushort4 lcm(short4 x, short4 y)
        {
            // Only the magnitudes matter, so both operands are made non-negative first.
            ushort4 magnitudeX = (ushort4)abs(x);
            ushort4 magnitudeY = (ushort4)abs(y);

            // Dividing by the gcd before multiplying keeps the intermediate value as small as possible.
            ushort4 commonDivisor = gcd(magnitudeX, magnitudeY);
            ushort4 reducedX      = magnitudeX / commonDivisor;

            return reducedX * magnitudeY;
        }
Beispiel #2
0
        // Builds, per lane, a 16-bit mask made of numBits consecutive set bits whose lowest bit sits at position index.
        // index must lie in [0, 16] and numBits in [0, 16 - index] for every lane (checked by the assertions).
        public static ushort4 bitmask16(ushort4 numBits, ushort4 index = default(ushort4))
        {
            Assert.IsBetween(index.x, 0u, 16u);
            Assert.IsBetween(index.y, 0u, 16u);
            Assert.IsBetween(index.z, 0u, 16u);
            Assert.IsBetween(index.w, 0u, 16u);
            Assert.IsBetween(numBits.x, 0u, 16u - index.x);
            Assert.IsBetween(numBits.y, 0u, 16u - index.y);
            Assert.IsBetween(numBits.z, 0u, 16u - index.z);
            Assert.IsBetween(numBits.w, 0u, 16u - index.w);

            // Every bit at or above the start position.
            ushort4 onesFromIndex = shl(ushort.MaxValue, index);

            // Remove the bits at or above (index + numBits), which leaves only the requested run.
            ushort4 run = andnot(onesFromIndex, shl(onesFromIndex, numBits));

            if (Sse2.IsSse2Supported)
            {
                // Lanes that request all 16 bits are forced to all ones.
                v128 allBitsRequested = Sse2.cmpeq_epi16(numBits, new ushort4(16));

                return allBitsRequested | run;
            }
            else
            {
                // Same forcing as above: the negated comparison result is OR-ed over the run.
                ushort4 allBitsRequested = (ushort4)(-toint16(numBits == 16));

                return allBitsRequested | run;
            }
        }
Beispiel #3
0
 // Constructs a ushort4x4 whose four columns are all initialised from the single scalar v.
 public ushort4x4(ushort v)
 {
     // Convert the scalar to a column once and reuse it for all four columns.
     ushort4 column = v;

     c0 = column;
     c1 = column;
     c2 = column;
     c3 = column;
 }
Beispiel #4
0
        // Divides every column of left by the scalar right.
        public static ushort2x4 operator /(ushort2x4 left, ushort right)
        {
            // The packed paths are only taken when the divisor is not a compile-time constant;
            // constant divisors always use the per-column division at the bottom.
            if (!Constant.IsConstantExpression(right))
            {
                if (Avx2.IsAvx2Supported)
                {
                    // All four 2-element columns fit into one 8-lane vector: divide once, then unpack.
                    ushort8 quotients = new ushort8(left.c0, left.c1, left.c2, left.c3) / right;

                    return new ushort2x4(quotients.v2_0, quotients.v2_2, quotients.v2_4, quotients.v2_6);
                }
                else if (Sse2.IsSse2Supported)
                {
                    // Two columns per 4-lane vector, sharing one broadcast divisor.
                    ushort4 broadcastDivisor = right;
                    ushort4 firstPair        = new ushort4(left.c0, left.c1) / broadcastDivisor;
                    ushort4 secondPair       = new ushort4(left.c2, left.c3) / broadcastDivisor;

                    return new ushort2x4(firstPair.xy, firstPair.zw, secondPair.xy, secondPair.zw);
                }
            }

            return new ushort2x4(left.c0 / right, left.c1 / right, left.c2 / right, left.c3 / right);
        }
Beispiel #5
0
 // Constructs a ushort4x4 from four ushort4 vectors, stored unchanged in c0 through c3.
 public ushort4x4(ushort4 c0, ushort4 c1, ushort4 c2, ushort4 c3)
 {
     this.c0 = c0;
     this.c1 = c1;
     this.c2 = c2;
     this.c3 = c3;
 }
Beispiel #6
0
        // Counts the trailing zero bits of every 16-bit lane of x.
        // In both SIMD paths a lane that is zero yields 16.
        public static ushort4 tzcnt(ushort4 x)
        {
            if (Ssse3.IsSsse3Supported)
            {
                // Lookup tables indexed by nibble value. The first gives the trailing-zero count of a low nibble,
                // the second the same count offset by 4 for a high nibble. Entry 0 holds 16 so that an empty
                // nibble never wins the minimum below.
                v128 lowNibbleTable  = new v128(16, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0);
                v128 highNibbleTable = new v128(16, 4, 5, 4, 6, 4, 5, 4, 7, 4, 5, 4, 6, 4, 5, 4);
                v128 nibbleMask      = new v128(0x0F0F_0F0F);

                v128 lowNibbles  = Sse2.and_si128(nibbleMask, x);
                v128 highNibbles = Sse2.and_si128(nibbleMask, Sse2.srli_epi16(x, 4));

                // Trailing-zero count of every individual byte.
                v128 perByte = Sse2.min_epu8(Ssse3.shuffle_epi8(lowNibbleTable, lowNibbles),
                                             Ssse3.shuffle_epi8(highNibbleTable, highNibbles));

                // The upper byte's count plus 8, shifted down so it lines up with the lower byte's count.
                v128 upperByteCandidate = Sse2.srli_epi16(Sse2.add_epi8(perByte, Sse2.set1_epi8(8)), 8);

                return Sse2.min_epu8(perByte, upperByteCandidate);
            }
            else if (Sse2.IsSse2Supported)
            {
                v128 zero = default(v128);

                // x & -x isolates the lowest set bit of every lane.
                v128 lowestBit = x & (ushort4)(-((short4)x));

                // Each term contributes its weight when the isolated bit is absent from the tested positions;
                // the first term adds the extra 1 that makes an all-zero lane sum up to 16.
                ushort4 whenZero  = Mask.BlendV(zero, new ushort4(1), Sse2.cmpeq_epi16(lowestBit, zero));
                ushort4 weight8   = Mask.BlendV(zero, new ushort4(8), Sse2.cmpeq_epi16(lowestBit & (ushort4)0x00FF, zero));
                ushort4 weight4   = Mask.BlendV(zero, new ushort4(4), Sse2.cmpeq_epi16(lowestBit & (ushort4)0x0F0F, zero));
                ushort4 weight2   = Mask.BlendV(zero, new ushort4(2), Sse2.cmpeq_epi16(lowestBit & (ushort4)0x3333, zero));
                ushort4 weight1   = Mask.BlendV(zero, new ushort4(1), Sse2.cmpeq_epi16(lowestBit & (ushort4)0x5555, zero));

                return (whenZero + weight8) + ((weight4 + weight2) + weight1);
            }
            else
            {
                // No SIMD available: defer to the scalar overload lane by lane.
                return new ushort4(tzcnt(x.x), tzcnt(x.y), tzcnt(x.z), tzcnt(x.w));
            }
        }
        // Writes the smallest component of x to min and returns the index (0..3) of its first occurrence.
        public static int cminpos(ushort4 x, out ushort min)
        {
            if (Sse4_1.IsSse41Supported)
            {
                // Set the four lanes above x to all ones so that they cannot undercut any lane of x.
                v128 upperLanesSaturated = Sse2.or_si128(x, new v128(0, 0, -1, -1));
                v128 minimumAndIndex     = Sse4_1.minpos_epu16(upperLanesSaturated);

                // Lane 0 of the result carries the minimum, lane 1 its index.
                min = minimumAndIndex.UShort0;

                return minimumAndIndex.UShort1;
            }

            min = cmin(x);

            // The first lane equal to the minimum wins; w is the only one left if none of x, y, z match.
            return (min == x.x) ? 0
                 : (min == x.y) ? 1
                 : (min == x.z) ? 2
                 : 3;
        }
Beispiel #8
0
        // Computes the remainder of every column of left divided by the scalar right.
        public static ushort2x4 operator %(ushort2x4 left, ushort right)
        {
            // The packed paths are only taken when the divisor is not a compile-time constant;
            // constant divisors always use the per-column remainder at the bottom.
            if (!Constant.IsConstantExpression(right))
            {
                if (Avx2.IsAvx2Supported)
                {
                    // All four 2-element columns fit into one 8-lane vector: reduce once, then unpack.
                    ushort8 remainders = new ushort8(left.c0, left.c1, left.c2, left.c3) % right;

                    return new ushort2x4(remainders.v2_0, remainders.v2_2, remainders.v2_4, remainders.v2_6);
                }
                else if (Sse2.IsSse2Supported)
                {
                    // Two columns per 4-lane vector, sharing one broadcast divisor.
                    ushort4 broadcastDivisor = right;
                    ushort4 firstPair        = new ushort4(left.c0, left.c1) % broadcastDivisor;
                    ushort4 secondPair       = new ushort4(left.c2, left.c3) % broadcastDivisor;

                    return new ushort2x4(firstPair.xy, firstPair.zw, secondPair.xy, secondPair.zw);
                }
            }

            return new ushort2x4(left.c0 % right, left.c1 % right, left.c2 % right, left.c3 % right);
        }
Beispiel #9
0
        // Computes the greatest common divisor of each pair of lanes of x and y.
        // The SSE2 path runs a shift-and-subtract (binary) GCD on all four lanes at once;
        // without SSE2 each lane is handed to the scalar uint overload.
        public static ushort4 gcd(ushort4 x, ushort4 y)
        {
            if (Sse2.IsSse2Supported)
            {
                v128 ZERO = default(v128);

                v128 result             = ZERO;
                v128 result_if_zero_any = ZERO;

                // Lanes in which at least one operand is zero are resolved up front and never enter the loop logic.
                v128 x_is_zero = Sse2.cmpeq_epi16(x, ZERO);
                v128 y_is_zero = Sse2.cmpeq_epi16(y, ZERO);
                v128 any_zero  = Sse2.or_si128(x_is_zero, y_is_zero);

                // For those lanes the answer is the other operand: y where x is zero, x where y is zero.
                // The second blend runs last, so a lane with both operands zero ends up with x (zero).
                result_if_zero_any = Mask.BlendV(result_if_zero_any, y, x_is_zero);
                result_if_zero_any = Mask.BlendV(result_if_zero_any, x, y_is_zero);

                // Lanes with a zero operand count as finished from the start.
                v128 doneMask = any_zero;

                // Number of trailing zero bits shared by both operands; shifted back into the result after the loop.
                ushort4 shift = tzcnt(x | y);

                // Strip all trailing zero bits from x.
                x = shrl(x, tzcnt(x));

                do
                {
                    // Strip all trailing zero bits from y.
                    y = shrl(y, tzcnt(y));

                    // Order each lane so that x holds the smaller and y the larger value.
                    if (Sse4_1.IsSse41Supported)
                    {
                        v128 tempX = x;

                        x = Sse4_1.min_epu16(x, y);
                        y = Sse4_1.max_epu16(y, tempX);
                    }
                    else
                    {
                        // No unsigned 16-bit min/max here: swap the lanes where x > y via blends instead.
                        v128 tempX       = x;
                        v128 x_greater_y = Operator.greater_mask_ushort(x, y);

                        x = Mask.BlendV(x, y, x_greater_y);
                        y = Mask.BlendV(y, tempX, x_greater_y);
                    }

                    y -= x;

                    // Lanes whose difference just reached zero and that were not finished before:
                    // capture x as their result and mark them as done.
                    v128 loopCheck = Sse2.andnot_si128(doneMask, Sse2.cmpeq_epi16(y, ZERO));
                    result   = Mask.BlendV(result, x, loopCheck);
                    doneMask = Sse2.or_si128(doneMask, loopCheck);
                // The four 16-bit lanes occupy the low 64 bits; keep looping until all of them are marked done.
                } while (-1 != doneMask.SLong0);

                // Re-apply the shared trailing-zero shift that was measured before the loop.
                result = shl(result, shift);

                // Lanes with a zero operand take the value prepared before the loop.
                result = Mask.BlendV(result, result_if_zero_any, any_zero);

                return(result);
            }
            else
            {
                // Scalar fallback: widen each lane to uint, call the uint overload, narrow back.
                return(new ushort4((ushort)gcd((uint)x.x, (uint)y.x), (ushort)gcd((uint)x.y, (uint)y.y), (ushort)gcd((uint)x.z, (uint)y.z), (ushort)gcd((uint)x.w, (uint)y.w)));
            }
        }
Beispiel #10
0
        // Reverses the bit order of every 16-bit lane of x by swapping progressively larger bit groups.
        public static ushort4 reversebits(ushort4 x)
        {
            // Swap neighbouring single bits.
            ushort4 movedDown = (x >> 1) & 0x5555;
            ushort4 movedUp   = (x & 0x5555) << 1;
            x = movedDown | movedUp;

            // Swap neighbouring bit pairs.
            movedDown = (x >> 2) & 0x3333;
            movedUp   = (x & 0x3333) << 2;
            x = movedDown | movedUp;

            // Swap neighbouring nibbles.
            movedDown = (x >> 4) & 0x0F0F;
            movedUp   = (x & 0x0F0F) << 4;
            x = movedDown | movedUp;

            // Finally swap the two bytes of each lane.
            return (x >> 8) | (x << 8);
        }
Beispiel #11
0
 // Constructs a ushort4x2 from eight scalars listed row by row (mRC = row R, column C).
 public ushort4x2(ushort m00, ushort m01,
                  ushort m10, ushort m11,
                  ushort m20, ushort m21,
                  ushort m30, ushort m31)
 {
     // Arguments arrive in row order, storage is per column: gather each column first.
     ushort4 firstColumn  = new ushort4(m00, m10, m20, m30);
     ushort4 secondColumn = new ushort4(m01, m11, m21, m31);

     c0 = firstColumn;
     c1 = secondColumn;
 }
Beispiel #12
0
 // Constructs a ushort4x3 from twelve scalars listed row by row (mRC = row R, column C).
 public ushort4x3(ushort m00, ushort m01, ushort m02,
                  ushort m10, ushort m11, ushort m12,
                  ushort m20, ushort m21, ushort m22,
                  ushort m30, ushort m31, ushort m32)
 {
     // Arguments arrive in row order, storage is per column: gather each column first.
     ushort4 firstColumn  = new ushort4(m00, m10, m20, m30);
     ushort4 secondColumn = new ushort4(m01, m11, m21, m31);
     ushort4 thirdColumn  = new ushort4(m02, m12, m22, m32);

     c0 = firstColumn;
     c1 = secondColumn;
     c2 = thirdColumn;
 }
Beispiel #13
0
        // Returns the product of the four components of x, computed in 32-bit unsigned arithmetic.
        public static uint cprod(ushort4 x)
        {
            uint4 widened = x;

            // Lane x now holds x * z and lane y holds y * w.
            uint4 pairProducts = widened * widened.zwzw;

            // Multiplying by the broadcast y lane leaves (x * z) * (y * w) in lane x.
            uint4 fullProduct = pairProducts * pairProducts.yyyy;

            return fullProduct.x;
        }
Beispiel #14
0
        // Rounds every lane of x down to a power of two by keeping only its highest set bit; a zero lane stays zero.
        public static ushort4 floorpow2(ushort4 x)
        {
            // Smear the highest set bit into every lower position.
            ushort4 filled = x | (x >> 1);
            filled = filled | (filled >> 2);
            filled = filled | (filled >> 4);
            filled = filled | (filled >> 8);

            // Removing everything below the top bit leaves just that bit.
            ushort4 belowTopBit = filled >> 1;

            return filled - belowTopBit;
        }
Beispiel #15
0
        // Rounds every lane of x up to a power of two; lanes that already are a power of two are returned unchanged.
        public static ushort4 ceilpow2(ushort4 x)
        {
            // Step back by one first so that exact powers of two are not pushed to the next one.
            ushort4 filled = x - 1;

            // Smear the highest set bit into every lower position.
            filled = filled | (filled >> 1);
            filled = filled | (filled >> 2);
            filled = filled | (filled >> 4);
            filled = filled | (filled >> 8);

            return filled + 1;
        }
Beispiel #16
0
 // Constructs a ushort4x4 from sixteen scalars listed row by row (mRC = row R, column C).
 public ushort4x4(ushort m00, ushort m01, ushort m02, ushort m03,
                  ushort m10, ushort m11, ushort m12, ushort m13,
                  ushort m20, ushort m21, ushort m22, ushort m23,
                  ushort m30, ushort m31, ushort m32, ushort m33)
 {
     // Arguments arrive in row order, storage is per column: gather each column first.
     ushort4 firstColumn  = new ushort4(m00, m10, m20, m30);
     ushort4 secondColumn = new ushort4(m01, m11, m21, m31);
     ushort4 thirdColumn  = new ushort4(m02, m12, m22, m32);
     ushort4 fourthColumn = new ushort4(m03, m13, m23, m33);

     c0 = firstColumn;
     c1 = secondColumn;
     c2 = thirdColumn;
     c3 = fourthColumn;
 }
Beispiel #17
0
 // Returns left with every bit that is set in right cleared, i.e. left AND (NOT right).
 public static ushort4 andnot(ushort4 left, ushort4 right)
 {
     if (Sse2.IsSse2Supported)
     {
         // The intrinsic inverts its first operand, hence the swapped argument order.
         v128 cleared = Sse2.andnot_si128(right, left);

         return cleared;
     }

     return left & ~right;
 }
Beispiel #18
0
        // Checks maxmath.intsqrt on 64 vectors drawn from a Random16 seeded with 135,
        // comparing against the scalar reference _intsqrt applied lane by lane.
        public static void ushort4()
        {
            Random16 generator = new Random16(135);
            int remaining = 64;

            while (remaining-- > 0)
            {
                ushort4 sample = generator.NextUShort4();

                ushort4 expected = new ushort4((ushort)_intsqrt(sample.x),
                                               (ushort)_intsqrt(sample.y),
                                               (ushort)_intsqrt(sample.z),
                                               (ushort)_intsqrt(sample.w));

                Assert.AreEqual(expected, maxmath.intsqrt(sample));
            }
        }
Beispiel #19
0
 // Combines a and b with alternating signs: b is subtracted in lanes x and z and added in lanes y and w.
 public static ushort4 subadd(ushort4 a, ushort4 b)
 {
     if (Ssse3.IsSsse3Supported)
     {
         // sign_epi16 negates b where the control lane is negative (ushort.MaxValue read as a signed
         // 16-bit value) and keeps it where the control lane is positive.
         ushort4 signControl = new ushort4(ushort.MaxValue, 1, ushort.MaxValue, 1);

         return a + Ssse3.sign_epi16(b, signControl);
     }
     else
     {
         // Same effect without the intrinsic: negate b in lanes y and w, then subtract everywhere.
         ushort4 negatedB   = (ushort4)(-(short4)b);
         bool4   negateLane = new bool4(false, true, false, true);

         return a - select(b, negatedB, negateLane);
     }
 }
Beispiel #20
0
 // Averages x and y lane by lane, rounding halves up: (x + y + 1) >> 1.
 public static ushort4 avg(ushort4 x, ushort4 y)
 {
     if (Sse2.IsSse2Supported)
     {
         return Sse2.avg_epu16(x, y);
     }

     // The sums are evaluated as int, so the "+ 1" cannot wrap before the shift.
     ushort averageX = (ushort)((x.x + y.x + 1) >> 1);
     ushort averageY = (ushort)((x.y + y.y + 1) >> 1);
     ushort averageZ = (ushort)((x.z + y.z + 1) >> 1);
     ushort averageW = (ushort)((x.w + y.w + 1) >> 1);

     return new ushort4(averageX, averageY, averageZ, averageW);
 }
 // Divides every lane of dividend by the scalar divisor, returning the quotients and writing the
 // remainders to remainder.
 public static ushort4 divrem(ushort4 dividend, ushort divisor, out ushort4 remainder)
 {
     // A divisor that is not known at compile time is broadcast and handed to the vector overload.
     if (!Constant.IsConstantExpression(divisor))
     {
         return divrem(dividend, (ushort4)divisor, out remainder);
     }

     // Compile-time constant divisor: use the plain operators for both results.
     remainder = dividend % divisor;

     return dividend / divisor;
 }
Beispiel #22
0
        // Checks maxmath.intpow on 64 base/exponent vector pairs drawn from a Random16 seeded with 135,
        // comparing against the scalar reference _intpow applied lane by lane.
        public static void short4()
        {
            Random16 generator = new Random16(135);
            int remaining = 64;

            while (remaining-- > 0)
            {
                // Draw order matters for reproducibility: bases first, exponents second.
                short4  bases     = generator.NextShort4();
                ushort4 exponents = generator.NextUShort4();

                short4 expected = new short4((short)_intpow(bases.x, exponents.x),
                                             (short)_intpow(bases.y, exponents.y),
                                             (short)_intpow(bases.z, exponents.z),
                                             (short)_intpow(bases.w, exponents.w));

                Assert.AreEqual(expected, maxmath.intpow(bases, exponents));
            }
        }
Beispiel #23
0
        // Checks maxmath.gcd on 64 vector pairs drawn from a Random16 seeded with 135,
        // comparing against the scalar reference _gcd applied lane by lane.
        public static void ushort4()
        {
            Random16 generator = new Random16(135);
            int remaining = 64;

            while (remaining-- > 0)
            {
                // Draw order matters for reproducibility: first operand, then second operand.
                ushort4 first  = generator.NextUShort4();
                ushort4 second = generator.NextUShort4();

                ushort4 expected = new ushort4((ushort)_gcd(first.x, second.x),
                                               (ushort)_gcd(first.y, second.y),
                                               (ushort)_gcd(first.z, second.z),
                                               (ushort)_gcd(first.w, second.w));

                Assert.AreEqual(expected, maxmath.gcd(first, second));
            }
        }
Beispiel #24
0
        // Computes the component-wise remainder of left divided by right.
        public static ushort2x2 operator %(ushort2x2 left, ushort2x2 right)
        {
            if (Sse2.IsSse2Supported)
            {
                // Both columns of each matrix fit into one 4-lane vector, so a single remainder covers everything.
                ushort4 dividends  = new ushort4(left.c0, left.c1);
                ushort4 divisors   = new ushort4(right.c0, right.c1);
                ushort4 remainders = dividends % divisors;

                return new ushort2x2(remainders.xy, remainders.zw);
            }

            return new ushort2x2(left.c0 % right.c0, left.c1 % right.c1);
        }
Beispiel #25
0
        // Counts the set bits in every 16-bit lane of x.
        public static ushort4 countbits(ushort4 x)
        {
            if (Ssse3.IsSsse3Supported)
            {
                // Count per byte first, then add the two byte counts that make up each 16-bit lane.
                ushort4 perByteCounts = (v128)countbits((byte16)(v128)x);

                ushort4 lowByteCount  = perByteCounts & 0x00FF;
                ushort4 highByteCount = perByteCounts >> 8;

                return lowByteCount + highByteCount;
            }

            // Fallback: widen each lane to uint and use the scalar bit count.
            ushort bitsX = (ushort)math.countbits((uint)x.x);
            ushort bitsY = (ushort)math.countbits((uint)x.y);
            ushort bitsZ = (ushort)math.countbits((uint)x.z);
            ushort bitsW = (ushort)math.countbits((uint)x.w);

            return new ushort4(bitsX, bitsY, bitsZ, bitsW);
        }
Beispiel #26
0
 // Tests, per lane, whether x is a power of two: non-zero and with no bit left after clearing the lowest set bit.
 public static bool4 ispow2(ushort4 x)
 {
     if (Sse2.IsSse2Supported)
     {
         v128 isNonZero    = Operator.greater_mask_ushort(x, default(v128));
         v128 hasSingleBit = Sse2.cmpeq_epi16(default(v128), x & (x - 1));

         // Reduce the all-ones lane masks to 0/1 and narrow them to one byte per lane.
         v128 packedFlags = (byte4)(new ushort4(1) & Sse2.and_si128(isNonZero, hasSingleBit));

         // Reinterpret those leading bytes directly as the bool4 result.
         return *(bool4 *)&packedFlags;
     }

     // Fallback: widen each lane to uint and use the scalar test.
     bool resultX = math.ispow2((uint)x.x);
     bool resultY = math.ispow2((uint)x.y);
     bool resultZ = math.ispow2((uint)x.z);
     bool resultW = math.ispow2((uint)x.w);

     return new bool4(resultX, resultY, resultZ, resultW);
 }
Beispiel #27
0
        // Returns the smallest of the four components of x.
        public static ushort cmin(ushort4 x)
        {
            if (Sse2.IsSse2Supported)
            {
                // Lane x becomes min(x, z) and lane y becomes min(y, w) ...
                ushort4 pairMinima = min(x, x.zwzw);

                // ... and comparing against the broadcast y lane leaves the overall minimum in lane x.
                ushort4 overall = min(pairMinima, pairMinima.yyyy);

                return overall.x;
            }

            // Fallback: fold the scalar minimum from the last component towards the first.
            uint minZW  = math.min((uint)x.z, (uint)x.w);
            uint minYZW = math.min((uint)x.y, minZW);

            return (ushort)math.min((uint)x.x, minYZW);
        }
Beispiel #28
0
        // Computes the remainder of every component of left divided by the scalar right.
        public static ushort2x2 operator %(ushort2x2 left, ushort right)
        {
            // The packed path is only taken when the divisor is not a compile-time constant;
            // constant divisors always use the per-column remainder at the bottom.
            if (!Constant.IsConstantExpression(right))
            {
                if (Sse2.IsSse2Supported)
                {
                    // Both columns fit into one 4-lane vector: reduce once, then split again.
                    ushort4 remainders = new ushort4(left.c0, left.c1) % right;

                    return new ushort2x2(remainders.xy, remainders.zw);
                }
            }

            return new ushort2x2(left.c0 % right, left.c1 % right);
        }
Beispiel #29
0
        // Produces a per-lane mask for the unsigned comparison left > right.
        // Requires SSE2; throws CPUFeatureCheckException otherwise.
        internal static v128 greater_mask_ushort(ushort3 left, ushort3 right)
        {
            if (Sse2.IsSse2Supported)
            {
                // Only a signed 16-bit compare is used here. Flipping the top bit of both operands
                // maps unsigned order onto signed order, so the signed compare gives the unsigned answer.
                ushort4 signBit = 1 << 15;

                v128 biasedLeft  = Sse2.xor_si128(left, signBit);
                v128 biasedRight = Sse2.xor_si128(right, signBit);

                return Sse2.cmpgt_epi16(biasedLeft, biasedRight);
            }
            else
            {
                throw new CPUFeatureCheckException();
            }
        }
Beispiel #30
0
        // Checks maxmath.vrol on every entry of UShort4.TestData_LHS for rotation amounts 1 to 3:
        // lane k of the rotated vector must equal lane ((4 - amount) + k) % 4 of the source.
        public static void ushort4()
        {
            for (int sampleIndex = 0; sampleIndex < UShort4.TestData_LHS.Length; sampleIndex++)
            {
                var source = UShort4.TestData_LHS[sampleIndex];

                for (int amount = 1; amount < 4; amount++)
                {
                    ushort4 rotated = maxmath.vrol(source, amount);

                    for (int lane = 0; lane < 4; lane++)
                    {
                        int sourceLane = ((4 - amount) + lane) % 4;

                        Assert.AreEqual(rotated[lane], source[sourceLane]);
                    }
                }
            }
        }
Beispiel #31
0
 // Externally implemented (native) method; the import attribute is not visible in this excerpt.
 // NOTE(review): presumably the binding for the CUDA driver function of the same name, copying
 // ByteCount bytes from device memory at srcDevice into dstHost - confirm against the native API.
 public static extern CUResult cuMemcpyDtoH_v2(ref ushort4 dstHost, CUdeviceptr srcDevice, SizeT ByteCount);