Ejemplo n.º 1
0
        public static byte16 intsqrt(byte16 x)
        {
            if (Avx2.IsAvx2Supported)
            {
                return(new byte16(intsqrt(x.v8_0), intsqrt(x.v8_8)));
            }
            else if (Sse2.IsSse2Supported)
            {
                v128 ZERO = default(v128);

                byte16 result = ZERO;
                byte16 mask   = new byte16(1 << 6);

                v128 doneMask = ZERO;


                v128 tempMask = Sse2.cmpeq_epi8(ZERO, ZERO);

                doneMask = Sse2.cmpeq_epi8(x, max(mask, x));
                tempMask = Mask.BlendV(tempMask, mask, doneMask);

                while (bitmask32(16 * sizeof(byte)) != Sse2.movemask_epi8(doneMask))
                {
                    mask >>= 2;

                    doneMask = Sse2.or_si128(doneMask, Sse2.cmpeq_epi8(x, max(mask, x)));

                    if (Sse4_1.IsSse41Supported)
                    {
                        tempMask = Mask.BlendV(tempMask, mask, Sse2.and_si128(tempMask, doneMask));
                    }
                    else
                    {
                        tempMask = Mask.BlendV(tempMask, mask, Sse2.and_si128(Sse2.cmpgt_epi8(default, tempMask), doneMask));
Ejemplo n.º 2
0
        // Per-lane unsigned remainder (dividend % divisor) of sixteen bytes, AVX2 path.
        //
        // Bitwise restoring long division: the dividend bits are fed MSB-first into a running
        // remainder, and the divisor is subtracted from every lane whose remainder has reached it.
        // The work is done on 16-bit lanes and packed back to bytes on return.
        //
        // dividend: numerators, one per lane.
        // divisor:  denominators, one per lane; each lane is asserted to be non-zero.
        // Returns the sixteen remainders.
        // Throws CPUFeatureCheckException when AVX2 is not supported.
        internal static byte16 vrem_byte(byte16 dividend, byte16 divisor)
        {
            Assert.AreNotEqual(divisor.x0, 0);
            Assert.AreNotEqual(divisor.x1, 0);
            Assert.AreNotEqual(divisor.x2, 0);
            Assert.AreNotEqual(divisor.x3, 0);
            Assert.AreNotEqual(divisor.x4, 0);
            Assert.AreNotEqual(divisor.x5, 0);
            Assert.AreNotEqual(divisor.x6, 0);
            Assert.AreNotEqual(divisor.x7, 0);
            Assert.AreNotEqual(divisor.x8, 0);
            Assert.AreNotEqual(divisor.x9, 0);
            Assert.AreNotEqual(divisor.x10, 0);
            Assert.AreNotEqual(divisor.x11, 0);
            Assert.AreNotEqual(divisor.x12, 0);
            Assert.AreNotEqual(divisor.x13, 0);
            Assert.AreNotEqual(divisor.x14, 0);
            Assert.AreNotEqual(divisor.x15, 0);

            if (Avx2.IsAvx2Supported)
            {
                ushort16 ONE  = new ushort16(1);
                v256     ZERO = default(v256);

                ushort16 wideDivisor  = divisor;
                ushort16 wideDividend = dividend;

                // Step for bit 7: the remainder starts at zero, so there is nothing to shift yet.
                ushort16 rem = ushort16.zero;
                rem |= ONE & (wideDividend >> 7);

                // min(divisor, rem) == divisor  <=>  rem >= divisor (unsigned).
                v256 reachedDivisor = Avx2.mm256_cmpeq_epi16(maxmath.min(wideDivisor, rem), wideDivisor);
                rem -= Avx2.mm256_blendv_epi8(ZERO, wideDivisor, reachedDivisor);

                // Steps for bits 6 down to 1.
                for (int bit = 6; bit >= 1; bit--)
                {
                    rem <<= 1;
                    rem |= ONE & (wideDividend >> bit);

                    reachedDivisor = Avx2.mm256_cmpeq_epi16(maxmath.min(wideDivisor, rem), wideDivisor);
                    rem -= Avx2.mm256_blendv_epi8(ZERO, wideDivisor, reachedDivisor);
                }

                // Step for bit 0: no shift of the dividend is needed.
                rem <<= 1;
                rem |= ONE & wideDividend;

                reachedDivisor = Avx2.mm256_cmpeq_epi16(maxmath.min(wideDivisor, rem), wideDivisor);
                rem -= Avx2.mm256_blendv_epi8(ZERO, wideDivisor, reachedDivisor);

                // Narrow the two 128-bit halves back to sixteen bytes.
                v128 lowerHalf = Avx.mm256_castsi256_si128(rem);
                v128 upperHalf = Avx2.mm256_extracti128_si256(rem, 1);

                return Sse2.packus_epi16(lowerHalf, upperHalf);
            }
            else
            {
                throw new CPUFeatureCheckException();
            }
        }
Ejemplo n.º 3
0
        // Randomised test: maxmath.intpow(sbyte16, byte16) must equal the scalar reference
        // _intpow applied lane by lane (each result truncated to sbyte). Uses a fixed seed
        // and 64 random input pairs.
        public static void sbyte16()
        {
            Random8 rng = new Random8(135);

            for (int iteration = 0; iteration != 64; ++iteration)
            {
                sbyte16 x = rng.NextSByte16();
                byte16  n = rng.NextByte16();

                sbyte16 expected = new sbyte16((sbyte)_intpow(x.x0,  n.x0),  (sbyte)_intpow(x.x1,  n.x1),
                                               (sbyte)_intpow(x.x2,  n.x2),  (sbyte)_intpow(x.x3,  n.x3),
                                               (sbyte)_intpow(x.x4,  n.x4),  (sbyte)_intpow(x.x5,  n.x5),
                                               (sbyte)_intpow(x.x6,  n.x6),  (sbyte)_intpow(x.x7,  n.x7),
                                               (sbyte)_intpow(x.x8,  n.x8),  (sbyte)_intpow(x.x9,  n.x9),
                                               (sbyte)_intpow(x.x10, n.x10), (sbyte)_intpow(x.x11, n.x11),
                                               (sbyte)_intpow(x.x12, n.x12), (sbyte)_intpow(x.x13, n.x13),
                                               (sbyte)_intpow(x.x14, n.x14), (sbyte)_intpow(x.x15, n.x15));
                sbyte16 actual = maxmath.intpow(x, n);

                Assert.AreEqual(expected, actual);
            }
        }
Ejemplo n.º 4
0
        // Test for maxmath.rol(byte16, int): every lane of the vector result must equal the low
        // byte of a 32-bit math.rol applied to that lane's byte replicated into all four bytes
        // of an int. Runs NUM_ROTATION_TESTS random rotation amounts per test vector.
        public static void rol_byte16()
        {
            bool     allMatch = true;
            Random32 rng      = new Random32(RNG_SEED);

            for (int i = 0; i < Byte16.NUM_TESTS; i++)
            {
                byte16 lhs = Byte16.TestData_LHS[i];

                for (int j = 0; j < NUM_ROTATION_TESTS; j++)
                {
                    int    n       = rng.NextInt();
                    byte16 rotated = maxmath.rol(lhs, n);

                    allMatch &= rotated.x0  == (byte)math.rol(lhs.x0  | (lhs.x0  << 8) | (lhs.x0  << 16) | (lhs.x0  << 24), n);
                    allMatch &= rotated.x1  == (byte)math.rol(lhs.x1  | (lhs.x1  << 8) | (lhs.x1  << 16) | (lhs.x1  << 24), n);
                    allMatch &= rotated.x2  == (byte)math.rol(lhs.x2  | (lhs.x2  << 8) | (lhs.x2  << 16) | (lhs.x2  << 24), n);
                    allMatch &= rotated.x3  == (byte)math.rol(lhs.x3  | (lhs.x3  << 8) | (lhs.x3  << 16) | (lhs.x3  << 24), n);
                    allMatch &= rotated.x4  == (byte)math.rol(lhs.x4  | (lhs.x4  << 8) | (lhs.x4  << 16) | (lhs.x4  << 24), n);
                    allMatch &= rotated.x5  == (byte)math.rol(lhs.x5  | (lhs.x5  << 8) | (lhs.x5  << 16) | (lhs.x5  << 24), n);
                    allMatch &= rotated.x6  == (byte)math.rol(lhs.x6  | (lhs.x6  << 8) | (lhs.x6  << 16) | (lhs.x6  << 24), n);
                    allMatch &= rotated.x7  == (byte)math.rol(lhs.x7  | (lhs.x7  << 8) | (lhs.x7  << 16) | (lhs.x7  << 24), n);
                    allMatch &= rotated.x8  == (byte)math.rol(lhs.x8  | (lhs.x8  << 8) | (lhs.x8  << 16) | (lhs.x8  << 24), n);
                    allMatch &= rotated.x9  == (byte)math.rol(lhs.x9  | (lhs.x9  << 8) | (lhs.x9  << 16) | (lhs.x9  << 24), n);
                    allMatch &= rotated.x10 == (byte)math.rol(lhs.x10 | (lhs.x10 << 8) | (lhs.x10 << 16) | (lhs.x10 << 24), n);
                    allMatch &= rotated.x11 == (byte)math.rol(lhs.x11 | (lhs.x11 << 8) | (lhs.x11 << 16) | (lhs.x11 << 24), n);
                    allMatch &= rotated.x12 == (byte)math.rol(lhs.x12 | (lhs.x12 << 8) | (lhs.x12 << 16) | (lhs.x12 << 24), n);
                    allMatch &= rotated.x13 == (byte)math.rol(lhs.x13 | (lhs.x13 << 8) | (lhs.x13 << 16) | (lhs.x13 << 24), n);
                    allMatch &= rotated.x14 == (byte)math.rol(lhs.x14 | (lhs.x14 << 8) | (lhs.x14 << 16) | (lhs.x14 << 24), n);
                    allMatch &= rotated.x15 == (byte)math.rol(lhs.x15 | (lhs.x15 << 8) | (lhs.x15 << 16) | (lhs.x15 << 24), n);
                }
            }

            Assert.AreEqual(true, allMatch);
        }
Ejemplo n.º 5
0
        // Per-lane least common multiple of the absolute values of x and y.
        //
        // Computed as (|x| / gcd(|x|, |y|)) * |y|; dividing before multiplying keeps the
        // intermediate value as small as possible. The result is a byte, so a mathematical
        // lcm above 255 does not fit in a lane.
        //
        // x, y: signed operands, one pair per lane.
        // Returns the lcm per lane; a lane whose two operands are both zero yields 0.
        public static byte16 lcm(sbyte16 x, sbyte16 y)
        {
            byte16 absX = (byte16)abs(x);
            byte16 absY = (byte16)abs(y);

            // The gcd of a lane is zero only when both operands are zero. Clamping it to 1
            // makes such a lane evaluate (0 / 1) * 0 == 0 instead of dividing by zero; every
            // other lane already has gcd >= 1 and is unaffected.
            byte16 divisor = max(gcd(absX, absY), new byte16(1));

            return((absX / divisor) * absY);
        }
Ejemplo n.º 6
0
        // Draws sixteen random bytes, one per lane, offset from min.
        //
        // AVX2 path: each lane's span (max - min) is multiplied by a fresh NextState() value in
        // 16-bit arithmetic, and the high byte of that product is added to min.
        // Otherwise the two 8-lane halves are generated separately with NextByte8.
        //
        // min, max: per-lane bounds; max is asserted to be not smaller than min in every lane.
        public byte16 NextByte16(byte16 min, byte16 max)
        {
            Assert.IsNotSmaller(max.x0, min.x0);
            Assert.IsNotSmaller(max.x1, min.x1);
            Assert.IsNotSmaller(max.x2, min.x2);
            Assert.IsNotSmaller(max.x3, min.x3);
            Assert.IsNotSmaller(max.x4, min.x4);
            Assert.IsNotSmaller(max.x5, min.x5);
            Assert.IsNotSmaller(max.x6, min.x6);
            Assert.IsNotSmaller(max.x7, min.x7);
            Assert.IsNotSmaller(max.x8, min.x8);
            Assert.IsNotSmaller(max.x9, min.x9);
            Assert.IsNotSmaller(max.x10, min.x10);
            Assert.IsNotSmaller(max.x11, min.x11);
            Assert.IsNotSmaller(max.x12, min.x12);
            Assert.IsNotSmaller(max.x13, min.x13);
            Assert.IsNotSmaller(max.x14, min.x14);
            Assert.IsNotSmaller(max.x15, min.x15);

            if (Avx2.IsAvx2Supported)
            {
                short16 span   = (short16)(max - min);
                short16 states = new short16(NextState(), NextState(), NextState(), NextState(),
                                             NextState(), NextState(), NextState(), NextState(),
                                             NextState(), NextState(), NextState(), NextState(),
                                             NextState(), NextState(), NextState(), NextState());
                short16 scaled = span * states;

                // Within each 128-bit half, move the high byte of every 16-bit product into the low 8 bytes.
                v256 HIGH_BYTE_INDICES = new v256(1, 3, 5, 7, 9, 11, 13, 15, 0, 0, 0, 0, 0, 0, 0, 0,
                                                  1, 3, 5, 7, 9, 11, 13, 15, 0, 0, 0, 0, 0, 0, 0, 0);
                v256 highBytes = Avx2.mm256_shuffle_epi8(scaled, HIGH_BYTE_INDICES);

                // Bring 64-bit elements 0 and 2 (the two packed groups) next to each other in the low 128 bits.
                v256 gathered = Avx2.mm256_permute4x64_epi64(highBytes, Sse.SHUFFLE(0, 0, 2, 0));

                return min + Avx.mm256_castsi256_si128(gathered);
            }
            else
            {
                return new byte16(NextByte8(min.v8_0, max.v8_0), NextByte8(min.v8_8, max.v8_8));
            }
        }
Ejemplo n.º 7
0
        // Per-lane unsigned division with remainder of eight bytes, SSE2 path.
        //
        // Bitwise restoring long division on 16-bit lanes: the dividend bits are fed MSB-first
        // into a running remainder; whenever a lane's remainder has reached the divisor, the
        // divisor is subtracted and a 1 is shifted into that lane's quotient.
        //
        // dividend:  numerators, one per lane.
        // divisor:   denominators, one per lane; each lane is asserted to be non-zero.
        // remainder: receives dividend % divisor per lane.
        // Returns dividend / divisor per lane.
        // Throws CPUFeatureCheckException when SSE2 is not supported.
        internal static byte8 vdivrem_byte_SSE_FALLBACK(byte8 dividend, byte8 divisor, out byte8 remainder)
        {
            Assert.AreNotEqual(divisor.x0, 0);
            Assert.AreNotEqual(divisor.x1, 0);
            Assert.AreNotEqual(divisor.x2, 0);
            Assert.AreNotEqual(divisor.x3, 0);
            Assert.AreNotEqual(divisor.x4, 0);
            Assert.AreNotEqual(divisor.x5, 0);
            Assert.AreNotEqual(divisor.x6, 0);
            Assert.AreNotEqual(divisor.x7, 0);

            if (Sse2.IsSse2Supported)
            {
                ushort8 quotients  = ushort8.zero;
                ushort8 remainders = ushort8.zero;

                ushort8 divisorCast  = divisor;
                ushort8 dividendCast = dividend;


                // Step for bit 7 (MSB): nothing to shift yet.
                remainders |= (new ushort8(1) & (dividendCast >> 7));

                // min(divisor, remainder) == divisor  <=>  remainder >= divisor (unsigned).
                v128 subtractDivisorFromRemainder = Sse2.cmpeq_epi16(maxmath.min(divisorCast, remainders), divisorCast);

                remainders -= Mask.BlendV(default(v128), divisorCast, subtractDivisorFromRemainder);
                quotients  |= new ushort8(1) & subtractDivisorFromRemainder;

                // Steps for bits 6 down to 1.
                for (int i = 6; i > 0; i--)
                {
                    quotients  <<= 1;
                    remainders <<= 1;

                    remainders |= (new ushort8(1) & (dividendCast >> i));

                    // The lanes are 16 bits wide, so compare them as 16-bit values, exactly like
                    // the first and last steps. An 8-bit compare would yield a half-set mask
                    // (0xFF00) for lanes that must not subtract and would only work because the
                    // upper byte of every divisor lane happens to be zero.
                    subtractDivisorFromRemainder = Sse2.cmpeq_epi16(maxmath.min(divisorCast, remainders), divisorCast);

                    remainders -= Mask.BlendV(default(v128), divisorCast, subtractDivisorFromRemainder);
                    quotients  |= new ushort8(1) & subtractDivisorFromRemainder;
                }

                // Step for bit 0: no shift of the dividend is needed.
                remainders <<= 1;
                quotients  <<= 1;

                remainders |= new ushort8(1) & dividendCast;

                subtractDivisorFromRemainder = Sse2.cmpeq_epi16(maxmath.min(divisorCast, remainders), divisorCast);

                remainders -= Mask.BlendV(default(v128), divisorCast, subtractDivisorFromRemainder);
                quotients  |= new ushort8(1) & subtractDivisorFromRemainder;


                // Narrow both results in one pack: remainders land in the low 8 bytes,
                // quotients in the high 8 bytes.
                byte16 temp = Sse2.packus_epi16(remainders, quotients);
                remainder = temp.v8_0;
                return(temp.v8_8);
            }
            else
            {
                throw new CPUFeatureCheckException();
            }
        }
Ejemplo n.º 8
0
        // Randomised test: maxmath.intsqrt(byte16) must equal the scalar reference _intsqrt
        // applied lane by lane (each result truncated to byte). Uses a fixed seed and 64
        // random inputs.
        public static void byte16()
        {
            Random8 rng = new Random8(135);

            for (int iteration = 0; iteration != 64; ++iteration)
            {
                byte16 x = rng.NextByte16();

                byte16 expected = new byte16((byte)_intsqrt(x.x0),  (byte)_intsqrt(x.x1),
                                             (byte)_intsqrt(x.x2),  (byte)_intsqrt(x.x3),
                                             (byte)_intsqrt(x.x4),  (byte)_intsqrt(x.x5),
                                             (byte)_intsqrt(x.x6),  (byte)_intsqrt(x.x7),
                                             (byte)_intsqrt(x.x8),  (byte)_intsqrt(x.x9),
                                             (byte)_intsqrt(x.x10), (byte)_intsqrt(x.x11),
                                             (byte)_intsqrt(x.x12), (byte)_intsqrt(x.x13),
                                             (byte)_intsqrt(x.x14), (byte)_intsqrt(x.x15));
                byte16 actual = maxmath.intsqrt(x);

                Assert.AreEqual(expected, actual);
            }
        }
Ejemplo n.º 9
0
        // Finds the lowest lane index of v whose value equals x.
        //
        // The SIMD paths build a bit mask with one bit per matching lane and count its trailing
        // zeros. The scalar path scans the lanes in order and returns 32 when nothing matches.
        // NOTE(review): the SIMD paths presumably also give 32 for "not found", via math.tzcnt
        // of an all-zero mask - confirm against math.tzcnt.
        //
        // v: the 32 lanes to search.
        // x: the value to look for.
        public static int indexof(byte32 v, byte x)
        {
            if (Avx2.IsAvx2Supported)
            {
                int matches = Avx2.mm256_movemask_epi8(Avx2.mm256_cmpeq_epi8(v, new byte32(x)));

                return math.tzcnt(matches);
            }
            else if (Sse2.IsSse2Supported)
            {
                byte16 broadcast = x;

                // Bits 0..15 describe lanes 0..15, bits 16..31 describe lanes 16..31.
                int lowMatches  = Sse2.movemask_epi8(Sse2.cmpeq_epi8(v._v16_0, broadcast));
                int highMatches = Sse2.movemask_epi8(Sse2.cmpeq_epi8(v._v16_16, broadcast)) << 16;

                return math.tzcnt(lowMatches | highMatches);
            }
            else
            {
                for (int lane = 0; lane < 32; lane++)
                {
                    if (v[lane] == x)
                    {
                        return lane;
                    }
                }

                return 32;
            }
        }
Ejemplo n.º 10
0
        // Per-lane three-way comparison of unsigned bytes.
        //
        // SSE2 path: combines the two "greater than" masks so that a lane with x > y yields 1,
        // a lane with y > x yields -1 and an equal lane yields 0. This assumes
        // Operator.greater_mask_byte produces -1 (all bits set) in lanes where the comparison
        // holds and 0 elsewhere - TODO confirm.
        // Otherwise each lane is delegated to the scalar compareto.
        public static sbyte16 compareto(byte16 x, byte16 y)
        {
            if (Sse2.IsSse2Supported)
            {
                sbyte16 xIsGreater = Operator.greater_mask_byte(x, y);
                sbyte16 yIsGreater = Operator.greater_mask_byte(y, x);

                // Same value as (0 - xIsGreater) + yIsGreater.
                return yIsGreater - xIsGreater;
            }
            else
            {
                return new sbyte16((sbyte)compareto(x.x0,  y.x0),  (sbyte)compareto(x.x1,  y.x1),
                                   (sbyte)compareto(x.x2,  y.x2),  (sbyte)compareto(x.x3,  y.x3),
                                   (sbyte)compareto(x.x4,  y.x4),  (sbyte)compareto(x.x5,  y.x5),
                                   (sbyte)compareto(x.x6,  y.x6),  (sbyte)compareto(x.x7,  y.x7),
                                   (sbyte)compareto(x.x8,  y.x8),  (sbyte)compareto(x.x9,  y.x9),
                                   (sbyte)compareto(x.x10, y.x10), (sbyte)compareto(x.x11, y.x11),
                                   (sbyte)compareto(x.x12, y.x12), (sbyte)compareto(x.x13, y.x13),
                                   (sbyte)compareto(x.x14, y.x14), (sbyte)compareto(x.x15, y.x15));
            }
        }
Ejemplo n.º 11
0
        // Randomised test: maxmath.gcd(byte16, byte16) must equal the scalar reference _gcd
        // applied lane by lane (each result truncated to byte). Uses a fixed seed and 64
        // random input pairs.
        public static void byte16()
        {
            Random8 rng = new Random8(135);

            for (int iteration = 0; iteration != 64; ++iteration)
            {
                byte16 x = rng.NextByte16();
                byte16 y = rng.NextByte16();

                byte16 expected = new byte16((byte)_gcd(x.x0,  y.x0),  (byte)_gcd(x.x1,  y.x1),
                                             (byte)_gcd(x.x2,  y.x2),  (byte)_gcd(x.x3,  y.x3),
                                             (byte)_gcd(x.x4,  y.x4),  (byte)_gcd(x.x5,  y.x5),
                                             (byte)_gcd(x.x6,  y.x6),  (byte)_gcd(x.x7,  y.x7),
                                             (byte)_gcd(x.x8,  y.x8),  (byte)_gcd(x.x9,  y.x9),
                                             (byte)_gcd(x.x10, y.x10), (byte)_gcd(x.x11, y.x11),
                                             (byte)_gcd(x.x12, y.x12), (byte)_gcd(x.x13, y.x13),
                                             (byte)_gcd(x.x14, y.x14), (byte)_gcd(x.x15, y.x15));
                byte16 actual = maxmath.gcd(x, y);

                Assert.AreEqual(expected, actual);
            }
        }
Ejemplo n.º 12
0
        // Per-lane test of whether dividend is an exact multiple of divisor.
        //
        // When the divisor is a constant expression, the remainder is avoided: with
        // m = (65535 / divisor) + 1, the lane is reported divisible when
        // dividend * m <= m - 1 in 16-bit arithmetic. Otherwise the remainder is computed and
        // compared against zero.
        //
        // dividend: values to test, one per lane.
        // divisor:  divisors, one per lane; each lane is asserted to be non-zero.
        public static bool16 isdivisible(byte16 dividend, byte16 divisor)
        {
            Assert.AreNotEqual(0, divisor.x0);
            Assert.AreNotEqual(0, divisor.x1);
            Assert.AreNotEqual(0, divisor.x2);
            Assert.AreNotEqual(0, divisor.x3);
            Assert.AreNotEqual(0, divisor.x4);
            Assert.AreNotEqual(0, divisor.x5);
            Assert.AreNotEqual(0, divisor.x6);
            Assert.AreNotEqual(0, divisor.x7);
            Assert.AreNotEqual(0, divisor.x8);
            Assert.AreNotEqual(0, divisor.x9);
            Assert.AreNotEqual(0, divisor.x10);
            Assert.AreNotEqual(0, divisor.x11);
            Assert.AreNotEqual(0, divisor.x12);
            Assert.AreNotEqual(0, divisor.x13);
            Assert.AreNotEqual(0, divisor.x14);
            Assert.AreNotEqual(0, divisor.x15);

            if (!Constant.IsConstantExpression(divisor))
            {
                return dividend % divisor == 0;
            }

            ushort16 magic = (new ushort16(ushort.MaxValue) / divisor) + 1;

            return dividend * magic <= magic - 1;
        }
Ejemplo n.º 13
0
        // Reverses the bit order inside every byte lane by swapping progressively larger
        // groups: adjacent bits, then adjacent bit pairs, then the two nibbles.
        public static byte16 reversebits(byte16 x)
        {
            // Swap neighbouring bits.
            byte16 bitsSwapped = ((x >> 1) & 0x55) | ((x & 0x55) << 1);

            // Swap neighbouring 2-bit groups.
            byte16 pairsSwapped = ((bitsSwapped >> 2) & 0x33) | ((bitsSwapped & 0x33) << 2);

            // Swap the two nibbles.
            return (pairsSwapped >> 4) | (pairsSwapped << 4);
        }
Ejemplo n.º 14
0
        // Per-lane count of trailing zero bits of a byte; a zero lane yields 8.
        //
        // SSSE3 path: two 16-entry lookup tables indexed by the low and high nibble; the smaller
        //             of the two looked-up counts is the answer.
        // SSE2 path:  isolates the lowest set bit and sums the weights 4, 2 and 1 according to
        //             which bit-position masks it misses, plus 1 more when the lane is zero.
        // Otherwise:  scalar tzcnt per lane.
        public static byte16 tzcnt(byte16 x)
        {
            if (Ssse3.IsSsse3Supported)
            {
                v128 NIBBLE_MASK = new v128(0x0F0F_0F0F);

                // Entry k is the trailing-zero count of nibble k (entry 0 is 8, "no bit set").
                v128 LOW_NIBBLE_COUNTS  = new v128(8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0);
                // Same table offset by 4 for the high nibble (entry 0 stays 8).
                v128 HIGH_NIBBLE_COUNTS = new v128(8, 4, 5, 4, 6, 4, 5, 4, 7, 4, 5, 4, 6, 4, 5, 4);

                v128 lowNibbles  = Sse2.and_si128(NIBBLE_MASK, x);
                v128 highNibbles = Sse2.and_si128(NIBBLE_MASK, Sse2.srli_epi16(x, 4));

                v128 fromLow  = Ssse3.shuffle_epi8(LOW_NIBBLE_COUNTS, lowNibbles);
                v128 fromHigh = Ssse3.shuffle_epi8(HIGH_NIBBLE_COUNTS, highNibbles);

                return Sse2.min_epu8(fromLow, fromHigh);
            }
            else if (Sse2.IsSse2Supported)
            {
                v128 ZERO = default(v128);

                // x & -x keeps only the lowest set bit of each lane.
                v128 lowestBit = x & (byte16)(-(sbyte16)x);

                byte16 zeroLane  = Mask.BlendV(ZERO, new byte16(1), Sse2.cmpeq_epi8(lowestBit, ZERO));
                byte16 weight4   = Mask.BlendV(ZERO, new byte16(4), Sse2.cmpeq_epi8(lowestBit & (byte16)0x0F, ZERO));
                byte16 weight2   = Mask.BlendV(ZERO, new byte16(2), Sse2.cmpeq_epi8(lowestBit & (byte16)0x33, ZERO));
                byte16 weight1   = Mask.BlendV(ZERO, new byte16(1), Sse2.cmpeq_epi8(lowestBit & (byte16)0x55, ZERO));

                return (zeroLane + weight4) + (weight2 + weight1);
            }
            else
            {
                return new byte16(tzcnt(x.x0),  tzcnt(x.x1),  tzcnt(x.x2),  tzcnt(x.x3),
                                  tzcnt(x.x4),  tzcnt(x.x5),  tzcnt(x.x6),  tzcnt(x.x7),
                                  tzcnt(x.x8),  tzcnt(x.x9),  tzcnt(x.x10), tzcnt(x.x11),
                                  tzcnt(x.x12), tzcnt(x.x13), tzcnt(x.x14), tzcnt(x.x15));
            }
        }
Ejemplo n.º 15
0
        // Per-lane rounding down to a power of two: the highest set bit is smeared into every
        // lower position, then everything below the top bit is subtracted away again.
        // A zero lane stays zero.
        public static byte16 floorpow2(byte16 x)
        {
            byte16 smeared = x | (x >> 1);
            smeared |= smeared >> 2;
            smeared |= smeared >> 4;

            return smeared - (smeared >> 1);
        }
Ejemplo n.º 16
0
        // Per-lane rounding up to a power of two: subtract one, smear the highest set bit into
        // every lower position, then add one back. Lanes that are already a power of two are
        // returned unchanged.
        public static byte16 ceilpow2(byte16 x)
        {
            byte16 smeared = x - 1;
            smeared |= smeared >> 1;
            smeared |= smeared >> 2;
            smeared |= smeared >> 4;

            return smeared + 1;
        }
Ejemplo n.º 17
0
        // Per-lane unsigned remainder (dividend % divisor) of sixteen bytes, SSE2 path.
        //
        // Bitwise restoring long division carried out directly on the 8-bit lanes: the dividend
        // bits are fed MSB-first into a running remainder, and the divisor is subtracted from
        // every lane whose remainder has reached it.
        //
        // dividend: numerators, one per lane.
        // divisor:  denominators, one per lane; each lane is asserted to be non-zero.
        // Returns the sixteen remainders.
        // Throws CPUFeatureCheckException when SSE2 is not supported.
        internal static byte16 vrem_byte_SSE_FALLBACK(byte16 dividend, byte16 divisor)
        {
            Assert.AreNotEqual(divisor.x0, 0);
            Assert.AreNotEqual(divisor.x1, 0);
            Assert.AreNotEqual(divisor.x2, 0);
            Assert.AreNotEqual(divisor.x3, 0);
            Assert.AreNotEqual(divisor.x4, 0);
            Assert.AreNotEqual(divisor.x5, 0);
            Assert.AreNotEqual(divisor.x6, 0);
            Assert.AreNotEqual(divisor.x7, 0);
            Assert.AreNotEqual(divisor.x8, 0);
            Assert.AreNotEqual(divisor.x9, 0);
            Assert.AreNotEqual(divisor.x10, 0);
            Assert.AreNotEqual(divisor.x11, 0);
            Assert.AreNotEqual(divisor.x12, 0);
            Assert.AreNotEqual(divisor.x13, 0);
            Assert.AreNotEqual(divisor.x14, 0);
            Assert.AreNotEqual(divisor.x15, 0);

            if (Sse2.IsSse2Supported)
            {
                byte16 ONE  = new byte16(1);
                v128   ZERO = default(v128);

                // Step for bit 7 (MSB): the remainder starts at zero, so nothing to shift yet.
                byte16 rem = byte16.zero;
                rem |= ONE & (dividend >> 7);

                // min(divisor, rem) == divisor  <=>  rem >= divisor (unsigned).
                v128 reachedDivisor = Sse2.cmpeq_epi8(maxmath.min(divisor, rem), divisor);
                rem -= Mask.BlendV(ZERO, divisor, reachedDivisor);

                // Steps for bits 6 down to 1.
                for (int bit = 6; bit >= 1; bit--)
                {
                    rem <<= 1;
                    rem |= ONE & (dividend >> bit);

                    reachedDivisor = Sse2.cmpeq_epi8(maxmath.min(divisor, rem), divisor);
                    rem -= Mask.BlendV(ZERO, divisor, reachedDivisor);
                }

                // Step for bit 0: no shift of the dividend is needed.
                rem <<= 1;
                rem |= ONE & dividend;

                reachedDivisor = Sse2.cmpeq_epi8(maxmath.min(divisor, rem), divisor);
                rem -= Mask.BlendV(ZERO, divisor, reachedDivisor);

                return rem;
            }
            else
            {
                throw new CPUFeatureCheckException();
            }
        }
Ejemplo n.º 18
0
        // Per-lane integer power x^n in 8-bit arithmetic.
        //
        // SSE2 path: exponentiation by squaring run on all lanes at once. In every round the
        // accumulator is multiplied by the current power of x in lanes whose exponent has its
        // low bit set, the exponents are shifted right by one, and a lane's accumulator is
        // copied into the result in the round its exponent first becomes zero. The loop ends
        // once every exponent is zero.
        // Otherwise each lane is delegated to the scalar intpow and truncated to sbyte.
        //
        // x: bases, one per lane.
        // n: exponents, one per lane.
        public static sbyte16 intpow(sbyte16 x, byte16 n)
        {
            if (Sse2.IsSse2Supported)
            {
                v128    ZERO = default(v128);
                sbyte16 ONE  = new sbyte16(1);

                v128 doneMask = ZERO;
                v128 result   = ZERO;

                sbyte16 p = x;    // current squared power of x
                sbyte16 y = ONE;  // running product

                while (true)
                {
                    // Multiply everywhere, then keep the product only where the low exponent bit is set.
                    v128 product = y * p;
                    y = Mask.BlendV(y, product, Sse2.cmpeq_epi8(ONE, Sse2.and_si128(ONE, n)));

                    n >>= 1;

                    // Lanes that were not finished before this round but are now receive their final value.
                    v128 exhausted = Sse2.cmpeq_epi8(ZERO, n);
                    result   = Mask.BlendV(result, y, Sse2.andnot_si128(doneMask, exhausted));
                    doneMask = exhausted;

                    if (bitmask32(16 * sizeof(sbyte)) == Sse2.movemask_epi8(doneMask))
                    {
                        return result;
                    }

                    p *= p;
                }
            }
            else
            {
                return new sbyte16((sbyte)intpow((int)x.x0,  n.x0),  (sbyte)intpow((int)x.x1,  n.x1),
                                   (sbyte)intpow((int)x.x2,  n.x2),  (sbyte)intpow((int)x.x3,  n.x3),
                                   (sbyte)intpow((int)x.x4,  n.x4),  (sbyte)intpow((int)x.x5,  n.x5),
                                   (sbyte)intpow((int)x.x6,  n.x6),  (sbyte)intpow((int)x.x7,  n.x7),
                                   (sbyte)intpow((int)x.x8,  n.x8),  (sbyte)intpow((int)x.x9,  n.x9),
                                   (sbyte)intpow((int)x.x10, n.x10), (sbyte)intpow((int)x.x11, n.x11),
                                   (sbyte)intpow((int)x.x12, n.x12), (sbyte)intpow((int)x.x13, n.x13),
                                   (sbyte)intpow((int)x.x14, n.x14), (sbyte)intpow((int)x.x15, n.x15));
            }
        }
Ejemplo n.º 19
0
 // Per-lane unsigned maximum of two byte vectors.
 // Uses a single SSE2 instruction when available, otherwise the scalar math.max per lane.
 public static byte16 max(byte16 a, byte16 b)
 {
     if (Sse2.IsSse2Supported)
     {
         return Sse2.max_epu8(a, b);
     }
     else
     {
         return new byte16((byte)math.max((uint)a.x0,  (uint)b.x0),
                           (byte)math.max((uint)a.x1,  (uint)b.x1),
                           (byte)math.max((uint)a.x2,  (uint)b.x2),
                           (byte)math.max((uint)a.x3,  (uint)b.x3),
                           (byte)math.max((uint)a.x4,  (uint)b.x4),
                           (byte)math.max((uint)a.x5,  (uint)b.x5),
                           (byte)math.max((uint)a.x6,  (uint)b.x6),
                           (byte)math.max((uint)a.x7,  (uint)b.x7),
                           (byte)math.max((uint)a.x8,  (uint)b.x8),
                           (byte)math.max((uint)a.x9,  (uint)b.x9),
                           (byte)math.max((uint)a.x10, (uint)b.x10),
                           (byte)math.max((uint)a.x11, (uint)b.x11),
                           (byte)math.max((uint)a.x12, (uint)b.x12),
                           (byte)math.max((uint)a.x13, (uint)b.x13),
                           (byte)math.max((uint)a.x14, (uint)b.x14),
                           (byte)math.max((uint)a.x15, (uint)b.x15));
     }
 }
Ejemplo n.º 20
0
 // Per-lane bitwise "left AND NOT right".
 public static byte16 andnot(byte16 left, byte16 right)
 {
     if (Sse2.IsSse2Supported)
     {
         // The operands are passed swapped so that the intrinsic agrees with the
         // scalar expression left & ~right used below.
         return Sse2.andnot_si128(right, left);
     }

     return left & ~right;
 }
Ejemplo n.º 21
0
 // Per-lane average of two unsigned byte vectors, rounded up: (x + y + 1) >> 1,
 // with the sum formed in int arithmetic in the scalar path.
 public static byte16 avg(byte16 x, byte16 y)
 {
     if (Sse2.IsSse2Supported)
     {
         return Sse2.avg_epu8(x, y);
     }
     else
     {
         return new byte16((byte)((x.x0  + y.x0  + 1) >> 1),
                           (byte)((x.x1  + y.x1  + 1) >> 1),
                           (byte)((x.x2  + y.x2  + 1) >> 1),
                           (byte)((x.x3  + y.x3  + 1) >> 1),
                           (byte)((x.x4  + y.x4  + 1) >> 1),
                           (byte)((x.x5  + y.x5  + 1) >> 1),
                           (byte)((x.x6  + y.x6  + 1) >> 1),
                           (byte)((x.x7  + y.x7  + 1) >> 1),
                           (byte)((x.x8  + y.x8  + 1) >> 1),
                           (byte)((x.x9  + y.x9  + 1) >> 1),
                           (byte)((x.x10 + y.x10 + 1) >> 1),
                           (byte)((x.x11 + y.x11 + 1) >> 1),
                           (byte)((x.x12 + y.x12 + 1) >> 1),
                           (byte)((x.x13 + y.x13 + 1) >> 1),
                           (byte)((x.x14 + y.x14 + 1) >> 1),
                           (byte)((x.x15 + y.x15 + 1) >> 1));
     }
 }
Ejemplo n.º 22
0
 // Returns true when all sixteen lanes of c hold the same value.
 //
 // SSSE3 path: shuffles c with an all-zero index vector (every output lane takes lane 0)
 // and compares the result with c itself.
 // Otherwise: compares lane 0 against each of the other fifteen lanes.
 public static bool all_eq(byte16 c)
 {
     if (Ssse3.IsSsse3Supported)
     {
         byte16 firstLaneEverywhere = Ssse3.shuffle_epi8(c, default(v128));

         return firstLaneEverywhere.Equals(c);
     }
     else
     {
         byte first = c.x0;

         // Non-short-circuit & throughout, so every comparison is evaluated.
         bool lanes1to8  = ((first == c.x1 & first == c.x2) & (first == c.x3 & first == c.x4))
                         & ((first == c.x5 & first == c.x6) & (first == c.x7 & first == c.x8));
         bool lanes9to15 = ((first == c.x9 & first == c.x10) & (first == c.x11 & first == c.x12))
                         & ((first == c.x13 & first == c.x14) & first == c.x15);

         return lanes1to8 & lanes9to15;
     }
 }
Ejemplo n.º 23
0
 // Sum of all sixteen byte lanes of x.
 //
 // SSE2 path: delegates to sad against an all-zero vector.
 // Otherwise: adds the lanes pairwise in int arithmetic.
 public static uint csum(byte16 x)
 {
     if (Sse2.IsSse2Supported)
     {
         return sad(x, byte16.zero);
     }
     else
     {
         int lowerHalf = ((x.x0 + x.x1) + (x.x2 + x.x3)) + ((x.x4 + x.x5) + (x.x6 + x.x7));
         int upperHalf = ((x.x8 + x.x9) + (x.x10 + x.x11)) + ((x.x12 + x.x13) + (x.x14 + x.x15));

         return (uint)(lowerHalf + upperHalf);
     }
 }
Ejemplo n.º 24
0
 // Returns true when at least one lane of x is non-zero.
 //
 // SSE2 path: builds one bit per lane that equals zero and checks that this mask is not
 // the value bitmask32 returns for 16 bits (presumably all sixteen bits set - confirm
 // against bitmask32).
 public static bool any(byte16 x)
 {
     if (Sse2.IsSse2Supported)
     {
         int zeroLanes = Sse2.movemask_epi8(Sse2.cmpeq_epi8(x, default(v128)));

         return zeroLanes != bitmask32(16 * sizeof(byte));
     }

     return any(x != 0);
 }
Ejemplo n.º 25
0
 // Alternating subtract/add: even-indexed lanes compute a - b, odd-indexed lanes a + b.
 //
 // SSSE3 path: adds b to a after sign_epi8 has been applied with a control vector that is
 // 255 (i.e. -1) in even lanes and 1 in odd lanes.
 // Otherwise: subtracts b from a, with b negated first in the odd lanes.
 public static byte16 subadd(byte16 a, byte16 b)
 {
     if (Ssse3.IsSsse3Supported)
     {
         v128 LANE_SIGNS = new v128(255, 1, 255, 1, 255, 1, 255, 1, 255, 1, 255, 1, 255, 1, 255, 1);

         return a + Ssse3.sign_epi8(b, LANE_SIGNS);
     }
     else
     {
         bool16 isOddLane = new bool16(false, true, false, true, false, true, false, true,
                                       false, true, false, true, false, true, false, true);

         return a - select(b, (byte16)(-(sbyte16)b), isOddLane);
     }
 }
Ejemplo n.º 26
0
 // Returns true when every lane of x is non-zero.
 //
 // SSE2 path: builds one bit per lane that equals zero; the answer is true when no such
 // bit is set.
 public static bool all(byte16 x)
 {
     if (Sse2.IsSse2Supported)
     {
         int zeroLanes = Sse2.movemask_epi8(Sse2.cmpeq_epi8(x, default(v128)));

         return zeroLanes == 0;
     }

     return all(x != 0);
 }
Ejemplo n.º 27
0
 // Divides every lane of dividend by the same scalar divisor.
 //
 // When the divisor is a constant expression, the quotient and remainder are produced with
 // the / and % operators directly; otherwise the divisor is converted to a byte16 and the
 // vector overload of divrem does the work.
 //
 // remainder: receives dividend % divisor per lane.
 // Returns dividend / divisor per lane.
 public static byte16 divrem(byte16 dividend, byte divisor, out byte16 remainder)
 {
     if (!Constant.IsConstantExpression(divisor))
     {
         return divrem(dividend, (byte16)divisor, out remainder);
     }

     remainder = dividend % divisor;

     return dividend / divisor;
 }
Ejemplo n.º 28
0
        // Converts sixteen unsigned 16-bit values to booleans: every lane is first clamped to
        // the range [0, 1] and narrowed to a byte, and the sixteen bytes are then reinterpreted
        // as a bool16.
        public static bool16 toboolsafe(ushort16 x)
        {
            byte16 zeroOrOne = (byte16)clamp(x, 0, 1);

            if (Sse2.IsSse2Supported)
            {
                return (v128)zeroOrOne;
            }
            else
            {
                // Reinterpret the bytes in place instead of converting lane by lane.
                return *(bool16 *)&zeroOrOne;
            }
        }
Ejemplo n.º 29
0
 // Per-lane power-of-two test for unsigned bytes.
 //
 // SSE2 path: a lane passes when it is greater than zero and x & (x - 1) is zero; the
 // combined mask is reduced to 1 or 0 per lane by ANDing with 1.
 // Otherwise: scalar math.ispow2 on each lane widened to uint.
 public static bool16 ispow2(byte16 x)
 {
     if (Sse2.IsSse2Supported)
     {
         v128 isNonZero    = Operator.greater_mask_byte(x, byte16.zero);
         v128 hasSingleBit = Sse2.cmpeq_epi8(default(v128), x & (x - 1));

         return Sse2.and_si128(Sse2.and_si128(isNonZero, hasSingleBit), new byte16(1));
     }
     else
     {
         return new bool16(math.ispow2((uint)x.x0),  math.ispow2((uint)x.x1),
                           math.ispow2((uint)x.x2),  math.ispow2((uint)x.x3),
                           math.ispow2((uint)x.x4),  math.ispow2((uint)x.x5),
                           math.ispow2((uint)x.x6),  math.ispow2((uint)x.x7),
                           math.ispow2((uint)x.x8),  math.ispow2((uint)x.x9),
                           math.ispow2((uint)x.x10), math.ispow2((uint)x.x11),
                           math.ispow2((uint)x.x12), math.ispow2((uint)x.x13),
                           math.ispow2((uint)x.x14), math.ispow2((uint)x.x15));
     }
 }
Ejemplo n.º 30
0
        // Sum of absolute differences over all sixteen byte lanes of a and b.
        //
        // SSE2 path: sad_epu8 yields partial sums; 32-bit element 2 is brought down next to
        //            element 0, the two are added and the lowest 16-bit lane is returned.
        // Otherwise: the sixteen absolute differences are added pairwise in int arithmetic.
        public static uint sad(byte16 a, byte16 b)
        {
            if (Sse2.IsSse2Supported)
            {
                v128 partialSums = Sse2.sad_epu8(a, b);
                v128 upperSum    = Sse2.shuffle_epi32(partialSums, Sse.SHUFFLE(0, 0, 0, 2));

                return Sse2.add_epi16(partialSums, upperSum).UShort0;
            }
            else
            {
                int lanes0to3   = (math.abs(a.x0 - b.x0) + math.abs(a.x1 - b.x1)) + (math.abs(a.x2 - b.x2) + math.abs(a.x3 - b.x3));
                int lanes4to7   = (math.abs(a.x4 - b.x4) + math.abs(a.x5 - b.x5)) + (math.abs(a.x6 - b.x6) + math.abs(a.x7 - b.x7));
                int lanes8to11  = (math.abs(a.x8 - b.x8) + math.abs(a.x9 - b.x9)) + (math.abs(a.x10 - b.x10) + math.abs(a.x11 - b.x11));
                int lanes12to15 = (math.abs(a.x12 - b.x12) + math.abs(a.x13 - b.x13)) + (math.abs(a.x14 - b.x14) + math.abs(a.x15 - b.x15));

                return (uint)((lanes0to3 + lanes4to7) + (lanes8to11 + lanes12to15));
            }
        }