Exemple #1
0
        /// <summary>
        /// Counts the trailing zero bits of every byte lane of <paramref name="x"/>.
        /// A lane that is zero yields 8 on the vectorised paths.
        /// </summary>
        public static byte8 tzcnt(byte8 x)
        {
            if (Ssse3.IsSsse3Supported)
            {
                // Lookup tables indexed by a nibble value. An empty nibble maps to 8 in both tables;
                // the entries of the high-nibble table are already offset by 4.
                v128 lowNibbleTable  = new v128(8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0);
                v128 highNibbleTable = new v128(8, 4, 5, 4, 6, 4, 5, 4, 7, 4, 5, 4, 6, 4, 5, 4);
                v128 nibbleMask      = new v128(0x0F0F_0F0F);

                v128 lowNibbles  = Sse2.and_si128(nibbleMask, x);
                v128 highNibbles = Sse2.and_si128(nibbleMask, Sse2.srli_epi16(x, 4));

                v128 lowCounts  = Ssse3.shuffle_epi8(lowNibbleTable, lowNibbles);
                v128 highCounts = Ssse3.shuffle_epi8(highNibbleTable, highNibbles);

                // The smaller of the two candidates is the position of the lowest set bit.
                return Sse2.min_epu8(lowCounts, highCounts);
            }
            else if (Sse2.IsSse2Supported)
            {
                v128 zero = default(v128);

                // x & -x keeps only the lowest set bit of each lane.
                v128 lowestBit = x & (byte8)(-(sbyte8)x);

                // Binary search for the position of that bit; the extra 1 accounts for an all-zero lane.
                v128 laneIsZero     = Sse2.cmpeq_epi8(lowestBit, zero);
                v128 notInLowNibble = Sse2.cmpeq_epi8(lowestBit & (byte8)0x0F, zero);
                v128 notInLowPair   = Sse2.cmpeq_epi8(lowestBit & (byte8)0x33, zero);
                v128 notInEvenBit   = Sse2.cmpeq_epi8(lowestBit & (byte8)0x55, zero);

                byte8 zeroTerm = Mask.BlendV(zero, new byte8(1), laneIsZero);
                byte8 fourTerm = Mask.BlendV(zero, new byte8(4), notInLowNibble);
                byte8 twoTerm  = Mask.BlendV(zero, new byte8(2), notInLowPair);
                byte8 oneTerm  = Mask.BlendV(zero, new byte8(1), notInEvenBit);

                return (zeroTerm + fourTerm) + (twoTerm + oneTerm);
            }
            else
            {
                // Scalar fallback: one call per lane.
                return new byte8(tzcnt(x.x0),
                                 tzcnt(x.x1),
                                 tzcnt(x.x2),
                                 tzcnt(x.x3),
                                 tzcnt(x.x4),
                                 tzcnt(x.x5),
                                 tzcnt(x.x6),
                                 tzcnt(x.x7));
            }
        }
Exemple #2
0
        /// <summary>
        /// Lane-wise remainder of <paramref name="dividend"/> divided by <paramref name="divisor"/>,
        /// computed as dividend - quotient * divisor on the int8 conversions of both operands.
        /// </summary>
        internal static byte8 vrem_byte(byte8 dividend, byte8 divisor)
        {
            int8 wideDividend = dividend;
            int8 wideDivisor  = divisor;
            int8 wideQuotient = (int8)vdiv_byte_quotient(wideDividend, wideDivisor);

            return (byte8)(wideDividend - (wideQuotient * wideDivisor));
        }
Exemple #3
0
        /// <summary>
        /// Lane-wise unsigned division of <paramref name="dividend"/> by <paramref name="divisor"/>
        /// using bit-by-bit shift-and-subtract long division on 16-bit lanes.
        /// </summary>
        /// <param name="dividend">The eight numerators.</param>
        /// <param name="divisor">The eight denominators; every lane is asserted to be non-zero.</param>
        /// <param name="remainder">Receives the eight remainders.</param>
        /// <returns>The eight quotients.</returns>
        /// <exception cref="CPUFeatureCheckException">Thrown when SSE2 is not supported.</exception>
        internal static byte8 vdivrem_byte_SSE_FALLBACK(byte8 dividend, byte8 divisor, out byte8 remainder)
        {
            Assert.AreNotEqual(divisor.x0, 0);
            Assert.AreNotEqual(divisor.x1, 0);
            Assert.AreNotEqual(divisor.x2, 0);
            Assert.AreNotEqual(divisor.x3, 0);
            Assert.AreNotEqual(divisor.x4, 0);
            Assert.AreNotEqual(divisor.x5, 0);
            Assert.AreNotEqual(divisor.x6, 0);
            Assert.AreNotEqual(divisor.x7, 0);

            if (Sse2.IsSse2Supported)
            {
                ushort8 quotients  = ushort8.zero;
                ushort8 remainders = ushort8.zero;

                ushort8 divisorCast  = divisor;
                ushort8 dividendCast = dividend;


                // Most significant dividend bit (bit 7): nothing to shift yet.
                remainders |= (new ushort8(1) & (dividendCast >> 7));

                // min(divisor, remainder) == divisor  <=>  remainder >= divisor (unsigned).
                v128 subtractDivisorFromRemainder = Sse2.cmpeq_epi16(maxmath.min(divisorCast, remainders), divisorCast);

                remainders -= Mask.BlendV(default(v128), divisorCast, subtractDivisorFromRemainder);
                quotients  |= new ushort8(1) & subtractDivisorFromRemainder;

                // Dividend bits 6 down to 1.
                for (int i = 6; i > 0; i--)
                {
                    quotients  <<= 1;
                    remainders <<= 1;

                    remainders |= (new ushort8(1) & (dividendCast >> i));

                    // The lanes are 16 bits wide, so the comparison must be 16 bits wide as well,
                    // exactly as in the steps before and after this loop. A byte-wise compare would
                    // always report the (zero) high bytes as equal and yield a half-set lane mask.
                    subtractDivisorFromRemainder = Sse2.cmpeq_epi16(maxmath.min(divisorCast, remainders), divisorCast);

                    remainders -= Mask.BlendV(default(v128), divisorCast, subtractDivisorFromRemainder);
                    quotients  |= new ushort8(1) & subtractDivisorFromRemainder;
                }

                // Least significant dividend bit (bit 0): no shift of the dividend required.
                remainders <<= 1;
                quotients  <<= 1;

                remainders |= new ushort8(1) & dividendCast;

                subtractDivisorFromRemainder = Sse2.cmpeq_epi16(maxmath.min(divisorCast, remainders), divisorCast);

                remainders -= Mask.BlendV(default(v128), divisorCast, subtractDivisorFromRemainder);
                quotients  |= new ushort8(1) & subtractDivisorFromRemainder;


                // Narrow both results to bytes in one go: remainders land in the low half, quotients in the high half.
                byte16 temp = Sse2.packus_epi16(remainders, quotients);
                remainder = temp.v8_0;
                return(temp.v8_8);
            }
            else
            {
                throw new CPUFeatureCheckException();
            }
        }
Exemple #4
0
        /// <summary>
        /// Reverses the bit order within every byte lane of <paramref name="x"/>.
        /// </summary>
        public static byte8 reversebits(byte8 x)
        {
            // Step 1: swap neighbouring bits.
            byte8 bitsSwapped = ((x >> 1) & 0x55) | ((x & 0x55) << 1);

            // Step 2: swap neighbouring bit pairs.
            byte8 pairsSwapped = ((bitsSwapped >> 2) & 0x33) | ((bitsSwapped & 0x33) << 2);

            // Step 3: swap the two nibbles.
            return (pairsSwapped >> 4) | (pairsSwapped << 4);
        }
Exemple #5
0
        /// <summary>
        /// Component-wise remainder of two <see cref="byte2x3"/> values.
        /// </summary>
        public static byte2x3 operator %(byte2x3 left, byte2x3 right)
        {
            if (Sse2.IsSse2Supported)
            {
                // c0 and c1 are packed next to each other; c2 is appended below so that all three
                // columns go through a single byte8 operation.
                v128 lhsHead = Sse2.unpacklo_epi16(left.c0, left.c1);
                v128 rhsHead = Sse2.unpacklo_epi16(right.c0, right.c1);
#if DEBUG
                // Debug builds fill the two spare lanes with ones - presumably so that
                // divisor-is-zero assertions cannot fire on lanes whose result is discarded anyway.
                v128 lhsTail = Sse2.unpacklo_epi16(left.c2, new byte2(1));
                v128 rhsTail = Sse2.unpacklo_epi16(right.c2, new byte2(1));
#else
                v128 lhsTail = left.c2;
                v128 rhsTail = right.c2;
#endif
                byte8 packed_LHS = Sse2.unpacklo_epi32(lhsHead, lhsTail);
                byte8 packed_RHS = Sse2.unpacklo_epi32(rhsHead, rhsTail);

                byte8 rem = packed_LHS % packed_RHS;

                return new byte2x3(rem.v2_0, rem.v2_2, rem.v2_4);
            }
            else
            {
                return new byte2x3(left.c0 % right.c0,
                                   left.c1 % right.c1,
                                   left.c2 % right.c2);
            }
        }
Exemple #6
0
        /// <summary>
        /// Remainder of every component of <paramref name="left"/> divided by the scalar <paramref name="right"/>.
        /// </summary>
        public static byte2x3 operator %(byte2x3 left, byte right)
        {
            if (Sse2.IsSse2Supported)
            {
                // A divisor known at compile time skips the packed path and is handled column by column below.
                if (!Constant.IsConstantExpression(right))
                {
                    v128 head = Sse2.unpacklo_epi16(left.c0, left.c1);
#if DEBUG
                    // Debug builds fill the two spare lanes with ones instead of leaving them unspecified.
                    v128 tail = Sse2.unpacklo_epi16(left.c2, new byte2(1));
#else
                    v128 tail = left.c2;
#endif
                    byte8 packed = Sse2.unpacklo_epi32(head, tail);

                    byte8 rem = packed % right;

                    return new byte2x3(rem.v2_0, rem.v2_2, rem.v2_4);
                }
            }

            return new byte2x3(left.c0 % right,
                               left.c1 % right,
                               left.c2 % right);
        }
Exemple #7
0
        /// <summary>
        /// Component-wise quotient of two <see cref="byte2x3"/> values.
        /// </summary>
        public static byte2x3 operator /(byte2x3 left, byte2x3 right)
        {
            if (Sse2.IsSse2Supported)
            {
                // c0 and c1 are packed next to each other; c2 is appended below so that all three
                // columns go through a single byte8 operation.
                v128 lhsHead = Sse2.unpacklo_epi16(left.c0, left.c1);
                v128 rhsHead = Sse2.unpacklo_epi16(right.c0, right.c1);
#if DEBUG
                // Debug builds fill the two spare lanes with ones - presumably so that
                // divisor-is-zero assertions cannot fire on lanes whose result is discarded anyway.
                v128 lhsTail = Sse2.unpacklo_epi16(left.c2, new byte2(1));
                v128 rhsTail = Sse2.unpacklo_epi16(right.c2, new byte2(1));
#else
                v128 lhsTail = left.c2;
                v128 rhsTail = right.c2;
#endif
                byte8 packed_LHS = Sse2.unpacklo_epi32(lhsHead, lhsTail);
                byte8 packed_RHS = Sse2.unpacklo_epi32(rhsHead, rhsTail);

                byte8 div = packed_LHS / packed_RHS;

                return new byte2x3(div.v2_0, div.v2_2, div.v2_4);
            }
            else
            {
                return new byte2x3(left.c0 / right.c0,
                                   left.c1 / right.c1,
                                   left.c2 / right.c2);
            }
        }
        /// <summary>
        /// Lane-wise least common multiple of the absolute values of <paramref name="x"/> and <paramref name="y"/>.
        /// </summary>
        public static byte8 lcm(sbyte8 x, sbyte8 y)
        {
            byte8 magnitudeX = (byte8)abs(x);
            byte8 magnitudeY = (byte8)abs(y);

            // Divide by the gcd before multiplying.
            byte8 commonDivisor = gcd(magnitudeX, magnitudeY);
            byte8 reducedX      = magnitudeX / commonDivisor;

            return reducedX * magnitudeY;
        }
Exemple #9
0
        /// <summary>
        /// Builds, for every byte lane, a mask of <paramref name="numBits"/> consecutive set bits
        /// whose lowest set bit is at position <paramref name="index"/>.
        /// </summary>
        /// <param name="numBits">Number of bits to set per lane; asserted to lie in [0, 8 - index].</param>
        /// <param name="index">Position of the lowest set bit per lane; asserted to lie in [0, 8]. Defaults to 0.</param>
        /// <returns>The eight bit masks.</returns>
        public static byte8 bitmask8(byte8 numBits, byte8 index = default(byte8))
        {
            Assert.IsBetween(index.x0, 0u, 8u);
            Assert.IsBetween(index.x1, 0u, 8u);
            Assert.IsBetween(index.x2, 0u, 8u);
            Assert.IsBetween(index.x3, 0u, 8u);
            Assert.IsBetween(index.x4, 0u, 8u);
            Assert.IsBetween(index.x5, 0u, 8u);
            Assert.IsBetween(index.x6, 0u, 8u);
            Assert.IsBetween(index.x7, 0u, 8u);
            Assert.IsBetween(numBits.x0, 0u, 8u - index.x0);
            Assert.IsBetween(numBits.x1, 0u, 8u - index.x1);
            Assert.IsBetween(numBits.x2, 0u, 8u - index.x2);
            Assert.IsBetween(numBits.x3, 0u, 8u - index.x3);
            Assert.IsBetween(numBits.x4, 0u, 8u - index.x4);
            Assert.IsBetween(numBits.x5, 0u, 8u - index.x5);
            Assert.IsBetween(numBits.x6, 0u, 8u - index.x6);
            Assert.IsBetween(numBits.x7, 0u, 8u - index.x7);


            // Turn each index into a mask with every bit from that index upwards set.
            // Both branches below consume the mask form, so this has to happen before the hardware check.
            index = shl(byte.MaxValue, index);

            // mask & ~(mask << numBits) keeps exactly numBits bits starting at the index.
            // A lane asking for all 8 bits is forced to all ones explicitly instead of relying on a shift by the full lane width.
            if (Sse2.IsSse2Supported)
            {
                v128 isMaxBitsMask = Sse2.cmpeq_epi8(numBits, new byte8(8));

                return(isMaxBitsMask | andnot(index, shl(index, numBits)));
            }
            else
            {
                // 8 (not 16) is the full width of a byte lane, matching the SSE2 branch above.
                return((byte8)(-toint16(numBits == 8)) | andnot(index, shl(index, numBits)));
            }
        }
Exemple #10
0
        /// <summary>
        /// Per lane, keeps only the highest set bit of <paramref name="x"/>; a zero lane stays zero.
        /// </summary>
        public static byte8 floorpow2(byte8 x)
        {
            // Propagate the highest set bit into every lower position.
            byte8 smeared = x | (x >> 1);
            smeared = smeared | (smeared >> 2);
            smeared = smeared | (smeared >> 4);

            // Dropping everything below the top bit leaves that bit alone.
            return smeared - (smeared >> 1);
        }
Exemple #11
0
        /// <summary>
        /// Per lane: subtracts one, sets every bit below the highest set bit, then adds one again -
        /// the usual bit trick for rounding up to a power of two.
        /// </summary>
        public static byte8 ceilpow2(byte8 x)
        {
            // Start from x - 1 so that exact powers of two map onto themselves.
            byte8 smeared = x - 1;

            smeared = smeared | (smeared >> 1);
            smeared = smeared | (smeared >> 2);
            smeared = smeared | (smeared >> 4);

            return smeared + 1;
        }
Exemple #12
0
        /// <summary>
        /// Lane-wise division that also yields the remainder, derived as dividend - quotient * divisor
        /// on the int8 conversions of both operands.
        /// </summary>
        /// <param name="dividend">The eight numerators.</param>
        /// <param name="divisor">The eight denominators.</param>
        /// <param name="remainder">Receives the eight remainders.</param>
        /// <returns>The eight quotients.</returns>
        internal static byte8 vdivrem_byte(byte8 dividend, byte8 divisor, out byte8 remainder)
        {
            int8 wideDividend = dividend;
            int8 wideDivisor  = divisor;
            int8 wideQuotient = (int8)vdiv_byte_quotient(wideDividend, wideDivisor);

            remainder = (byte8)(wideDividend - wideQuotient * wideDivisor);

            return (byte8)wideQuotient;
        }
Exemple #13
0
        /// <summary>
        /// Lane-wise greatest common divisor of <paramref name="x"/> and <paramref name="y"/>.
        /// If either lane operand is zero, the result lane is the other operand.
        /// </summary>
        public static byte8 gcd(byte8 x, byte8 y)
        {
            if (Sse2.IsSse2Supported)
            {
                v128 zero = default(v128);

                v128 xIsZero    = Sse2.cmpeq_epi8(x, zero);
                v128 yIsZero    = Sse2.cmpeq_epi8(y, zero);
                v128 eitherZero = Sse2.or_si128(xIsZero, yIsZero);

                // Result for lanes with a zero operand: the other operand.
                v128 zeroCaseResult = Mask.BlendV(zero, y, xIsZero);
                zeroCaseResult      = Mask.BlendV(zeroCaseResult, x, yIsZero);

                // Lanes with a zero operand take no part in the iteration.
                v128 finished = eitherZero;
                v128 oddPart  = zero;

                // Power of two shared by both operands; shifted back in after the loop.
                byte8 commonShift = tzcnt(x | y);

                x = shrl(x, tzcnt(x));

                do
                {
                    y = shrl(y, tzcnt(y));

                    // Order the pair so that x <= y, then replace y by the difference.
                    v128 smaller = Sse2.min_epu8(x, y);
                    v128 larger  = Sse2.max_epu8(y, x);

                    x = smaller;
                    y = larger;

                    y -= x;

                    // A lane is done the first time its difference reaches zero; capture x exactly once.
                    v128 newlyFinished = Sse2.andnot_si128(finished, Sse2.cmpeq_epi8(y, zero));

                    oddPart  = Mask.BlendV(oddPart, x, newlyFinished);
                    finished = Sse2.or_si128(finished, newlyFinished);
                } while (finished.SLong0 != -1); // all eight lanes occupy the low 64 bits

                oddPart = shl(oddPart, commonShift);
                oddPart = Mask.BlendV(oddPart, zeroCaseResult, eitherZero);

                return oddPart;
            }
            else
            {
                // Scalar fallback: one call per lane.
                return new byte8((byte)gcd((uint)x.x0, (uint)y.x0),
                                 (byte)gcd((uint)x.x1, (uint)y.x1),
                                 (byte)gcd((uint)x.x2, (uint)y.x2),
                                 (byte)gcd((uint)x.x3, (uint)y.x3),
                                 (byte)gcd((uint)x.x4, (uint)y.x4),
                                 (byte)gcd((uint)x.x5, (uint)y.x5),
                                 (byte)gcd((uint)x.x6, (uint)y.x6),
                                 (byte)gcd((uint)x.x7, (uint)y.x7));
            }
        }
Exemple #14
0
 /// <summary>
 /// Returns <paramref name="left"/> with every bit that is set in <paramref name="right"/> cleared (left &amp; ~right).
 /// </summary>
 public static byte8 andnot(byte8 left, byte8 right)
 {
     if (Sse2.IsSse2Supported)
     {
         // The intrinsic complements its first operand, hence the swapped argument order.
         v128 cleared = Sse2.andnot_si128(right, left);

         return cleared;
     }
     else
     {
         byte8 keepMask = ~right;

         return left & keepMask;
     }
 }
Exemple #15
0
 /// <summary>
 /// Copies the eight components of <paramref name="v"/> into the proxy's members.
 /// </summary>
 public DebuggerProxy(byte8 v)
 {
     this.x0 = v.x0;
     this.x1 = v.x1;
     this.x2 = v.x2;
     this.x3 = v.x3;

     this.x4 = v.x4;
     this.x5 = v.x5;
     this.x6 = v.x6;
     this.x7 = v.x7;
 }
 /// <summary>
 /// Sum of the absolute differences between corresponding lanes of <paramref name="a"/> and <paramref name="b"/>.
 /// </summary>
 public static uint sad(byte8 a, byte8 b)
 {
     if (Sse2.IsSse2Supported)
     {
         // Only the lowest 16-bit element of the intrinsic's result is read.
         v128 sums = Sse2.sad_epu8(a, b);

         return sums.UShort0;
     }
     else
     {
         int d0 = math.abs(a.x0 - b.x0);
         int d1 = math.abs(a.x1 - b.x1);
         int d2 = math.abs(a.x2 - b.x2);
         int d3 = math.abs(a.x3 - b.x3);
         int d4 = math.abs(a.x4 - b.x4);
         int d5 = math.abs(a.x5 - b.x5);
         int d6 = math.abs(a.x6 - b.x6);
         int d7 = math.abs(a.x7 - b.x7);

         return (uint)(((d0 + d1) + (d2 + d3)) + ((d4 + d5) + (d6 + d7)));
     }
 }
Exemple #17
0
 /// <summary>
 /// Returns true when at least one lane of <paramref name="x"/> is non-zero.
 /// </summary>
 public static bool any(byte8 x)
 {
     if (Sse2.IsSse2Supported)
     {
         // Test the low 64 bits of the register in a single comparison.
         v128 bits = (v128)x;

         return bits.ULong0 != 0;
     }
     else
     {
         return any(x != 0);
     }
 }
Exemple #18
0
 /// <summary>
 /// Returns true when all eight lanes of <paramref name="c"/> hold the same value.
 /// </summary>
 public static bool all_eq(byte8 c)
 {
     if (Ssse3.IsSsse3Supported)
     {
         // An all-zero shuffle control copies lane 0 into every lane.
         byte8 firstLaneEverywhere = Ssse3.shuffle_epi8(c, default(v128));

         return firstLaneEverywhere.Equals(c);
     }
     else
     {
         // Non-short-circuiting '&' is used throughout.
         bool lanes1To4 = (c.x0 == c.x1 & c.x0 == c.x2) & (c.x0 == c.x3 & c.x0 == c.x4);
         bool lanes5To7 = (c.x0 == c.x5 & c.x0 == c.x6) & c.x0 == c.x7;

         return lanes1To4 & lanes5To7;
     }
 }
Exemple #19
0
 /// <summary>
 /// Lane-wise average of <paramref name="x"/> and <paramref name="y"/>, rounded up: (x + y + 1) >> 1.
 /// </summary>
 public static byte8 avg(byte8 x, byte8 y)
 {
     if (Sse2.IsSse2Supported)
     {
         v128 averaged = Sse2.avg_epu8(x, y);

         return averaged;
     }
     else
     {
         return new byte8((byte)((x.x0 + y.x0 + 1) >> 1),
                          (byte)((x.x1 + y.x1 + 1) >> 1),
                          (byte)((x.x2 + y.x2 + 1) >> 1),
                          (byte)((x.x3 + y.x3 + 1) >> 1),
                          (byte)((x.x4 + y.x4 + 1) >> 1),
                          (byte)((x.x5 + y.x5 + 1) >> 1),
                          (byte)((x.x6 + y.x6 + 1) >> 1),
                          (byte)((x.x7 + y.x7 + 1) >> 1));
     }
 }
Exemple #20
0
 /// <summary>
 /// Reinterprets the bits of <paramref name="x"/> as a <see cref="quarter8"/> without converting the values.
 /// </summary>
 public static quarter8 asquarter(byte8 x)
 {
     if (Sse.IsSseSupported)
     {
         v128 bits = (v128)x;

         return bits;
     }
     else
     {
         // Pointer-based reinterpretation of the local copy.
         quarter8* reinterpreted = (quarter8*)&x;

         return *reinterpreted;
     }
 }
Exemple #21
0
 /// <summary>
 /// Lane-wise unsigned maximum of <paramref name="a"/> and <paramref name="b"/>.
 /// </summary>
 public static byte8 max(byte8 a, byte8 b)
 {
     if (Sse2.IsSse2Supported)
     {
         v128 larger = Sse2.max_epu8(a, b);

         return larger;
     }
     else
     {
         return new byte8((byte)math.max((uint)a.x0, (uint)b.x0),
                          (byte)math.max((uint)a.x1, (uint)b.x1),
                          (byte)math.max((uint)a.x2, (uint)b.x2),
                          (byte)math.max((uint)a.x3, (uint)b.x3),
                          (byte)math.max((uint)a.x4, (uint)b.x4),
                          (byte)math.max((uint)a.x5, (uint)b.x5),
                          (byte)math.max((uint)a.x6, (uint)b.x6),
                          (byte)math.max((uint)a.x7, (uint)b.x7));
     }
 }
Exemple #22
0
 /// <summary>
 /// Sum of all eight lanes of <paramref name="x"/>.
 /// </summary>
 public static uint csum(byte8 x)
 {
     if (Sse2.IsSse2Supported)
     {
         // The sum of absolute differences against zero is the plain sum of the lanes.
         byte8 zeros = byte8.zero;

         return sad(x, zeros);
     }
     else
     {
         int lowHalf  = (x.x0 + x.x1) + (x.x2 + x.x3);
         int highHalf = (x.x4 + x.x5) + (x.x6 + x.x7);

         return (uint)(lowHalf + highHalf);
     }
 }
Exemple #23
0
 /// <summary>
 /// Alternating subtract/add: even lanes receive a - b, odd lanes receive a + b.
 /// </summary>
 public static byte8 subadd(byte8 a, byte8 b)
 {
     if (Ssse3.IsSsse3Supported)
     {
         // Control bytes of 255 (-1) negate the matching lane of b, control bytes of 1 keep it.
         v128 alternatingSign = new byte8(255, 1, 255, 1, 255, 1, 255, 1);
         v128 signedB         = Ssse3.sign_epi8(b, alternatingSign);

         return a + signedB;
     }
     else
     {
         // Subtracting the negated b on the odd lanes turns the subtraction into an addition there.
         bool8 oddLanes = new bool8(false, true, false, true, false, true, false, true);
         byte8 negatedB = (byte8)(-(sbyte8)b);

         return a - select(b, negatedB, oddLanes);
     }
 }
Exemple #24
0
 /// <summary>
 /// Number of set bits in every lane of <paramref name="x"/>.
 /// </summary>
 public static byte8 countbits(byte8 x)
 {
     if (Ssse3.IsSsse3Supported)
     {
         // Delegate to the 16-lane overload on the same register.
         byte16 widened = (byte16)(v128)x;

         return (v128)countbits(widened);
     }
     else
     {
         return new byte8((byte)math.countbits((uint)x.x0),
                          (byte)math.countbits((uint)x.x1),
                          (byte)math.countbits((uint)x.x2),
                          (byte)math.countbits((uint)x.x3),
                          (byte)math.countbits((uint)x.x4),
                          (byte)math.countbits((uint)x.x5),
                          (byte)math.countbits((uint)x.x6),
                          (byte)math.countbits((uint)x.x7));
     }
 }
Exemple #25
0
 /// <summary>
 /// Returns true when every lane of <paramref name="x"/> is non-zero.
 /// </summary>
 public static bool all(byte8 x)
 {
     if (Sse2.IsSse2Supported)
     {
         // Any lane equal to zero leaves a non-zero byte in the low 64 bits of the comparison mask.
         v128 zeroLanes = Sse2.cmpeq_epi8(x, default(v128));

         return zeroLanes.ULong0 == 0;
     }
     else
     {
         return all(x != 0);
     }
 }
 /// <summary>
 /// Divides every lane of <paramref name="dividend"/> by the scalar <paramref name="divisor"/>.
 /// </summary>
 /// <param name="dividend">The eight numerators.</param>
 /// <param name="divisor">The shared denominator.</param>
 /// <param name="remainder">Receives the eight remainders.</param>
 /// <returns>The eight quotients.</returns>
 public static byte8 divrem(byte8 dividend, byte divisor, out byte8 remainder)
 {
     if (!Constant.IsConstantExpression(divisor))
     {
         // Runtime divisor: broadcast it and use the vector overload.
         return divrem(dividend, (byte8)divisor, out remainder);
     }
     else
     {
         // Compile-time divisor: use the scalar-divisor '%' and '/' operators separately.
         remainder = dividend % divisor;

         return dividend / divisor;
     }
 }
Exemple #27
0
        /// <summary>
        /// Component-wise remainder of two <see cref="byte2x4"/> values.
        /// </summary>
        public static byte2x4 operator %(byte2x4 left, byte2x4 right)
        {
            if (Sse2.IsSse2Supported)
            {
                // Four two-element columns fill one byte8 exactly, so a single vector operation covers them all.
                byte8 lhs = new byte8(left.c0, left.c1, left.c2, left.c3);
                byte8 rhs = new byte8(right.c0, right.c1, right.c2, right.c3);

                byte8 rem = lhs % rhs;

                return new byte2x4(rem.v2_0, rem.v2_2, rem.v2_4, rem.v2_6);
            }
            else
            {
                return new byte2x4(left.c0 % right.c0,
                                   left.c1 % right.c1,
                                   left.c2 % right.c2,
                                   left.c3 % right.c3);
            }
        }
Exemple #28
0
        /// <summary>
        /// Component-wise quotient of two <see cref="byte2x4"/> values.
        /// </summary>
        public static byte2x4 operator /(byte2x4 left, byte2x4 right)
        {
            if (Sse2.IsSse2Supported)
            {
                // Four two-element columns fill one byte8 exactly, so a single vector operation covers them all.
                byte8 lhs = new byte8(left.c0, left.c1, left.c2, left.c3);
                byte8 rhs = new byte8(right.c0, right.c1, right.c2, right.c3);

                byte8 div = lhs / rhs;

                return new byte2x4(div.v2_0, div.v2_2, div.v2_4, div.v2_6);
            }
            else
            {
                return new byte2x4(left.c0 / right.c0,
                                   left.c1 / right.c1,
                                   left.c2 / right.c2,
                                   left.c3 / right.c3);
            }
        }
Exemple #29
0
 /// <summary>
 /// Tests every lane of <paramref name="x"/> for being a power of two (non-zero with exactly one bit set).
 /// </summary>
 public static bool8 ispow2(byte8 x)
 {
     if (Sse2.IsSse2Supported)
     {
         v128 isNonZero    = Operator.greater_mask_byte(x, byte8.zero);
         v128 atMostOneBit = Sse2.cmpeq_epi8(default(v128), x & (x - 1));
         v128 isPowerOfTwo = Sse2.and_si128(isNonZero, atMostOneBit);

         // Reduce the all-ones lane masks to the value 1.
         return Sse2.and_si128(isPowerOfTwo, new byte16(1));
     }
     else
     {
         return new bool8(math.ispow2((uint)x.x0),
                          math.ispow2((uint)x.x1),
                          math.ispow2((uint)x.x2),
                          math.ispow2((uint)x.x3),
                          math.ispow2((uint)x.x4),
                          math.ispow2((uint)x.x5),
                          math.ispow2((uint)x.x6),
                          math.ispow2((uint)x.x7));
     }
 }
Exemple #30
0
        /// <summary>
        /// Draws eight values from the generator, multiplies each by the matching lane of
        /// <paramref name="max"/> and returns the high byte of every 16-bit product.
        /// NOTE(review): presumably this scales each draw into [0, max) - confirm against NextState().
        /// </summary>
        public byte8 NextByte8(byte8 max)
        {
            if (Ssse3.IsSsse3Supported)
            {
                // Exactly eight NextState() calls, evaluated left to right.
                short8 draws    = new short8(NextState(), NextState(), NextState(), NextState(), NextState(), NextState(), NextState(), NextState());
                short8 products = (short8)max * draws;

                // Gather the odd-numbered bytes, i.e. the high byte of each 16-bit product.
                v128 highByteSelector = new byte8(1, 3, 5, 7, 9, 11, 13, 15);

                return Ssse3.shuffle_epi8(products, highByteSelector);
            }
            else
            {
                // Exactly eight NextState() calls, evaluated left to right.
                short8 draws = new short8(NextState(), NextState(), NextState(), NextState(), NextState(), NextState(), NextState(), NextState());

                return (byte8)(((short8)max * draws) >> 8);
            }
        }