Пример #1
0
        /// <summary>
        /// Builds a byte8 from three smaller vectors: components 0-2 are taken from
        /// <paramref name="x012"/>, components 3-4 from <paramref name="x34"/> and
        /// components 5-7 from <paramref name="x567"/>.
        /// </summary>
        /// <param name="x012">Source of x0, x1 and x2.</param>
        /// <param name="x34">Source of x3 and x4.</param>
        /// <param name="x567">Source of x5, x6 and x7.</param>
        public byte8(byte3 x012, byte2 x34, byte3 x567)
        {
            if (Sse2.IsSse2Supported)
            {
                // Byte-shift x567 up by two so it lines up directly behind the two bytes of x34.
                v128 shiftedTail = Sse2.bslli_si128(x567, 2 * sizeof(byte));

                // 16-bit lane 0 is kept from x34, lanes 1 and 2 are taken from the shifted tail.
                v128 upperFive;
                if (Sse4_1.IsSse41Supported)
                {
                    upperFive = Sse4_1.blend_epi16(x34, shiftedTail, 0b0110);
                }
                else
                {
                    // Same blend immediate, routed through the project's SSE2 helper.
                    upperFive = Mask.BlendEpi16_SSE2(x34, shiftedTail, 0b0110);
                }

                // Byte-shift the five packed bytes up by three, leaving room for x012 below them.
                v128 positioned = Sse2.bslli_si128(upperFive, 3 * sizeof(byte));

                // Mask bytes 0-2 are 0 and bytes 3-7 are 255 (presumably 255 selects the second operand).
                this = Mask.BlendV(x012, positioned, new byte8(0, 0, 0, 255, 255, 255, 255, 255));
                return;
            }

            // Scalar path: copy every component individually.
            x0 = x012.x;
            x1 = x012.y;
            x2 = x012.z;
            x3 = x34.x;
            x4 = x34.y;
            x5 = x567.x;
            x6 = x567.y;
            x7 = x567.z;
        }
Пример #2
0
 /// <summary>
 /// Constructs a byte2x4 in which all four columns are initialised from the single
 /// value <paramref name="v"/>.
 /// </summary>
 /// <param name="v">Value assigned to c0, c1, c2 and c3.</param>
 public byte2x4(byte v)
 {
     // Convert once, then store the same column four times.
     byte2 column = v;

     c0 = column;
     c1 = column;
     c2 = column;
     c3 = column;
 }
Пример #3
0
        /// <summary>
        /// Counts the trailing zero bits of each component of <paramref name="x"/>.
        /// A component that is zero yields 8 on both vectorised paths.
        /// </summary>
        /// <param name="x">Vector whose components are inspected.</param>
        /// <returns>The per-component trailing zero count.</returns>
        public static byte2 tzcnt(byte2 x)
        {
            if (Ssse3.IsSsse3Supported)
            {
                // Table lookup per nibble: entry 0 of each table is 8 (nibble empty), the low
                // table holds the trailing zero count of a 4-bit value and the high table the
                // same count plus 4.
                v128 nibbleMask    = new v128(0x0F0F_0F0F);
                v128 lowNibbleLut  = new v128(8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0);
                v128 highNibbleLut = new v128(8, 4, 5, 4, 6, 4, 5, 4, 7, 4, 5, 4, 6, 4, 5, 4);

                v128 lowNibbles  = Sse2.and_si128(nibbleMask, x);
                v128 highNibbles = Sse2.and_si128(nibbleMask, Sse2.srli_epi16(x, 4));

                v128 lowCounts  = Ssse3.shuffle_epi8(lowNibbleLut, lowNibbles);
                v128 highCounts = Ssse3.shuffle_epi8(highNibbleLut, highNibbles);

                // The smaller of the two candidates is the answer for the whole byte.
                return Sse2.min_epu8(lowCounts, highCounts);
            }

            if (Sse2.IsSse2Supported)
            {
                v128 zero = default(v128);

                // x & -x isolates the lowest set bit of every component.
                v128 lowestBit = x & (byte2)(-(sbyte2)x);

                // Each term contributes its weight when the isolated bit is absent from the
                // tested bit pattern; the weights sum to 8 for a zero component.
                byte2 ifZero      = Mask.BlendV(zero, new byte4(1), Sse2.cmpeq_epi8(lowestBit, zero));
                byte2 notInLow4   = Mask.BlendV(zero, new byte4(4), Sse2.cmpeq_epi8(lowestBit & (byte4)0x0F, zero));
                byte2 notInPairs  = Mask.BlendV(zero, new byte4(2), Sse2.cmpeq_epi8(lowestBit & (byte4)0x33, zero));
                byte2 notInEvens  = Mask.BlendV(zero, new byte4(1), Sse2.cmpeq_epi8(lowestBit & (byte4)0x55, zero));

                return (ifZero + notInLow4) + (notInPairs + notInEvens);
            }

            // Scalar path: defer to the single-byte overload for each component.
            return new byte2(tzcnt(x.x), tzcnt(x.y));
        }
Пример #4
0
        /// <summary>
        /// Reverses the bit order inside each component of <paramref name="x"/>.
        /// </summary>
        /// <param name="x">Vector whose components are bit-reversed.</param>
        /// <returns>The vector with every component's bits in reverse order.</returns>
        public static byte2 reversebits(byte2 x)
        {
            // Step 1: swap neighbouring bits.
            byte2 swappedBits = ((x >> 1) & 0x55) | ((x & 0x55) << 1);

            // Step 2: swap neighbouring bit pairs.
            byte2 swappedPairs = ((swappedBits >> 2) & 0x33) | ((swappedBits & 0x33) << 2);

            // Step 3: swap the two nibbles.
            return (swappedPairs >> 4) | (swappedPairs << 4);
        }
Пример #5
0
 /// <summary>
 /// Constructs a byte2x3 from six scalars supplied in row-major order
 /// (mRC is row R, column C); each column stores one value from each row.
 /// </summary>
 public byte2x3(byte m00, byte m01, byte m02,
                byte m10, byte m11, byte m12)
 {
     // Column k pairs the k-th value of the first row with the k-th value of the second row.
     c0 = new byte2(m00, m10);
     c1 = new byte2(m01, m11);
     c2 = new byte2(m02, m12);
 }
Пример #6
0
        /// <summary>
        /// Builds a byte8 from three smaller vectors: components 0-1 are taken from
        /// <paramref name="x01"/>, components 2-4 from <paramref name="x234"/> and
        /// components 5-7 from <paramref name="x567"/>.
        /// </summary>
        /// <param name="x01">Source of x0 and x1.</param>
        /// <param name="x234">Source of x2, x3 and x4.</param>
        /// <param name="x567">Source of x5, x6 and x7.</param>
        public byte8(byte2 x01, byte3 x234, byte3 x567)
        {
            if (Sse2.IsSse2Supported)
            {
                // Byte-shift each trailing part to its final byte offset (2 and 5 respectively).
                v128 middle = Sse2.bslli_si128(x234, 2 * sizeof(byte));
                v128 upper  = Sse2.bslli_si128(x567, 5 * sizeof(byte));

                // Mask bytes 5-7 are 255 (presumably selecting "upper"); the rest comes from "middle".
                v128 tail = Mask.BlendV(middle, upper, new byte8(0, 0, 0, 0, 0, 255, 255, 255));

                // 16-bit lane 0 is kept from x01, lanes 1-3 are taken from the merged tail.
                if (Sse4_1.IsSse41Supported)
                {
                    this = Sse4_1.blend_epi16(x01, tail, 0b1110);
                }
                else
                {
                    // Same blend immediate, routed through the project's SSE2 helper.
                    this = Mask.BlendEpi16_SSE2(x01, tail, 0b1110);
                }

                return;
            }

            // Scalar path: copy every component individually.
            x0 = x01.x;
            x1 = x01.y;
            x2 = x234.x;
            x3 = x234.y;
            x4 = x234.z;
            x5 = x567.x;
            x6 = x567.y;
            x7 = x567.z;
        }
Пример #7
0
 /// <summary>
 /// Constructs a byte2x4 from four explicitly supplied columns.
 /// </summary>
 /// <param name="c0">Value stored in column c0.</param>
 /// <param name="c1">Value stored in column c1.</param>
 /// <param name="c2">Value stored in column c2.</param>
 /// <param name="c3">Value stored in column c3.</param>
 public byte2x4(byte2 c0, byte2 c1, byte2 c2, byte2 c3)
 {
     // The parameters shadow the fields, so each store has to be qualified with "this".
     this.c3 = c3;
     this.c2 = c2;
     this.c1 = c1;
     this.c0 = c0;
 }
Пример #8
0
        /// <summary>
        /// Computes, per component, |x| / gcd(|x|, |y|) * |y| for the signed inputs
        /// <paramref name="x"/> and <paramref name="y"/>, i.e. their least common multiple.
        /// </summary>
        /// <param name="x">First operand; its absolute value is used.</param>
        /// <param name="y">Second operand; its absolute value is used.</param>
        /// <returns>The per-component result as an unsigned vector.</returns>
        public static byte2 lcm(sbyte2 x, sbyte2 y)
        {
            byte2 magnitudeX = (byte2)abs(x);
            byte2 magnitudeY = (byte2)abs(y);

            // NOTE(review): if gcd yields 0 for a component (both inputs 0) this divides by zero - confirm intended.
            byte2 commonDivisor = gcd(magnitudeX, magnitudeY);

            // Divide before multiplying, exactly as the formula above is written.
            return (magnitudeX / commonDivisor) * magnitudeY;
        }
Пример #9
0
        /// <summary>
        /// Builds a byte8 from three smaller vectors: components 0-2 are taken from
        /// <paramref name="x012"/>, components 3-5 from <paramref name="x345"/> and
        /// components 6-7 from <paramref name="x67"/>.
        /// </summary>
        /// <param name="x012">Source of x0, x1 and x2.</param>
        /// <param name="x345">Source of x3, x4 and x5.</param>
        /// <param name="x67">Source of x6 and x7.</param>
        public byte8(byte3 x012, byte3 x345, byte2 x67)
        {
            if (Sse2.IsSse2Supported)
            {
                // Byte-shift each trailing part to its final byte offset (3 and 6 respectively).
                v128 middle = Sse2.bslli_si128(x345, 3 * sizeof(byte));
                v128 upper  = Sse2.bslli_si128(x67, 6 * sizeof(byte));

                // Mask bytes 3-5 are 255 (presumably selecting "middle"); the rest comes from x012.
                v128 lowerSix = Mask.BlendV(x012, middle, new byte8(0, 0, 0, 255, 255, 255, 0, 0));

                // Only 16-bit lane 3 (bytes 6 and 7) is taken from "upper".
                if (Sse4_1.IsSse41Supported)
                {
                    this = Sse4_1.blend_epi16(lowerSix, upper, 0b1000);
                }
                else
                {
                    // Same blend immediate, routed through the project's SSE2 helper.
                    this = Mask.BlendEpi16_SSE2(lowerSix, upper, 0b1000);
                }

                return;
            }

            // Scalar path: copy every component individually.
            x0 = x012.x;
            x1 = x012.y;
            x2 = x012.z;
            x3 = x345.x;
            x4 = x345.y;
            x5 = x345.z;
            x6 = x67.x;
            x7 = x67.y;
        }
Пример #10
0
        /// <summary>
        /// For each component of <paramref name="x"/>, keeps only its highest set bit,
        /// which is the largest power of two not exceeding the component (0 stays 0).
        /// </summary>
        /// <param name="x">Input vector.</param>
        /// <returns>The per-component result.</returns>
        public static byte2 floorpow2(byte2 x)
        {
            // Smear the highest set bit into every lower bit position.
            byte2 smeared = x;
            smeared = smeared | (smeared >> 1);
            smeared = smeared | (smeared >> 2);
            smeared = smeared | (smeared >> 4);

            // Removing the lower half of the smeared run leaves just the top bit.
            return smeared - (smeared >> 1);
        }
Пример #11
0
 /// <summary>
 /// Constructs a byte2x4 from eight scalars supplied in row-major order
 /// (mRC is row R, column C); each column stores one value from each row.
 /// </summary>
 public byte2x4(byte m00, byte m01, byte m02, byte m03,
                byte m10, byte m11, byte m12, byte m13)
 {
     // Column k pairs the k-th value of the first row with the k-th value of the second row.
     c0 = new byte2(m00, m10);
     c1 = new byte2(m01, m11);
     c2 = new byte2(m02, m12);
     c3 = new byte2(m03, m13);
 }
Пример #12
0
        /// <summary>
        /// For each component of <paramref name="x"/>, subtracts one, smears the highest
        /// set bit into all lower positions and adds one again, which rounds the
        /// component up to a power of two.
        /// </summary>
        /// <param name="x">Input vector.</param>
        /// <returns>The per-component result.</returns>
        public static byte2 ceilpow2(byte2 x)
        {
            // Start from x - 1 so that exact powers of two map onto themselves.
            byte2 smeared = x - 1;

            smeared = smeared | (smeared >> 1);
            smeared = smeared | (smeared >> 2);
            smeared = smeared | (smeared >> 4);

            return smeared + 1;
        }
Пример #13
0
        /// <summary>
        /// Divides <paramref name="dividend"/> by <paramref name="divisor"/> per component
        /// by way of <c>vdiv_byte_quotient</c>, whose result is reinterpreted as a float2
        /// and then converted to byte2.
        /// </summary>
        /// <param name="dividend">Numerators.</param>
        /// <param name="divisor">Denominators; both components are asserted to be non-zero.</param>
        /// <returns>The per-component quotient.</returns>
        internal static byte2 vdiv_byte(byte2 dividend, byte2 divisor)
        {
            Assert.AreNotEqual(divisor.x, 0);
            Assert.AreNotEqual(divisor.y, 0);

            v128 quotientBits = vdiv_byte_quotient((int2)dividend, (int2)divisor);

            // The helper's register is read back as two floats before narrowing to bytes.
            float2 quotient = *(float2 *)&quotientBits;

            return (byte2)quotient;
        }
Пример #14
0
 /// <summary>
 /// Returns <paramref name="left"/> AND NOT <paramref name="right"/>, i.e. the bits
 /// of <paramref name="left"/> that are not set in <paramref name="right"/>.
 /// </summary>
 /// <param name="left">Operand that is kept.</param>
 /// <param name="right">Operand that is complemented.</param>
 public static byte2 andnot(byte2 left, byte2 right)
 {
     if (Sse2.IsSse2Supported)
     {
         // The intrinsic complements its FIRST argument, hence the swapped order.
         return Sse2.andnot_si128(right, left);
     }

     return left & ~right;
 }
Пример #15
0
 /// <summary>
 /// Returns true if at least one component of <paramref name="x"/> is non-zero.
 /// </summary>
 /// <param name="x">Vector to test.</param>
 public static bool any(byte2 x)
 {
     if (Sse2.IsSse2Supported)
     {
         // Both components share the lowest 16-bit lane, so a single non-zero test covers them.
         return Sse2.extract_epi16(x, 0) != 0;
     }

     return math.any(x != 0);
 }
Пример #16
0
 /// <summary>
 /// Returns <paramref name="x"/> as a quarter2 without converting the values: the
 /// vector is passed through a v128 cast or read back through a quarter2 pointer.
 /// </summary>
 /// <param name="x">Vector to reinterpret.</param>
 public static quarter2 asquarter(byte2 x)
 {
     if (Sse.IsSseSupported)
     {
         // Route through v128; the conversion to quarter2 happens on return.
         return (v128)x;
     }

     // Scalar path: view the same memory as a quarter2.
     return *(quarter2 *)&x;
 }
Пример #17
0
 /// <summary>
 /// Computes the per-component average of <paramref name="x"/> and <paramref name="y"/>
 /// as (x + y + 1) >> 1, so halves round up.
 /// </summary>
 /// <param name="x">First operand.</param>
 /// <param name="y">Second operand.</param>
 public static byte2 avg(byte2 x, byte2 y)
 {
     if (Sse2.IsSse2Supported)
     {
         return Sse2.avg_epu8(x, y);
     }

     // Scalar path: the sums are formed in int, so they cannot wrap before the shift.
     byte averageX = (byte)((x.x + y.x + 1) >> 1);
     byte averageY = (byte)((x.y + y.y + 1) >> 1);

     return new byte2(averageX, averageY);
 }
Пример #18
0
 /// <summary>
 /// Averages the two components of <paramref name="c"/> into a single byte. The
 /// scalar path computes (1 + csum(c)) / 2, so halves round up.
 /// </summary>
 /// <param name="c">Vector whose components are averaged.</param>
 public static byte avg(byte2 c)
 {
     if (Sse2.IsSse2Supported)
     {
         // Shift right by one byte so the second component lines up with the first,
         // then let the byte-average intrinsic combine them in byte 0.
         v128 secondInFront = Sse2.bsrli_si128(c, 1 * sizeof(byte));
         v128 averaged      = Sse2.avg_epu8(c, secondInFront);

         return averaged.Byte0;
     }

     return (byte)((1u + csum(c)) / 2u);
 }
Пример #19
0
 /// <summary>
 /// Returns the per-component unsigned maximum of <paramref name="a"/> and <paramref name="b"/>.
 /// </summary>
 /// <param name="a">First operand.</param>
 /// <param name="b">Second operand.</param>
 public static byte2 max(byte2 a, byte2 b)
 {
     if (Sse2.IsSse2Supported)
     {
         return Sse2.max_epu8(a, b);
     }

     // Scalar path: compare as uint, then narrow back to byte.
     byte largerX = (byte)math.max((uint)a.x, (uint)b.x);
     byte largerY = (byte)math.max((uint)a.y, (uint)b.y);

     return new byte2(largerX, largerY);
 }
Пример #20
0
 /// <summary>
 /// Returns the smaller of the two components of <paramref name="x"/>.
 /// </summary>
 /// <param name="x">Vector whose components are compared.</param>
 public static byte cmin(byte2 x)
 {
     if (Ssse3.IsSsse3Supported)
     {
         // Compare x against a copy holding y in both slots; slot x then holds min(x.x, x.y).
         byte2 broadcastY = x.yy;
         byte2 lanewise   = min(x, broadcastY);

         return lanewise.x;
     }

     return (byte)math.min((uint)x.x, (uint)x.y);
 }
Пример #21
0
 /// <summary>
 /// Returns true if every component of <paramref name="x"/> is non-zero.
 /// </summary>
 /// <param name="x">Vector to test.</param>
 public static bool all(byte2 x)
 {
     if (Sse2.IsSse2Supported)
     {
         // Flag the components that equal zero; the lowest 16-bit lane covers both of
         // them, so it reads 0 only when neither component was zero.
         v128 zeroFlags = Sse2.cmpeq_epi8(x, default(v128));

         return Sse2.extract_epi16(zeroFlags, 0) == 0;
     }

     return math.all(x != 0);
 }
Пример #22
0
 /// <summary>
 /// Combines <paramref name="a"/> and <paramref name="b"/> with alternating signs:
 /// the first component is a.x - b.x and the second is a.y + b.y (assuming
 /// <c>select</c> picks its second argument where the mask is true).
 /// </summary>
 /// <param name="a">Left operand.</param>
 /// <param name="b">Right operand, subtracted in x and added in y.</param>
 public static byte2 subadd(byte2 a, byte2 b)
 {
     if (Ssse3.IsSsse3Supported)
     {
         // Control bytes (255, 1): 255 is negative as a signed byte, so b.x is negated
         // while b.y is kept; adding the result gives a.x - b.x and a.y + b.y.
         v128 signedB = Ssse3.sign_epi8(b, new byte2(255, 1));

         return a + signedB;
     }

     // Scalar path: build the operand with the second component negated, then subtract it.
     return a - select(b, (byte2)(-(sbyte2)b), new bool2(false, true));
 }
Пример #23
0
        /// <summary>
        /// Computes the per-component remainder of <paramref name="dividend"/> divided by
        /// <paramref name="divisor"/> as dividend - quotient * divisor, where the quotient
        /// comes from <c>vdiv_byte_quotient</c>.
        /// </summary>
        /// <param name="dividend">Numerators.</param>
        /// <param name="divisor">Denominators; both components are asserted to be non-zero.</param>
        /// <returns>The per-component remainder.</returns>
        internal static byte2 vrem_byte(byte2 dividend, byte2 divisor)
        {
            Assert.AreNotEqual(divisor.x, 0);
            Assert.AreNotEqual(divisor.y, 0);

            // Widen to int2 so the multiply/subtract below is done in 32-bit arithmetic.
            int2 wideDividend = dividend;
            int2 wideDivisor  = divisor;

            v128 quotientBits = vdiv_byte_quotient(wideDividend, wideDivisor);

            // The helper's register is read back as two floats and converted to integers.
            int2 quotient = (int2)(*(float2 *)&quotientBits);

            return (byte2)(wideDividend - (quotient * wideDivisor));
        }
Пример #24
0
 /// <summary>
 /// Divides every component of <paramref name="dividend"/> by the scalar
 /// <paramref name="divisor"/>, returning the quotient and writing the remainder to
 /// <paramref name="remainder"/>.
 /// </summary>
 /// <param name="dividend">Numerators.</param>
 /// <param name="divisor">Single denominator applied to both components.</param>
 /// <param name="remainder">Receives the per-component remainder.</param>
 /// <returns>The per-component quotient.</returns>
 public static byte2 divrem(byte2 dividend, byte divisor, out byte2 remainder)
 {
     if (Constant.IsConstantExpression(divisor))
     {
         // With a constant divisor the plain operators are used directly.
         remainder = dividend % divisor;

         byte2 quotient = dividend / divisor;
         return quotient;
     }

     // Otherwise broadcast the divisor and defer to the vector overload.
     return divrem(dividend, (byte2)divisor, out remainder);
 }
Пример #25
0
 /// <summary>
 /// Divides <paramref name="dividend"/> by <paramref name="divisor"/> per component,
 /// returning the quotient and writing the remainder to <paramref name="remainder"/>.
 /// </summary>
 /// <param name="dividend">Numerators.</param>
 /// <param name="divisor">Denominators.</param>
 /// <param name="remainder">Receives the per-component remainder.</param>
 /// <returns>The per-component quotient.</returns>
 public static byte2 divrem(byte2 dividend, byte2 divisor, out byte2 remainder)
 {
     if (Sse2.IsSse2Supported)
     {
         // Vectorised helper produces quotient and remainder together.
         return Operator.vdivrem_byte(dividend, divisor, out remainder);
     }

     // Scalar path: plain operators.
     remainder = dividend % divisor;

     byte2 quotient = dividend / divisor;
     return quotient;
 }
Пример #26
0
        /// <summary>
        /// Computes quotient and remainder of <paramref name="dividend"/> divided by
        /// <paramref name="divisor"/> per component. The quotient comes from
        /// <c>vdiv_byte_quotient</c>; the remainder is dividend - quotient * divisor.
        /// </summary>
        /// <param name="dividend">Numerators.</param>
        /// <param name="divisor">Denominators; both components are asserted to be non-zero.</param>
        /// <param name="remainder">Receives the per-component remainder.</param>
        /// <returns>The per-component quotient.</returns>
        internal static byte2 vdivrem_byte(byte2 dividend, byte2 divisor, out byte2 remainder)
        {
            Assert.AreNotEqual(divisor.x, 0);
            Assert.AreNotEqual(divisor.y, 0);

            // Widen to int2 so the multiply/subtract below is done in 32-bit arithmetic.
            int2 wideDividend = dividend;
            int2 wideDivisor  = divisor;

            v128 quotientBits = vdiv_byte_quotient(wideDividend, wideDivisor);

            // The helper's register is read back as two floats and converted to integers.
            int2 wideQuotient = (int2)(*(float2 *)&quotientBits);

            remainder = (byte2)(wideDividend - wideQuotient * wideDivisor);

            return (byte2)wideQuotient;
        }
Пример #27
0
        /// <summary>
        /// Produces a byte2 by multiplying each component of <paramref name="max"/> with a
        /// fresh <c>NextState()</c> value in 16-bit arithmetic and keeping the high byte of
        /// each product. <c>NextState()</c> is called exactly twice, x first.
        /// </summary>
        /// <param name="max">Per-component multiplier (presumably the exclusive upper bound - confirm).</param>
        /// <returns>The high bytes of the two 16-bit products.</returns>
        public byte2 NextByte(byte2 max)
        {
            // Both paths start from the same product, so it is formed once up front.
            short2 scaled = (short2)max * new short2(NextState(), NextState());

            if (Ssse3.IsSsse3Supported)
            {
                // Byte indices 1 and 3 are the high bytes of the two 16-bit products.
                return Ssse3.shuffle_epi8(scaled, new byte4(1, 3, 0, 0));
            }

            // Scalar path: shift the high byte down and narrow.
            return (byte2)(scaled >> 8);
        }
Пример #28
0
 /// <summary>
 /// Tests, per component, whether <paramref name="x"/> is a power of two.
 /// </summary>
 /// <param name="x">Vector to test.</param>
 /// <returns>A bool2 holding the per-component result.</returns>
 public static bool2 ispow2(byte2 x)
 {
     if (Sse2.IsSse2Supported)
     {
         // Condition 1: the component compares greater than zero.
         v128 isPositive = Operator.greater_mask_byte(x, default(v128));

         // Condition 2: clearing the lowest set bit leaves nothing behind.
         v128 hasSingleBit = Sse2.cmpeq_epi8(default(v128), x & (x - 1));

         // Combine both conditions, then reduce each byte mask to 0 or 1 so the bytes can be read as bools.
         v128 combined = Sse2.and_si128(isPositive, hasSingleBit);
         v128 asBools  = Sse2.and_si128(combined, new byte16(1));

         return *(bool2 *)&asBools;
     }

     return new bool2(math.ispow2((uint)x.x), math.ispow2((uint)x.y));
 }
        /// <summary>
        /// Returns the sum of absolute differences between the components of
        /// <paramref name="a"/> and <paramref name="b"/>: |a.x - b.x| + |a.y - b.y|.
        /// </summary>
        /// <param name="a">First operand.</param>
        /// <param name="b">Second operand.</param>
        public static uint sad(byte2 a, byte2 b)
        {
            if (Sse2.IsSse2Supported)
            {
                // Mask both operands with the same constant before the register-wide sum
                // (presumably bitmask32(16) keeps only the low 16 bits, i.e. the two components - confirm).
                v128 keepMaskA = new v128(maxmath.bitmask32(16), 0, 0, 0);
                v128 keepMaskB = new v128(maxmath.bitmask32(16), 0, 0, 0);

                v128 cleanA = Sse2.and_si128(a, keepMaskA);
                v128 cleanB = Sse2.and_si128(b, keepMaskB);

                v128 sums = Sse2.sad_epu8(cleanA, cleanB);

                return sums.UShort0;
            }

            // Scalar path: the differences are formed in int, so they cannot wrap.
            int differenceX = math.abs(a.x - b.x);
            int differenceY = math.abs(a.y - b.y);

            return (uint)(differenceX + differenceY);
        }
Пример #30
0
        /// <summary>
        /// Computes the per-component greatest common divisor of <paramref name="x"/> and
        /// <paramref name="y"/>. The SSE2 path strips trailing zero bits, then repeatedly
        /// orders and subtracts the operands until the difference reaches zero (the shape of
        /// the binary GCD algorithm), processing both components in the same loop.
        /// </summary>
        /// <param name="x">First operand.</param>
        /// <param name="y">Second operand.</param>
        /// <returns>
        /// The per-component result. On the SSE2 path, a component where either input is
        /// zero yields the other input.
        /// </returns>
        public static byte2 gcd(byte2 x, byte2 y)
        {
            if (Sse2.IsSse2Supported)
            {
                v128 ZERO = default(v128);

                v128 result             = ZERO;
                v128 result_if_zero_any = ZERO;

                // Per-component flags for zero inputs; those components bypass the loop result.
                v128 x_is_zero = Sse2.cmpeq_epi8(x, ZERO);
                v128 y_is_zero = Sse2.cmpeq_epi8(y, ZERO);
                v128 any_zero  = Sse2.or_si128(x_is_zero, y_is_zero);

                // Pre-compute the answer for zero inputs: y where x is zero, x where y is zero
                // (presumably BlendV takes its second operand where the mask is set).
                result_if_zero_any = Mask.BlendV(result_if_zero_any, y, x_is_zero);
                result_if_zero_any = Mask.BlendV(result_if_zero_any, x, y_is_zero);

                // Components with a zero input start out marked as finished.
                v128 doneMask = any_zero;

                // Trailing zero count of (x | y): the power of two shifted back in after the loop.
                byte2 shift = tzcnt(x | y);

                // Strip the trailing zero bits of x once up front.
                x = shrl(x, tzcnt(x));

                do
                {
                    // Strip the trailing zero bits of y on every iteration.
                    y = shrl(y, tzcnt(y));

                    v128 tempX = x;

                    // Order the pair so that x <= y, then replace y by the difference.
                    x = Sse2.min_epu8(x, y);
                    y = Sse2.max_epu8(y, tempX);

                    y -= x;

                    // A component finishes when its difference hits zero; capture x for it
                    // exactly once (components already done are excluded by the andnot).
                    v128 loopCheck = Sse2.andnot_si128(doneMask, Sse2.cmpeq_epi8(y, ZERO));
                    result   = Mask.BlendV(result, x, loopCheck);
                    doneMask = Sse2.or_si128(doneMask, loopCheck);
                    // Both components live in the lowest 16-bit lane; -1 there means both are done.
                } while (-1 != doneMask.SShort0);

                // Shift the captured values left by the count saved in "shift".
                result = shl(result, shift);

                // Components that had a zero input take the pre-computed answer instead.
                result = Mask.BlendV(result, result_if_zero_any, any_zero);

                return(result);
            }
            else
            {
                // Scalar path: defer to the uint overload for each component.
                return(new byte2((byte)gcd((uint)x.x, (uint)y.x), (byte)gcd((uint)x.y, (uint)y.y)));
            }
        }