Beispiel #1
0
        /// <summary>
        /// Computes the component-wise remainder of <paramref name="dividend"/> divided by
        /// <paramref name="divisor"/> using bit-by-bit long division on SSE2 registers.
        /// </summary>
        /// <param name="dividend">The eight unsigned byte dividends.</param>
        /// <param name="divisor">The eight unsigned byte divisors; every component is asserted to be non-zero.</param>
        /// <returns>The eight remainders, narrowed back to bytes.</returns>
        /// <exception cref="CPUFeatureCheckException">Thrown when SSE2 is not supported.</exception>
        internal static byte8 vrem_byte_SSE_FALLBACK(byte8 dividend, byte8 divisor)
        {
            Assert.AreNotEqual(divisor.x0, 0);
            Assert.AreNotEqual(divisor.x1, 0);
            Assert.AreNotEqual(divisor.x2, 0);
            Assert.AreNotEqual(divisor.x3, 0);
            Assert.AreNotEqual(divisor.x4, 0);
            Assert.AreNotEqual(divisor.x5, 0);
            Assert.AreNotEqual(divisor.x6, 0);
            Assert.AreNotEqual(divisor.x7, 0);

            if (Sse2.IsSse2Supported)
            {
                ushort8 remainders = ushort8.zero;

                // The work is done in 16-bit lanes: a remainder shifted left by one bit
                // can temporarily need more than 8 bits before the divisor is subtracted.
                ushort8 divisorCast  = divisor;
                ushort8 dividendCast = dividend;


                // Most significant dividend bit (bit 7): nothing to shift yet.
                remainders |= (new ushort8(1) & (dividendCast >> 7));

                // min(divisor, remainder) == divisor  <=>  divisor <= remainder
                v128 subtractDivisorFromRemainder = Sse2.cmpeq_epi16(maxmath.min(divisorCast, remainders), divisorCast);

                remainders -= Mask.BlendV(default(v128), divisorCast, subtractDivisorFromRemainder);

                // Dividend bits 6 down to 1.
                for (int i = 6; i > 0; i--)
                {
                    remainders <<= 1;

                    remainders |= (new ushort8(1) & (dividendCast >> i));

                    // Compare at the full 16-bit lane width, exactly like the first and last step.
                    // A byte-wise compare only produces a partial mask whenever remainder < divisor
                    // and would rely on the blend being byte-granular with a zero high divisor byte.
                    subtractDivisorFromRemainder = Sse2.cmpeq_epi16(maxmath.min(divisorCast, remainders), divisorCast);

                    remainders -= Mask.BlendV(default(v128), divisorCast, subtractDivisorFromRemainder);
                }

                // Least significant dividend bit (bit 0): no shift of the dividend required.
                remainders <<= 1;

                remainders |= new ushort8(1) & dividendCast;

                subtractDivisorFromRemainder = Sse2.cmpeq_epi16(maxmath.min(divisorCast, remainders), divisorCast);

                remainders -= Mask.BlendV(default(v128), divisorCast, subtractDivisorFromRemainder);


                return((byte8)remainders);
            }
            else
            {
                throw new CPUFeatureCheckException();
            }
        }
Beispiel #2
0
 /// <summary>
 /// Tests each component of <paramref name="x"/> for being a power of two:
 /// strictly positive and zero after clearing its lowest set bit.
 /// </summary>
 /// <param name="x">The four signed 16-bit values to test.</param>
 /// <returns>One boolean per component.</returns>
 public static bool4 ispow2(short4 x)
 {
     if (Sse2.IsSse2Supported)
     {
         v128 isPositive          = Sse2.cmpgt_epi16(x, default(v128));
         v128 clearedLowestIsZero = Sse2.cmpeq_epi16(default(v128), x & (x - 1));

         // Reduce the all-ones lane masks to 0/1 and narrow them to bytes,
         // so the low four bytes can be reinterpreted as a bool4.
         v128 result = (byte4)(new short4(1) & Sse2.and_si128(isPositive, clearedLowestIsZero));

         return *(bool4 *)&result;
     }
     else
     {
         bool px = math.ispow2(x.x);
         bool py = math.ispow2(x.y);
         bool pz = math.ispow2(x.z);
         bool pw = math.ispow2(x.w);

         return new bool4(px, py, pz, pw);
     }
 }
Beispiel #3
0
 /// <summary>
 /// Tests each component of <paramref name="x"/> for being a power of two:
 /// greater than zero and zero after clearing its lowest set bit.
 /// </summary>
 /// <param name="x">The four unsigned 16-bit values to test.</param>
 /// <returns>One boolean per component.</returns>
 public static bool4 ispow2(ushort4 x)
 {
     if (Sse2.IsSse2Supported)
     {
         v128 isNonZero           = Operator.greater_mask_ushort(x, default(v128));
         v128 clearedLowestIsZero = Sse2.cmpeq_epi16(default(v128), x & (x - 1));

         // Reduce the all-ones lane masks to 0/1 and narrow them to bytes,
         // so the low four bytes can be reinterpreted as a bool4.
         v128 result = (byte4)(new ushort4(1) & Sse2.and_si128(isNonZero, clearedLowestIsZero));

         return *(bool4 *)&result;
     }
     else
     {
         bool px = math.ispow2((uint)x.x);
         bool py = math.ispow2((uint)x.y);
         bool pz = math.ispow2((uint)x.z);
         bool pw = math.ispow2((uint)x.w);

         return new bool4(px, py, pz, pw);
     }
 }
Beispiel #4
0
        /// <summary>
        /// Adds <paramref name="b"/> to <paramref name="a"/> in the x and z components
        /// and subtracts it in the y component.
        /// </summary>
        /// <param name="a">The left operand.</param>
        /// <param name="b">The operand that is alternately added and subtracted.</param>
        /// <returns>(a.x + b.x, a.y - b.y, a.z + b.z)</returns>
        public static int3 addsub(int3 a, int3 b)
        {
            if (Ssse3.IsSsse3Supported)
            {
                // A negative lane in the second operand of sign_epi32 negates the matching lane of b.
                // NOTE(review): b is reinterpreted as a full 128-bit register; the fourth lane is
                // read but discarded again when the result is reinterpreted as int3.
                v128 lanePattern   = new v128(1, uint.MaxValue, 1, uint.MaxValue);
                v128 alternatingB  = Ssse3.sign_epi32(*(v128 *)&b, lanePattern);

                return a + *(int3 *)&alternatingB;
            }
            else
            {
                bool3 negateLane = new bool3(false, true, false);

                return a + math.select(b, -b, negateLane);
            }
        }
Beispiel #5
0
        /// <summary>
        /// Adds <paramref name="b"/> to <paramref name="a"/> in the x and z components
        /// and subtracts it in the y and w components.
        /// </summary>
        /// <param name="a">The left operand.</param>
        /// <param name="b">The operand that is alternately added and subtracted.</param>
        /// <returns>(a.x + b.x, a.y - b.y, a.z + b.z, a.w - b.w)</returns>
        public static uint4 addsub(uint4 a, uint4 b)
        {
            if (Ssse3.IsSsse3Supported)
            {
                // A negative lane in the second operand of sign_epi32 negates the matching lane of b.
                v128 lanePattern  = new v128(1, uint.MaxValue, 1, uint.MaxValue);
                v128 alternatingB = Ssse3.sign_epi32(*(v128 *)&b, lanePattern);

                return a + *(uint4 *)&alternatingB;
            }
            else
            {
                bool4 negateLane = new bool4(false, true, false, true);
                uint4 negatedB   = (uint4)(-(int4)b);

                return a + math.select(b, negatedB, negateLane);
            }
        }
Beispiel #6
0
        /// <summary>
        /// Reduces <paramref name="x"/> to an <see cref="int"/> by XOR-ing its low 128 bits with
        /// the lowest 64 bits of its high 128-bit half and passing the result on to <c>v64</c>.
        /// </summary>
        /// <param name="x">The 256-bit vector to reduce.</param>
        /// <returns>Whatever <c>v64</c> returns for the combined vector.</returns>
        /// <exception cref="CPUFeatureCheckException">Thrown when AVX2 is not supported.</exception>
        internal static int v192(v256 x)
        {
            if (Avx2.IsAvx2Supported)
            {
                long2 upperHalf = (long2)Avx2.mm256_extracti128_si256(x, 1);

                // The xx swizzle duplicates the low 64 bits of the upper half into both 64-bit lanes.
                v128 hi = upperHalf.xx;
                v128 lo = Avx.mm256_castsi256_si128(x);

                return v64(Sse2.xor_si128(lo, hi));
            }
            else
            {
                throw new CPUFeatureCheckException();
            }
        }
Beispiel #7
0
        /// <summary>
        /// Folds all four 32-bit lanes of <paramref name="x"/> into a single <see cref="int"/> with XOR.
        /// </summary>
        /// <param name="x">The 128-bit vector to reduce.</param>
        /// <returns>The XOR of the four 32-bit lanes.</returns>
        /// <exception cref="CPUFeatureCheckException">Thrown when SSE2 is not supported.</exception>
        internal static int v128(v128 x)
        {
            if (Sse2.IsSse2Supported)
            {
                // Step 1: bring the upper 64 bits down and fold them onto the lower 64 bits.
                v128 upperToLower = Sse2.shuffle_epi32(x, Sse.SHUFFLE(0, 0, 3, 2));
                v128 folded64     = Sse2.xor_si128(x, upperToLower);

                // Step 2: bring 16-bit words 2 and 3 (i.e. 32-bit lane 1) down onto lane 0 and fold again.
                v128 lane1ToLane0 = Sse2.shufflelo_epi16(folded64, Sse.SHUFFLE(0, 0, 3, 2));
                v128 folded32     = Sse2.xor_si128(folded64, lane1ToLane0);

                return folded32.SInt0;
            }
            else
            {
                throw new CPUFeatureCheckException();
            }
        }
Beispiel #8
0
        /// <summary>
        /// Returns the sign of each component of <paramref name="x"/>:
        /// -1 for negative values, 0 for zero and 1 for positive values.
        /// </summary>
        /// <param name="x">The values to inspect.</param>
        /// <returns>The component-wise sign.</returns>
        public static int4 sign(int4 x)
        {
            if (Ssse3.IsSsse3Supported)
            {
                // sign_epi32 negates, zeroes or keeps each 1 depending on the sign of the matching lane of x.
                v128 ones  = new v128(1);
                v128 signs = Ssse3.sign_epi32(ones, *(v128 *)&x);

                return *(int4 *)&signs;
            }
            else
            {
                // The arithmetic shift yields -1 for negative lanes, 0 otherwise.
                int4 minusOneIfNegative = x >> 31;
                // The logical shift of -x yields 1 for lanes whose negation has its sign bit set.
                int4 oneIfPositive      = (int4)((uint4)(-x) >> 31);

                return minusOneIfNegative | oneIfPositive;
            }
        }
Beispiel #9
0
        /// <summary>
        /// Returns the negative absolute value of each component of <paramref name="x"/>
        /// by forcing the sign bit on.
        /// </summary>
        /// <param name="x">The input values.</param>
        /// <returns>Each component with its sign bit set.</returns>
        public static double2 nabs(double2 x)
        {
            if (Sse2.IsSse2Supported)
            {
                // Bit 63 is the sign bit of a 64-bit floating point value.
                v128 signBits = new v128(1L << 63);
                v128 negative = Sse2.or_pd(*(v128 *)&x, signBits);

                return *(double2 *)&negative;
            }
            else
            {
                double nx = nabs(x.x);
                double ny = nabs(x.y);

                return new double2(nx, ny);
            }
        }
Beispiel #10
0
 /// <summary>
 /// Tests each component of <paramref name="x"/> for being a power of two:
 /// strictly positive and zero after clearing its lowest set bit.
 /// </summary>
 /// <param name="x">The two signed 64-bit values to test.</param>
 /// <returns>One boolean per component.</returns>
 public static bool2 ispow2(long2 x)
 {
     if (Sse4_2.IsSse42Supported)
     {
         v128 isPositive          = Operator.greater_mask_long(x, default(v128));
         v128 clearedLowestIsZero = Operator.equals_mask_long(default(v128), x & (x - 1));

         // Reduce the all-ones lane masks to 0/1 and narrow them to bytes,
         // so the low two bytes can be reinterpreted as a bool2.
         v128 result = (byte2)(new long2(1) & Sse2.and_si128(isPositive, clearedLowestIsZero));

         return *(bool2 *)&result;
     }
     else
     {
         bool px = ispow2(x.x);
         bool py = ispow2(x.y);

         return new bool2(px, py);
     }
 }
Beispiel #11
0
        /// <summary>
        /// Converts a two-lane comparison mask into a <c>bool2</c> that is true where the mask is NOT set.
        /// </summary>
        /// <param name="input">The mask; it is read as two 16-bit lanes and narrowed to two bytes.</param>
        /// <returns>The inverted mask, one boolean per lane.</returns>
        /// <exception cref="CPUFeatureCheckException">Thrown when SSE2 is not supported.</exception>
        private static bool2 TestIsFalse(v128 input)
        {
            if (Sse2.IsSse2Supported)
            {
                byte2 narrowedMask = (byte2)(ushort2)input;
                v128  oneInEachByte = new ushort2(0x0101);

                // andnot computes (~narrowedMask) & 1 per byte: 1 where the mask byte's lowest bit was clear.
                v128 inverted = Sse2.andnot_si128(narrowedMask, oneInEachByte);

                return *(bool2 *)&inverted;
            }
            else
            {
                throw new CPUFeatureCheckException();
            }
        }
Beispiel #12
0
        /// <summary>
        /// Packs the lowest 16 bits of both 64-bit lanes of <paramref name="x"/>
        /// into the two lowest 16-bit lanes of the result.
        /// </summary>
        /// <param name="x">A register holding two 64-bit integers.</param>
        /// <returns>A register whose 16-bit lanes 0 and 1 hold the truncated values; higher lanes are leftovers of the interleave.</returns>
        /// <exception cref="CPUFeatureCheckException">Thrown when SSE2 is not supported.</exception>
        internal static v128 Long2To_U_Short2_SSE2(v128 x)
        {
            if (Sse2.IsSse2Supported)
            {
                // Byte-shift the second 64-bit lane down into the first one ...
                v128 secondLaneInLow = Sse2.bsrli_si128(x, 1 * sizeof(long));

                // ... so that interleaving the low 16-bit words places both truncated values side by side.
                return Sse2.unpacklo_epi16(x, secondLaneInLow);
            }
            else
            {
                throw new CPUFeatureCheckException();
            }
        }
Beispiel #13
0
        /// <summary>
        /// Converts a comparison mask into a <c>bool2</c> by sampling the sign bits of bytes 0 and 8
        /// of <paramref name="input"/>.
        /// </summary>
        /// <param name="input">The mask register; presumably two 64-bit lanes - verify against callers.</param>
        /// <returns>True for each sampled byte whose most significant bit is set.</returns>
        /// <exception cref="CPUFeatureCheckException">Thrown when SSE2 is not supported.</exception>
        private static bool2 TestIsTrue(v128 input)
        {
            if (Sse2.IsSse2Supported)
            {
                int byteSignBits = Sse2.movemask_epi8(input);

                // Bit 0 ends up as the value 1 in the first byte, bit 8 as the value 1 in the second byte.
                int laneBits = 0x0101 & byteSignBits;

                return *(bool2 *)&laneBits;
            }
            else
            {
                throw new CPUFeatureCheckException();
            }
        }
Beispiel #14
0
        /// <summary>
        /// Computes <c>left &amp; ~right</c> for each component.
        /// </summary>
        /// <param name="left">The value that is kept where <paramref name="right"/> has zero bits.</param>
        /// <param name="right">The value whose bitwise complement is used as the mask.</param>
        /// <returns>The component-wise result of <c>left &amp; ~right</c>.</returns>
        public static int4 andnot(int4 left, int4 right)
        {
            if (Sse2.IsSse2Supported)
            {
                // andnot_si128 complements its FIRST operand, hence the swapped argument order.
                v128 maskedLeft = Sse2.andnot_si128(*(v128 *)&right, *(v128 *)&left);

                return *(int4 *)&maskedLeft;
            }
            else
            {
                int4 invertedRight = ~right;

                return left & invertedRight;
            }
        }
Beispiel #15
0
        /// <summary>
        /// Subtracts <paramref name="b"/> from <paramref name="a"/> in the x and z components
        /// and adds it in the y and w components.
        /// </summary>
        /// <param name="a">The left operand.</param>
        /// <param name="b">The operand that is alternately subtracted and added.</param>
        /// <returns>(a.x - b.x, a.y + b.y, a.z - b.z, a.w + b.w)</returns>
        public static int4 subadd(int4 a, int4 b)
        {
            if (Ssse3.IsSsse3Supported)
            {
                // A negative lane in the second operand of sign_epi32 negates the matching lane of b.
                v128 lanePattern  = new v128(uint.MaxValue, 1, uint.MaxValue, 1);
                v128 alternatingB = Ssse3.sign_epi32(*(v128 *)&b, lanePattern);

                return a + *(int4 *)&alternatingB;
            }
            else
            {
                bool4 negateLane = new bool4(false, true, false, true);

                return a - math.select(b, -b, negateLane);
            }
        }
Beispiel #16
0
        /// <summary>
        /// Builds the failure message for two differing 128-bit vectors.
        /// </summary>
        /// <param name="expected">The vector that was expected.</param>
        /// <param name="result">The vector that was actually produced.</param>
        /// <returns>A multi-line message containing both vectors as rendered by <c>FormatVector</c>.</returns>
        private unsafe static string FormatVectorFailure128(v128 expected, v128 result)
        {
            StringBuilder message = new StringBuilder();

            message.AppendLine("128-bit vectors differ!")
                   .AppendLine("Expected:");
            // 16 = size of a 128-bit vector in bytes
            FormatVector(message, (void *)&expected, 16);

            message.AppendLine()
                   .AppendLine("But was :");
            FormatVector(message, (void *)&result, 16);

            message.AppendLine();

            return message.ToString();
        }
Beispiel #17
0
        /// <summary>
        /// Subtracts <paramref name="b"/> from <paramref name="a"/> in the x component
        /// and adds it in the y component.
        /// </summary>
        /// <param name="a">The left operand.</param>
        /// <param name="b">The operand that is subtracted, then added.</param>
        /// <returns>(a.x - b.x, a.y + b.y)</returns>
        public static double2 subadd(double2 a, double2 b)
        {
            if (Sse3.IsSse3Supported)
            {
                // addsub_pd subtracts in the even lane and adds in the odd lane.
                v128 alternated = Sse3.addsub_pd(*(v128 *)&a, *(v128 *)&b);

                return *(double2 *)&alternated;
            }
            else
            {
                bool2 negateLane = new bool2(false, true);

                return a - math.select(b, -b, negateLane);
            }
        }
Beispiel #18
0
        /// <summary>
        /// Subtracts <paramref name="b"/> from <paramref name="a"/> in the x and z components
        /// and adds it in the y and w components.
        /// </summary>
        /// <param name="a">The left operand.</param>
        /// <param name="b">The operand that is alternately subtracted and added.</param>
        /// <returns>(a.x - b.x, a.y + b.y, a.z - b.z, a.w + b.w)</returns>
        public static float4 subadd(float4 a, float4 b)
        {
            if (Sse3.IsSse3Supported)
            {
                // addsub_ps subtracts in the even lanes and adds in the odd lanes.
                v128 alternated = Sse3.addsub_ps(*(v128 *)&a, *(v128 *)&b);

                return *(float4 *)&alternated;
            }
            else
            {
                bool4 negateLane = new bool4(false, true, false, true);

                return a - math.select(b, -b, negateLane);
            }
        }
Beispiel #19
0
        /// <summary>
        /// Subtracts <paramref name="b"/> from <paramref name="a"/> in the x and z components
        /// and adds it in the y component.
        /// </summary>
        /// <param name="a">The left operand.</param>
        /// <param name="b">The operand that is alternately subtracted and added.</param>
        /// <returns>(a.x - b.x, a.y + b.y, a.z - b.z)</returns>
        public static uint3 subadd(uint3 a, uint3 b)
        {
            if (Ssse3.IsSsse3Supported)
            {
                // A negative lane in the second operand of sign_epi32 negates the matching lane of b.
                // NOTE(review): b is reinterpreted as a full 128-bit register; the fourth lane is
                // read but discarded again when the result is reinterpreted as uint3.
                v128 lanePattern  = new v128(uint.MaxValue, 1, uint.MaxValue, 1);
                v128 alternatingB = Ssse3.sign_epi32(*(v128 *)&b, lanePattern);

                return a + *(uint3 *)&alternatingB;
            }
            else
            {
                bool3 negateLane = new bool3(false, true, false);
                uint3 negatedB   = (uint3)(-(int3)b);

                return a - math.select(b, negatedB, negateLane);
            }
        }
Beispiel #20
0
        /// <summary>
        /// Converts a comparison mask into a <c>bool2</c> that is true where the sign bits of
        /// bytes 0 and 8 of <paramref name="input"/> are NOT set.
        /// </summary>
        /// <param name="input">The mask register; presumably two 64-bit lanes - verify against callers.</param>
        /// <returns>True for each sampled byte whose most significant bit is clear.</returns>
        /// <exception cref="CPUFeatureCheckException">Thrown when SSE2 is not supported.</exception>
        private static bool2 TestIsFalse(v128 input)
        {
            if (Sse2.IsSse2Supported)
            {
                int byteSignBits = Sse2.movemask_epi8(input);

                // Keeps bit 0 and bit 8 of the constant only where the matching movemask bit is clear
                // (assuming maxmath.andnot(l, r) is l & ~r, as for the vector overloads - TODO confirm).
                int invertedLaneBits = maxmath.andnot(0x0101, byteSignBits);

                return *(bool2 *)&invertedLaneBits;
            }
            else
            {
                throw new CPUFeatureCheckException();
            }
        }
Beispiel #21
0
        /// <summary>
        /// Subtracts <paramref name="b"/> from <paramref name="a"/> in the x component
        /// and adds it in the y component.
        /// </summary>
        /// <param name="a">The left operand.</param>
        /// <param name="b">The operand that is subtracted, then added.</param>
        /// <returns>(a.x - b.x, a.y + b.y)</returns>
        public static uint2 subadd(uint2 a, uint2 b)
        {
            if (Ssse3.IsSsse3Supported)
            {
                // Lane 0 is negated, lane 1 is kept; the zero lanes make sign_epi32 clear the unused upper half.
                // NOTE(review): b is reinterpreted as a full 128-bit register although only two lanes are used.
                v128 lanePattern  = new v128(uint.MaxValue, 1, 0, 0);
                v128 alternatingB = Ssse3.sign_epi32(*(v128 *)&b, lanePattern);

                return a + *(uint2 *)&alternatingB;
            }
            else
            {
                bool2 negateLane = new bool2(false, true);
                uint2 negatedB   = (uint2)(-(int2)b);

                return a - math.select(b, negatedB, negateLane);
            }
        }
Beispiel #22
0
        /// <summary>
        /// Divides <paramref name="dividend"/> by <paramref name="divisor"/> component-wise.
        /// </summary>
        /// <remarks>
        /// The SSE path multiplies by the result of <c>rcp_ps</c>, which is an approximate reciprocal,
        /// whereas the fallback performs a regular division.
        /// NOTE(review): the two paths can therefore differ in precision - presumably intentional, confirm.
        /// </remarks>
        /// <param name="dividend">The numerators.</param>
        /// <param name="divisor">The denominators.</param>
        /// <returns>The component-wise quotient.</returns>
        public static float4 div(float4 dividend, float4 divisor)
        {
            if (Sse.IsSseSupported)
            {
                v128 reciprocal = Sse.rcp_ps(*(v128 *)&divisor);
                v128 quotient   = Sse.mul_ps(*(v128 *)&dividend, reciprocal);

                return *(float4 *)&quotient;
            }
            else
            {
                return dividend / divisor;
            }
        }
Beispiel #23
0
        /// <summary>
        /// Packs the lowest 8 bits of the two lowest 32-bit lanes of <paramref name="x"/>
        /// into the two lowest bytes of the result.
        /// </summary>
        /// <param name="x">A register holding two 32-bit integers in its lowest lanes.</param>
        /// <returns>A register whose bytes 0 and 1 hold the truncated values; higher bytes are leftovers of the interleave.</returns>
        /// <exception cref="CPUFeatureCheckException">Thrown when SSE2 is not supported.</exception>
        internal static v128 Int2To_S_Byte2_SSE2(v128 x)
        {
            if (Sse2.IsSse2Supported)
            {
                // Byte-shift the second 32-bit lane down into the first one ...
                v128 secondLaneInLow = Sse2.bsrli_si128(x, 1 * sizeof(int));

                // ... so that interleaving the low bytes places both truncated values side by side.
                return Sse2.unpacklo_epi8(x, secondLaneInLow);
            }
            else
            {
                throw new CPUFeatureCheckException();
            }
        }
Beispiel #24
0
        /// <summary>
        /// Divides <paramref name="dividend"/> by <paramref name="divisor"/> component-wise and
        /// also yields the remainder.
        /// </summary>
        /// <param name="dividend">The numerators.</param>
        /// <param name="divisor">The denominators; every component is asserted to be non-zero.</param>
        /// <param name="remainder">Receives <c>dividend - quotient * divisor</c>.</param>
        /// <returns>The component-wise quotient.</returns>
        internal static sbyte2 vdivrem_sbyte(sbyte2 dividend, sbyte2 divisor, out sbyte2 remainder)
        {
            Assert.AreNotEqual(divisor.x, 0);
            Assert.AreNotEqual(divisor.y, 0);

            int2 wideDividend = dividend;
            int2 wideDivisor  = divisor;

            // vdiv_byte_quotient hands back a register that is read here as floats
            // (presumably the float quotients - verify against its definition).
            v128 rawQuotient = vdiv_byte_quotient(wideDividend, wideDivisor);
            int2 quotient    = (int2)(*(float2 *)&rawQuotient);

            int2 product = quotient * wideDivisor;
            remainder = (sbyte2)(wideDividend - product);

            return (sbyte2)quotient;
        }
Beispiel #25
0
        /// <summary>
        /// Computes the component-wise remainder of <paramref name="dividend"/> divided by
        /// <paramref name="divisor"/> as <c>dividend - quotient * divisor</c>.
        /// </summary>
        /// <param name="dividend">The numerators.</param>
        /// <param name="divisor">The denominators; every component is asserted to be non-zero.</param>
        /// <returns>The component-wise remainder.</returns>
        internal static sbyte4 vrem_sbyte(sbyte4 dividend, sbyte4 divisor)
        {
            Assert.AreNotEqual(divisor.x, 0);
            Assert.AreNotEqual(divisor.y, 0);
            Assert.AreNotEqual(divisor.z, 0);
            Assert.AreNotEqual(divisor.w, 0);

            int4 wideDividend = dividend;
            int4 wideDivisor  = divisor;

            // vdiv_byte_quotient hands back a register that is read here as floats
            // (presumably the float quotients - verify against its definition).
            v128 rawQuotient = vdiv_byte_quotient(wideDividend, wideDivisor);
            int4 quotient    = (int4)(*(float4 *)&rawQuotient);

            return (sbyte4)(wideDividend - (quotient * wideDivisor));
        }
Beispiel #26
0
        /// <summary>
        /// Returns the negative absolute value of each component of <paramref name="x"/>
        /// by forcing the sign bit on.
        /// </summary>
        /// <param name="x">The input values.</param>
        /// <returns>Each component with its sign bit set.</returns>
        public static float4 nabs(float4 x)
        {
            if (Sse.IsSseSupported)
            {
                // Bit 31 is the sign bit of a 32-bit floating point value.
                v128 signBits = new v128(1 << 31);
                v128 negative = Sse.or_ps(*(v128 *)&x, signBits);

                return *(float4 *)&negative;
            }
            else
            {
                float nx = nabs(x.x);
                float ny = nabs(x.y);
                float nz = nabs(x.z);
                float nw = nabs(x.w);

                return new float4(nx, ny, nz, nw);
            }
        }
Beispiel #27
0
        /// <summary>
        /// Builds a per-lane mask for the unsigned 32-bit comparison <c>left &gt; right</c>.
        /// </summary>
        /// <param name="left">The left operands, four unsigned 32-bit lanes.</param>
        /// <param name="right">The right operands, four unsigned 32-bit lanes.</param>
        /// <returns>The result of the signed compare on the sign-flipped operands.</returns>
        /// <exception cref="CPUFeatureCheckException">Thrown when SSE2 is not supported.</exception>
        internal static v128 greater_mask_uint(v128 left, v128 right)
        {
            if (Sse2.IsSse2Supported)
            {
                // SSE2 only offers a signed compare; flipping the top bit of both operands
                // maps unsigned ordering onto signed ordering.
                v128 signFlip     = new v128(1 << 31);
                v128 flippedLeft  = Sse2.xor_si128(left, signFlip);
                v128 flippedRight = Sse2.xor_si128(right, signFlip);

                return Sse2.cmpgt_epi32(flippedLeft, flippedRight);
            }
            else
            {
                throw new CPUFeatureCheckException();
            }
        }
Beispiel #28
0
        /// <summary>
        /// Builds, per component, an 8-bit mask with <paramref name="numBits"/> consecutive bits set,
        /// starting at bit <paramref name="index"/>.
        /// </summary>
        /// <param name="numBits">The number of bits to set; asserted to lie in [0, 8 - index].</param>
        /// <param name="index">The position of the lowest set bit; asserted to lie in [0, 8].</param>
        /// <returns>The component-wise bit masks.</returns>
        public static byte16 bitmask8(byte16 numBits, byte16 index = default(byte16))
        {
            Assert.IsBetween(index.x0, 0u, 8u);
            Assert.IsBetween(index.x1, 0u, 8u);
            Assert.IsBetween(index.x2, 0u, 8u);
            Assert.IsBetween(index.x3, 0u, 8u);
            Assert.IsBetween(index.x4, 0u, 8u);
            Assert.IsBetween(index.x5, 0u, 8u);
            Assert.IsBetween(index.x6, 0u, 8u);
            Assert.IsBetween(index.x7, 0u, 8u);
            Assert.IsBetween(index.x8, 0u, 8u);
            Assert.IsBetween(index.x9, 0u, 8u);
            Assert.IsBetween(index.x10, 0u, 8u);
            Assert.IsBetween(index.x11, 0u, 8u);
            Assert.IsBetween(index.x12, 0u, 8u);
            Assert.IsBetween(index.x13, 0u, 8u);
            Assert.IsBetween(index.x14, 0u, 8u);
            Assert.IsBetween(index.x15, 0u, 8u);
            Assert.IsBetween(numBits.x0, 0u, 8u - index.x0);
            Assert.IsBetween(numBits.x1, 0u, 8u - index.x1);
            Assert.IsBetween(numBits.x2, 0u, 8u - index.x2);
            Assert.IsBetween(numBits.x3, 0u, 8u - index.x3);
            Assert.IsBetween(numBits.x4, 0u, 8u - index.x4);
            Assert.IsBetween(numBits.x5, 0u, 8u - index.x5);
            Assert.IsBetween(numBits.x6, 0u, 8u - index.x6);
            Assert.IsBetween(numBits.x7, 0u, 8u - index.x7);
            Assert.IsBetween(numBits.x8, 0u, 8u - index.x8);
            Assert.IsBetween(numBits.x9, 0u, 8u - index.x9);
            Assert.IsBetween(numBits.x10, 0u, 8u - index.x10);
            Assert.IsBetween(numBits.x11, 0u, 8u - index.x11);
            Assert.IsBetween(numBits.x12, 0u, 8u - index.x12);
            Assert.IsBetween(numBits.x13, 0u, 8u - index.x13);
            Assert.IsBetween(numBits.x14, 0u, 8u - index.x14);
            Assert.IsBetween(numBits.x15, 0u, 8u - index.x15);


            if (Sse2.IsSse2Supported)
            {
                // All bits from 'index' upwards.
                byte16 fromIndexUp = shl(byte.MaxValue, index);

                // Lanes requesting all 8 bits are forced to all-ones via this mask
                // (presumably because shifting a byte by 8 is not reliable - TODO confirm).
                v128 allBitsRequested = Sse2.cmpeq_epi8(numBits, new byte16(8));

                // Removing everything from 'index + numBits' upwards leaves exactly numBits bits.
                byte16 fromEndUp = shl(fromIndexUp, numBits);

                return allBitsRequested | andnot(fromIndexUp, fromEndUp);
            }
            else
            {
                byte8 lowerHalf = bitmask8(numBits.v8_0, index.v8_0);
                byte8 upperHalf = bitmask8(numBits.v8_8, index.v8_8);

                return new byte16(lowerHalf, upperHalf);
            }
        }
Beispiel #29
0
        /// <summary>
        /// Divides <paramref name="dividend"/> by <paramref name="divisor"/> component-wise and
        /// also yields the remainder.
        /// </summary>
        /// <param name="dividend">The numerators.</param>
        /// <param name="divisor">The denominators; every component is asserted to be non-zero.</param>
        /// <param name="remainder">Receives <c>dividend - quotient * divisor</c>.</param>
        /// <returns>The component-wise quotient.</returns>
        internal static byte3 vdivrem_byte(byte3 dividend, byte3 divisor, out byte3 remainder)
        {
            Assert.AreNotEqual(divisor.x, 0);
            Assert.AreNotEqual(divisor.y, 0);
            Assert.AreNotEqual(divisor.z, 0);

            int3 wideDividend = dividend;
            int3 wideDivisor  = divisor;

            // vdiv_byte_quotient hands back a register that is read here as floats
            // (presumably the float quotients - verify against its definition).
            v128 rawQuotient = vdiv_byte_quotient(wideDividend, wideDivisor);
            int3 quotient    = (int3)(*(float3 *)&rawQuotient);

            int3 product = quotient * wideDivisor;
            remainder = (byte3)(wideDividend - product);

            return (byte3)quotient;
        }
Beispiel #30
0
        /// <summary>
        /// Verifies that converting each test vector to a <c>v128</c> keeps its x and y components
        /// in <c>Byte0</c> and <c>Byte1</c> respectively.
        /// </summary>
        public static void Cast_ToV128()
        {
            bool allMatch = true;

            for (int i = 0; i < NUM_TESTS; i++)
            {
                v128 converted = TestData_LHS[i];

                bool xMatches = converted.Byte0 == TestData_LHS[i].x;
                bool yMatches = converted.Byte1 == TestData_LHS[i].y;

                // Non-short-circuiting accumulation: every element is checked, one failure fails the test.
                allMatch &= xMatches & yMatches;
            }

            Assert.AreEqual(true, allMatch);
        }