/// <summary>
/// Multiplies the two 64-bit lanes of <paramref name="left"/> with the matching lanes of
/// <paramref name="right"/>, keeping the low 64 bits of each product.
/// Each product is assembled as lo*lo + ((lo*hi + hi*lo) &lt;&lt; 32) from 32-bit partial products.
/// </summary>
/// <exception cref="CPUFeatureCheckException">Neither the SSE4.1 nor the SSE2 path is available.</exception>
internal static v128 mul_long(v128 left, v128 right)
        {
            if (Sse4_1.IsSse41Supported)
            {
                // Swap the 32-bit halves inside each 64-bit lane of 'right' so that a 32-bit
                // multiply pairs lo(left) with hi(right) and hi(left) with lo(right).
                v128 rightSwapped = Sse2.shuffle_epi32(right, Sse.SHUFFLE(2, 3, 0, 1));
                v128 cross = Sse4_1.mullo_epi32(left, rightSwapped);

                // Add the two cross terms of each lane; the sums land in elements 0 and 1,
                // elements 2 and 3 come from the zero vector.
                cross = Ssse3.hadd_epi32(cross, default(v128));

                // Place each sum in the upper 32 bits of its 64-bit lane; the lower halves
                // are filled from element 3, which is zero.
                cross = Sse2.shuffle_epi32(cross, Sse.SHUFFLE(1, 3, 0, 3));

                // Full 64-bit product of the low 32-bit halves of each lane.
                v128 lowProduct = Sse2.mul_epu32(left, right);

                return Sse2.add_epi64(cross, lowProduct);
            }

            if (Sse2.IsSse2Supported)
            {
                // Bring the high 32 bits of every lane down into the low half.
                v128 leftHigh  = Sse2.srli_epi64(left, 32);
                v128 rightHigh = Sse2.srli_epi64(right, 32);

                // hi(left)*lo(right) + lo(left)*hi(right)
                v128 cross = Sse2.add_epi64(Sse2.mul_epu32(leftHigh, right),
                                            Sse2.mul_epu32(left, rightHigh));

                // lo(left)*lo(right)
                v128 lowProduct = Sse2.mul_epu32(left, right);

                return Sse2.add_epi64(Sse2.slli_epi64(cross, 32), lowProduct);
            }

            throw new CPUFeatureCheckException();
        }
示例#2
0
        /// <summary>
        /// Returns the product of all eight elements of <paramref name="x"/> as a 32-bit unsigned value.
        /// </summary>
        public static uint cprod(ushort8 x)
        {
            if (Avx2.IsAvx2Supported)
            {
                // Reverse the order of the four 32-bit elements of x (i.e. of its four element pairs).
                ushort8 reversed = (ushort8)Sse2.shuffle_epi32(x, Sse.SHUFFLE(0, 1, 2, 3));

                // Multiply x by its reversed copy after conversion to uint8 and keep only the
                // lower 128 bits: four partial products that together cover all eight inputs.
                v128 partial = Avx.mm256_castsi256_si128((uint8)x * (uint8)reversed);

                // Multiply the four partial products by their mirror image: elements 0 and 1
                // now hold the two halves of the total product.
                v128 mirrored = Sse2.shuffle_epi32(partial, Sse.SHUFFLE(0, 1, 2, 3));
                partial = Sse4_1.mullo_epi32(partial, mirrored);

                // Move 32-bit element 1 into element 0 (via the 16-bit shuffle) and multiply once more.
                v128 neighbour = Sse2.shufflelo_epi16(partial, Sse.SHUFFLE(0, 0, 3, 2));

                return Sse4_1.mullo_epi32(partial, neighbour).UInt0;
            }

            // Fallback: multiply the two halves element-wise as uint4 and reduce that with another cprod overload.
            return cprod((uint4)x.v4_0 * (uint4)x.v4_4);
        }
示例#3
0
        /// <summary>
        /// Returns the product of all eight elements of <paramref name="x"/>, keeping the low 32 bits.
        /// </summary>
        public static int cprod(int8 x)
        {
            if (Avx2.IsAvx2Supported)
            {
                // Multiply the lower 128-bit half by the upper one: four partial products.
                v128 lowerHalf = Avx.mm256_castsi256_si128(x);
                v128 upperHalf = Avx2.mm256_extracti128_si256(x, 1);
                v128 partial   = Sse4_1.mullo_epi32(lowerHalf, upperHalf);

                // Multiply by the element-reversed copy: elements 0 and 1 now hold the
                // two halves of the total product.
                v128 mirrored = Sse2.shuffle_epi32(partial, Sse.SHUFFLE(0, 1, 2, 3));
                partial = Sse4_1.mullo_epi32(partial, mirrored);

                // Move 32-bit element 1 into element 0 (via the 16-bit shuffle) and multiply once more.
                v128 neighbour = Sse2.shufflelo_epi16(partial, Sse.SHUFFLE(0, 0, 3, 2));

                return Sse4_1.mullo_epi32(partial, neighbour).SInt0;
            }

            // Fallback: multiply the two halves element-wise and reduce that with another cprod overload.
            return cprod(x.v4_0 * x.v4_4);
        }
示例#4
0
        /// <summary>
        /// Returns the product of all sixteen elements of <paramref name="x"/> as a 32-bit unsigned value.
        /// </summary>
        public static uint cprod(ushort16 x)
        {
            if (Avx2.IsAvx2Supported)
            {
                ushort8 lower = x.v8_0;
                ushort8 upper = x.v8_8;

                // For each half: multiply it (converted to uint8) by a copy whose four
                // 32-bit elements are in reversed order.
                var lowerProducts = (uint8)lower * (uint8)(ushort8)Sse2.shuffle_epi32(lower, Sse.SHUFFLE(0, 1, 2, 3));
                var upperProducts = (uint8)upper * (uint8)(ushort8)Sse2.shuffle_epi32(upper, Sse.SHUFFLE(0, 1, 2, 3));

                // Combine both halves and keep the lower 128 bits: four partial products
                // that together cover all sixteen inputs.
                v128 partial = Avx.mm256_castsi256_si128(lowerProducts * upperProducts);

                // Multiply by the element-reversed copy: elements 0 and 1 now hold the
                // two halves of the total product.
                v128 mirrored = Sse2.shuffle_epi32(partial, Sse.SHUFFLE(0, 1, 2, 3));
                partial = Sse4_1.mullo_epi32(partial, mirrored);

                // Move 32-bit element 1 into element 0 (via the 16-bit shuffle) and multiply once more.
                v128 neighbour = Sse2.shufflelo_epi16(partial, Sse.SHUFFLE(0, 0, 3, 2));

                return Sse4_1.mullo_epi32(partial, neighbour).UInt0;
            }

            // Fallback: multiply the two halves element-wise as uint8 and reduce that with another cprod overload.
            return cprod((uint8)x.v8_0 * (uint8)x.v8_8);
        }
        /// <summary>
        /// Multiplies the four 32-bit elements of <paramref name="left"/> with the matching elements of
        /// <paramref name="right"/>, keeping the low 32 bits of each product.
        /// </summary>
        /// <exception cref="CPUFeatureCheckException">Neither the SSE4.1 nor the SSE2 path is available.</exception>
        internal static v128 mul_int(v128 left, v128 right)
        {
            if (Sse4_1.IsSse41Supported)
            {
                return Sse4_1.mullo_epi32(left, right);
            }

            if (Sse2.IsSse2Supported)
            {
                // mul_epu32 only multiplies elements 0 and 2, so copy elements 1 and 3
                // into those positions to obtain the odd products as well.
                v128 leftOdd  = Sse2.shuffle_epi32(left, Sse.SHUFFLE(3, 3, 1, 1));
                v128 rightOdd = Sse2.shuffle_epi32(right, Sse.SHUFFLE(3, 3, 1, 1));

                v128 evenProducts = Sse2.mul_epu32(left, right);
                v128 oddProducts  = Sse2.mul_epu32(leftOdd, rightOdd);

                // Interleave so the low 32 bits of products 0/1 and of products 2/3 each
                // occupy the low 64 bits of a vector, then join those two 64-bit halves.
                v128 firstPair  = Sse2.unpacklo_epi32(evenProducts, oddProducts);
                v128 secondPair = Sse2.unpackhi_epi32(evenProducts, oddProducts);

                return Sse2.unpacklo_epi64(firstPair, secondPair);
            }

            throw new CPUFeatureCheckException();
        }