internal static v128 mul_long(v128 left, v128 right)
        {
            // Multiplies the two 64-bit lanes of 'left' and 'right' and keeps the low
            // 64 bits of each product. Writing every lane as (hi << 32) + lo:
            //
            //     left * right  mod 2^64
            //         == lo(left) * lo(right)
            //          + ((lo(left) * hi(right) + hi(left) * lo(right)) << 32)
            //
            // The hi * hi term is shifted out entirely, and only the low 32 bits of
            // the cross-term sum survive the shift.
            // Throws CPUFeatureCheckException when neither SSE4.1 nor SSE2 is available.

            if (Sse4_1.IsSse41Supported)
            {
                // Full 64-bit product of the low dwords of every 64-bit lane.
                v128 lowTimesLow = Sse2.mul_epu32(left, right);

                // Swap the two dwords inside each 64-bit lane of 'right', so a 32-bit
                // multiply pairs lo(left) with hi(right) and hi(left) with lo(right).
                v128 rightSwapped = Sse2.shuffle_epi32(right, Sse.SHUFFLE(2, 3, 0, 1));
                v128 crossTerms = Sse4_1.mullo_epi32(left, rightSwapped);

                // Add the two (wrapped) cross terms of each lane; the sums land in the
                // two lowest dwords, the remaining dwords come from the zero operand.
                v128 crossSums = Ssse3.hadd_epi32(crossTerms, default(v128));

                // Place each sum in the upper dword of its lane and a zero dword below
                // it, which is the "<< 32" from the formula above.
                v128 crossShifted = Sse2.shuffle_epi32(crossSums, Sse.SHUFFLE(1, 3, 0, 3));

                return Sse2.add_epi64(lowTimesLow, crossShifted);
            }

            if (Sse2.IsSse2Supported)
            {
                // Bring the upper dword of every lane down so mul_epu32 can read it.
                v128 leftHigh = Sse2.srli_epi64(left, 32);
                v128 rightHigh = Sse2.srli_epi64(right, 32);

                v128 lowTimesLow = Sse2.mul_epu32(left, right);
                v128 highTimesLow = Sse2.mul_epu32(leftHigh, right);
                v128 lowTimesHigh = Sse2.mul_epu32(left, rightHigh);

                // Sum the cross terms, then move them into the upper half of the lane.
                v128 crossSum = Sse2.add_epi64(highTimesLow, lowTimesHigh);
                v128 crossShifted = Sse2.slli_epi64(crossSum, 32);

                return Sse2.add_epi64(crossShifted, lowTimesLow);
            }

            throw new CPUFeatureCheckException();
        }