コード例 #1
0
ファイル: Bitmask.cs プロジェクト: csritter/MaxMath
        /// <summary>
        ///     Builds, for each 16-bit lane, a mask of <paramref name="numBits"/> consecutive set bits whose lowest set bit is at position <paramref name="index"/>.
        /// </summary>
        /// <param name="numBits">Number of bits to set per lane; checked with <c>Assert.IsBetween</c> against 0 and <c>16 - index</c>.</param>
        /// <param name="index">Bit position of the lowest set bit per lane; checked with <c>Assert.IsBetween</c> against 0 and 16. Defaults to zero in every lane.</param>
        /// <returns>The per-lane bit mask. Lanes whose <paramref name="numBits"/> equals 16 are OR-ed with a dedicated "all 16 bits" mask.</returns>
        public static ushort8 bitmask16(ushort8 numBits, ushort8 index = default(ushort8))
        {
            Assert.IsBetween(index.x0, 0u, 16u);
            Assert.IsBetween(index.x1, 0u, 16u);
            Assert.IsBetween(index.x2, 0u, 16u);
            Assert.IsBetween(index.x3, 0u, 16u);
            Assert.IsBetween(index.x4, 0u, 16u);
            Assert.IsBetween(index.x5, 0u, 16u);
            Assert.IsBetween(index.x6, 0u, 16u);
            Assert.IsBetween(index.x7, 0u, 16u);
            Assert.IsBetween(numBits.x0, 0u, 16u - index.x0);
            Assert.IsBetween(numBits.x1, 0u, 16u - index.x1);
            Assert.IsBetween(numBits.x2, 0u, 16u - index.x2);
            Assert.IsBetween(numBits.x3, 0u, 16u - index.x3);
            Assert.IsBetween(numBits.x4, 0u, 16u - index.x4);
            Assert.IsBetween(numBits.x5, 0u, 16u - index.x5);
            Assert.IsBetween(numBits.x6, 0u, 16u - index.x6);
            Assert.IsBetween(numBits.x7, 0u, 16u - index.x7);


            // Turn the bit position into "all ones shifted left by index".
            // This must run for BOTH branches below: each of them combines the shifted
            // mask with itself, not the raw bit position.
            index = shl(ushort.MaxValue, index);

            if (Sse2.IsSse2Supported)
            {
                // Lanes asking for all 16 bits are handled by an explicit compare
                // rather than by relying on the result of a shift by 16.
                v128 isMaxBitsMask = Sse2.cmpeq_epi16(numBits, new ushort8(16));

                // andnot(a, b) keeps the bits of a that are not set in b, i.e. it clears
                // everything from bit (index + numBits) upwards.
                return(isMaxBitsMask | andnot(index, shl(index, numBits)));
            }
            else
            {
                return((ushort8)(-toint16(numBits == 16)) | andnot(index, shl(index, numBits)));
            }
        }
コード例 #2
0
        /// <summary>
        ///     Divides eight unsigned bytes lane by lane with a bit-by-bit shift-and-subtract long division carried out in 16-bit lanes.
        /// </summary>
        /// <param name="dividend">Per-lane dividends.</param>
        /// <param name="divisor">Per-lane divisors; every lane is asserted to be non-zero.</param>
        /// <param name="remainder">Receives the per-lane remainders.</param>
        /// <returns>The per-lane quotients.</returns>
        /// <exception cref="CPUFeatureCheckException">Thrown when SSE2 is not supported.</exception>
        internal static byte8 vdivrem_byte_SSE_FALLBACK(byte8 dividend, byte8 divisor, out byte8 remainder)
        {
            Assert.AreNotEqual(divisor.x0, 0);
            Assert.AreNotEqual(divisor.x1, 0);
            Assert.AreNotEqual(divisor.x2, 0);
            Assert.AreNotEqual(divisor.x3, 0);
            Assert.AreNotEqual(divisor.x4, 0);
            Assert.AreNotEqual(divisor.x5, 0);
            Assert.AreNotEqual(divisor.x6, 0);
            Assert.AreNotEqual(divisor.x7, 0);

            if (Sse2.IsSse2Supported)
            {
                ushort8 quotients  = ushort8.zero;
                ushort8 remainders = ushort8.zero;

                // Work in 16-bit lanes so the shifted remainder cannot overflow a lane.
                ushort8 divisorCast  = divisor;
                ushort8 dividendCast = dividend;


                // Step for bit 7 (the most significant dividend bit); nothing to shift yet.
                remainders |= (new ushort8(1) & (dividendCast >> 7));

                // min(divisor, remainder) == divisor  <=>  divisor <= remainder.
                v128 subtractDivisorFromRemainder = Sse2.cmpeq_epi16(maxmath.min(divisorCast, remainders), divisorCast);

                remainders -= Mask.BlendV(default(v128), divisorCast, subtractDivisorFromRemainder);
                quotients  |= new ushort8(1) & subtractDivisorFromRemainder;

                // Steps for bits 6 down to 1.
                for (int i = 6; i > 0; i--)
                {
                    quotients  <<= 1;
                    remainders <<= 1;

                    remainders |= (new ushort8(1) & (dividendCast >> i));

                    // 16-bit compare: the lanes are ushort, matching the first and last steps.
                    // A byte-wise compare would only yield a usable mask as long as both
                    // high bytes happen to be zero.
                    subtractDivisorFromRemainder = Sse2.cmpeq_epi16(maxmath.min(divisorCast, remainders), divisorCast);

                    remainders -= Mask.BlendV(default(v128), divisorCast, subtractDivisorFromRemainder);
                    quotients  |= new ushort8(1) & subtractDivisorFromRemainder;
                }

                // Step for bit 0; the dividend needs no shift here.
                remainders <<= 1;
                quotients  <<= 1;

                remainders |= new ushort8(1) & dividendCast;

                subtractDivisorFromRemainder = Sse2.cmpeq_epi16(maxmath.min(divisorCast, remainders), divisorCast);

                remainders -= Mask.BlendV(default(v128), divisorCast, subtractDivisorFromRemainder);
                quotients  |= new ushort8(1) & subtractDivisorFromRemainder;


                // Narrow both results to bytes in one instruction:
                // remainders end up in the low eight bytes, quotients in the high eight.
                byte16 temp = Sse2.packus_epi16(remainders, quotients);
                remainder = temp.v8_0;
                return(temp.v8_8);
            }
            else
            {
                throw new CPUFeatureCheckException();
            }
        }
コード例 #3
0
        /// <summary>
        ///     Counts the trailing zero bits of each 16-bit lane of <paramref name="x"/>.
        /// </summary>
        /// <param name="x">The eight lanes to inspect.</param>
        /// <returns>The per-lane trailing zero count.</returns>
        public static ushort8 tzcnt(ushort8 x)
        {
            if (Ssse3.IsSsse3Supported)
            {
                // Selects the low four bits of every byte (assumes the single-int v128 constructor broadcasts its argument - TODO confirm).
                v128 NIBBLE_MASK     = new v128(0x0F0F_0F0F);
                // Lookup tables indexed by a nibble value n: entry n is the trailing zero count of n,
                // offset by 4 in the table used for the high nibble. Entry 0 holds 16 as the "no bit set" marker.
                v128 SHUFFLE_MASK_LO = new v128(16, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0);
                v128 SHUFFLE_MASK_HI = new v128(16, 4, 5, 4, 6, 4, 5, 4, 7, 4, 5, 4, 6, 4, 5, 4);

                // Per byte: the smaller of the low-nibble and high-nibble lookups, so a zero byte yields 16.
                v128 tzcnt_bytes = Sse2.min_epu8(Ssse3.shuffle_epi8(SHUFFLE_MASK_LO, Sse2.and_si128(NIBBLE_MASK, x)),
                                                 Ssse3.shuffle_epi8(SHUFFLE_MASK_HI, Sse2.and_si128(NIBBLE_MASK, Sse2.srli_epi16(x, 4))));

                // Combine the two bytes of each lane: the high byte's count plus 8 is moved into the low byte
                // position and the minimum with the low byte's own count is taken. The shift leaves zero in the
                // high byte, so the byte-wise minimum also clears the high byte of the result.
                return(Sse2.min_epu8(tzcnt_bytes,
                                     Sse2.srli_epi16(Sse2.add_epi8(tzcnt_bytes, Sse2.set1_epi8(8)), 8)));
            }
            else if (Sse2.IsSse2Supported)
            {
                // x & -x isolates the lowest set bit of each lane (zero stays zero).
                v128 compareMask = x & (ushort8)(-((short8)x));

                // Each term contributes its weight when the isolated bit is absent from the tested bit pattern;
                // 'first' adds one extra for an all-zero lane so that such a lane sums to 16.
                ushort8 first  = Mask.BlendV(default(v128), new ushort8(1), Sse2.cmpeq_epi16(compareMask, default(v128)));
                ushort8 second = Mask.BlendV(default(v128), new ushort8(8), Sse2.cmpeq_epi16(compareMask & (ushort8)0x00FF, default(v128)));
                ushort8 third  = Mask.BlendV(default(v128), new ushort8(4), Sse2.cmpeq_epi16(compareMask & (ushort8)0x0F0F, default(v128)));
                ushort8 fourth = Mask.BlendV(default(v128), new ushort8(2), Sse2.cmpeq_epi16(compareMask & (ushort8)0x3333, default(v128)));
                ushort8 fifth  = Mask.BlendV(default(v128), new ushort8(1), Sse2.cmpeq_epi16(compareMask & (ushort8)0x5555, default(v128)));

                return((first + second) + ((third + fourth) + fifth));
            }
            else
            {
                // No SIMD support: defer to the scalar overload lane by lane.
                return(new ushort8(tzcnt(x.x0), tzcnt(x.x1), tzcnt(x.x2), tzcnt(x.x3), tzcnt(x.x4), tzcnt(x.x5), tzcnt(x.x6), tzcnt(x.x7)));
            }
        }
コード例 #4
0
ファイル: ushort3x4.cs プロジェクト: csritter/MaxMath
        /// <summary>
        ///     Component-wise remainder of two <see cref="ushort3x4"/> matrices.
        /// </summary>
        /// <param name="left">The dividends.</param>
        /// <param name="right">The divisors.</param>
        /// <returns>A matrix holding <c>left % right</c> for every component.</returns>
        public static ushort3x4 operator %(ushort3x4 left, ushort3x4 right)
        {
            if (Avx2.IsAvx2Supported)
            {
                // Two columns are packed into each 8-lane vector so that two vector
                // remainders cover all four columns.
                ushort8 numerators01 = Sse2.unpacklo_epi64(left.c0, left.c1);
                ushort8 numerators23 = Sse2.unpacklo_epi64(left.c2, left.c3);

                ushort8 denominators01 = Sse2.unpacklo_epi64(right.c0, right.c1);
                ushort8 denominators23 = Sse2.unpacklo_epi64(right.c2, right.c3);
#if DEBUG
                // Lanes 3 and 7 are not part of the result; presumably they get a non-zero
                // divisor here so debug-only divisor checks do not trip on them.
                denominators01.x3 = 1;
                denominators01.x7 = 1;
                denominators23.x3 = 1;
                denominators23.x7 = 1;
#endif
                ushort8 remainders01 = numerators01 % denominators01;
                ushort8 remainders23 = numerators23 % denominators23;

                return new ushort3x4(remainders01.v3_0, remainders01.v3_4, remainders23.v3_0, remainders23.v3_4);
            }

            // Without AVX2, take the remainder column by column.
            return new ushort3x4(left.c0 % right.c0, left.c1 % right.c1, left.c2 % right.c2, left.c3 % right.c3);
        }
コード例 #5
0
        /// <summary>
        ///     Divides every component of a <see cref="ushort2x4"/> matrix by the same scalar.
        /// </summary>
        /// <param name="left">The dividends.</param>
        /// <param name="right">The divisor applied to every component.</param>
        /// <returns>A matrix holding <c>left / right</c> for every component.</returns>
        public static ushort2x4 operator /(ushort2x4 left, ushort right)
        {
            if (Avx2.IsAvx2Supported)
            {
                if (!Constant.IsConstantExpression(right))
                {
                    // All four columns fit into one 8-lane vector.
                    ushort8 quotients = new ushort8(left.c0, left.c1, left.c2, left.c3) / right;

                    return new ushort2x4(quotients.v2_0, quotients.v2_2, quotients.v2_4, quotients.v2_6);
                }
            }
            else if (Sse2.IsSse2Supported)
            {
                if (!Constant.IsConstantExpression(right))
                {
                    // Two columns per 4-lane vector.
                    ushort4 broadcastDivisor = right;
                    ushort4 columns01        = new ushort4(left.c0, left.c1) / broadcastDivisor;
                    ushort4 columns23        = new ushort4(left.c2, left.c3) / broadcastDivisor;

                    return new ushort2x4(columns01.xy, columns01.zw, columns23.xy, columns23.zw);
                }
            }

            // Constant divisor or no SIMD support: divide column by column.
            return new ushort2x4(left.c0 / right, left.c1 / right, left.c2 / right, left.c3 / right);
        }
コード例 #6
0
        /// <summary>
        ///     Computes, lane by lane, <c>(|x| / gcd(|x|, |y|)) * |y|</c>, i.e. the least common multiple of the absolute values.
        /// </summary>
        /// <param name="x">First operand.</param>
        /// <param name="y">Second operand.</param>
        /// <returns>The per-lane result as unsigned 16-bit values.</returns>
        public static ushort8 lcm(short8 x, short8 y)
        {
            ushort8 magnitudeX = (ushort8)abs(x);
            ushort8 magnitudeY = (ushort8)abs(y);

            // Divide before multiplying to keep the intermediate value small.
            return (magnitudeX / gcd(magnitudeX, magnitudeY)) * magnitudeY;
        }
コード例 #7
0
        /// <summary>
        ///     Takes the remainder of every component of a <see cref="ushort2x4"/> matrix divided by the same scalar.
        /// </summary>
        /// <param name="left">The dividends.</param>
        /// <param name="right">The divisor applied to every component.</param>
        /// <returns>A matrix holding <c>left % right</c> for every component.</returns>
        public static ushort2x4 operator %(ushort2x4 left, ushort right)
        {
            if (Avx2.IsAvx2Supported)
            {
                if (!Constant.IsConstantExpression(right))
                {
                    // All four columns fit into one 8-lane vector.
                    ushort8 remainders = new ushort8(left.c0, left.c1, left.c2, left.c3) % right;

                    return new ushort2x4(remainders.v2_0, remainders.v2_2, remainders.v2_4, remainders.v2_6);
                }
            }
            else if (Sse2.IsSse2Supported)
            {
                if (!Constant.IsConstantExpression(right))
                {
                    // Two columns per 4-lane vector.
                    ushort4 broadcastDivisor = right;
                    ushort4 columns01        = new ushort4(left.c0, left.c1) % broadcastDivisor;
                    ushort4 columns23        = new ushort4(left.c2, left.c3) % broadcastDivisor;

                    return new ushort2x4(columns01.xy, columns01.zw, columns23.xy, columns23.zw);
                }
            }

            // Constant divisor or no SIMD support: take the remainder column by column.
            return new ushort2x4(left.c0 % right, left.c1 % right, left.c2 % right, left.c3 % right);
        }
コード例 #8
0
        /// <summary>
        ///     Finds the index of the first lane of <paramref name="v"/> that equals <paramref name="x"/>.
        /// </summary>
        /// <param name="v">The vector to search.</param>
        /// <param name="x">The value to look for.</param>
        /// <returns>The index of the first matching lane; the scalar path returns 16 when no lane matches.</returns>
        public static int indexof(ushort16 v, ushort x)
        {
            if (Avx2.IsAvx2Supported)
            {
                // Each matching 16-bit lane contributes two mask bits, hence the shift by one.
                int matchBits = Avx2.mm256_movemask_epi8(Avx2.mm256_cmpeq_epi16(v, new ushort16(x)));

                return math.tzcnt(matchBits) >> 1;
            }
            else if (Sse2.IsSse2Supported)
            {
                ushort8 needle = x;

                // Build the same 32-bit byte mask from the two 128-bit halves.
                int lowerHalfBits = Sse2.movemask_epi8(Sse2.cmpeq_epi16(v._v8_0, needle));
                int upperHalfBits = Sse2.movemask_epi8(Sse2.cmpeq_epi16(v._v8_8, needle));

                return math.tzcnt(lowerHalfBits | (upperHalfBits << 16)) >> 1;
            }
            else
            {
                for (int lane = 0; lane < 16; lane++)
                {
                    if (v[lane] == x)
                    {
                        return lane;
                    }
                }

                return 16;
            }
        }
コード例 #9
0
ファイル: Reverse Bits.cs プロジェクト: csritter/MaxMath
        /// <summary>
        ///     Reverses the bit order within each 16-bit lane of <paramref name="x"/>.
        /// </summary>
        /// <param name="x">The lanes to reverse.</param>
        /// <returns>The per-lane bit-reversed values.</returns>
        public static ushort8 reversebits(ushort8 x)
        {
            // Swap neighbouring bits, then neighbouring bit pairs, then neighbouring nibbles.
            ushort8 swapped = ((x >> 1) & 0x5555) | ((x & 0x5555) << 1);
            swapped = ((swapped >> 2) & 0x3333) | ((swapped & 0x3333) << 2);
            swapped = ((swapped >> 4) & 0x0F0F) | ((swapped & 0x0F0F) << 4);

            // Finally swap the two bytes of each lane.
            return (swapped >> 8) | (swapped << 8);
        }
コード例 #10
0
        /// <summary>
        ///     Keeps only the highest set bit of each 16-bit lane, i.e. rounds each lane down to a power of two.
        /// </summary>
        /// <param name="x">The lanes to round.</param>
        /// <returns>The per-lane rounded values.</returns>
        public static ushort8 floorpow2(ushort8 x)
        {
            // Smear the highest set bit into every lower bit position.
            x = x | (x >> 1);
            x = x | (x >> 2);
            x = x | (x >> 4);
            x = x | (x >> 8);

            // Subtracting the smeared value shifted right by one leaves just the top bit.
            ushort8 bitsBelowTop = x >> 1;

            return x - bitsBelowTop;
        }
コード例 #11
0
        /// <summary>
        ///     Rounds each 16-bit lane up to a power of two using the decrement / bit-smear / increment technique.
        /// </summary>
        /// <param name="x">The lanes to round.</param>
        /// <returns>The per-lane rounded values.</returns>
        public static ushort8 ceilpow2(ushort8 x)
        {
            // Decrement first so that exact powers of two map to themselves.
            x = x - 1;

            // Smear the highest set bit into every lower bit position.
            x = x | (x >> 1);
            x = x | (x >> 2);
            x = x | (x >> 4);
            x = x | (x >> 8);

            return x + 1;
        }
コード例 #12
0
 /// <summary>
 ///     Copies each of the eight lanes of <paramref name="v"/> into the proxy member of the same name.
 /// </summary>
 /// <param name="v">The vector whose lanes are exposed by this proxy.</param>
 public DebuggerProxy(ushort8 v)
 {
     this.x0 = v.x0;
     this.x1 = v.x1;
     this.x2 = v.x2;
     this.x3 = v.x3;
     this.x4 = v.x4;
     this.x5 = v.x5;
     this.x6 = v.x6;
     this.x7 = v.x7;
 }
コード例 #13
0
ファイル: AndNot.cs プロジェクト: csritter/MaxMath
 /// <summary>
 ///     Computes <c>left &amp; ~right</c> for each lane.
 /// </summary>
 /// <param name="left">The value whose bits are kept.</param>
 /// <param name="right">The bits to clear from <paramref name="left"/>.</param>
 /// <returns>The bits of <paramref name="left"/> that are not set in <paramref name="right"/>.</returns>
 public static ushort8 andnot(ushort8 left, ushort8 right)
 {
     if (Sse2.IsSse2Supported)
     {
         // The intrinsic negates its first operand, hence the swapped argument order.
         return Sse2.andnot_si128(right, left);
     }

     return left & ~right;
 }
コード例 #14
0
ファイル: Bit Pattern.cs プロジェクト: csritter/MaxMath
 /// <summary>
 ///     Reinterprets <paramref name="x"/> as a <see cref="half8"/>; no numeric conversion is applied on the pointer path.
 /// </summary>
 /// <param name="x">The value to reinterpret.</param>
 /// <returns><paramref name="x"/> viewed as a <see cref="half8"/>.</returns>
 public static half8 ashalf(ushort8 x)
 {
     if (Sse.IsSseSupported)
     {
         // Route through the untyped 128-bit register type.
         return (v128)x;
     }

     // No SSE: reinterpret the same memory through a pointer cast.
     return *(half8 *)&x;
 }
コード例 #15
0
ファイル: Average.cs プロジェクト: csritter/MaxMath
 /// <summary>
 ///     Computes the per-lane average of <paramref name="x"/> and <paramref name="y"/>, rounding up: <c>(x + y + 1) >> 1</c>.
 /// </summary>
 /// <param name="x">First operand.</param>
 /// <param name="y">Second operand.</param>
 /// <returns>The per-lane rounded average.</returns>
 public static ushort8 avg(ushort8 x, ushort8 y)
 {
     if (Sse2.IsSse2Supported)
     {
         return Sse2.avg_epu16(x, y);
     }

     // Scalar path, one lane per line.
     return new ushort8(
         (ushort)((x.x0 + y.x0 + 1) >> 1),
         (ushort)((x.x1 + y.x1 + 1) >> 1),
         (ushort)((x.x2 + y.x2 + 1) >> 1),
         (ushort)((x.x3 + y.x3 + 1) >> 1),
         (ushort)((x.x4 + y.x4 + 1) >> 1),
         (ushort)((x.x5 + y.x5 + 1) >> 1),
         (ushort)((x.x6 + y.x6 + 1) >> 1),
         (ushort)((x.x7 + y.x7 + 1) >> 1));
 }
コード例 #16
0
ファイル: Subtract-Add.cs プロジェクト: csritter/MaxMath
 /// <summary>
 ///     Alternates subtraction and addition across lanes: even lanes yield <c>a - b</c>, odd lanes yield <c>a + b</c>.
 /// </summary>
 /// <param name="a">First operand.</param>
 /// <param name="b">Second operand, subtracted in even lanes and added in odd lanes.</param>
 /// <returns>The per-lane result.</returns>
 public static ushort8 subadd(ushort8 a, ushort8 b)
 {
     if (Ssse3.IsSsse3Supported)
     {
         // 0xFFFF reads as -1 in a signed lane, so sign_epi16 negates b in the even lanes only.
         ushort8 signPattern = new ushort8(ushort.MaxValue, 1, ushort.MaxValue, 1, ushort.MaxValue, 1, ushort.MaxValue, 1);

         return a + Ssse3.sign_epi16(b, signPattern);
     }

     // Negate b in the odd lanes and subtract everywhere.
     bool8 negateLane = new bool8(false, true, false, true, false, true, false, true);

     return a - select(b, (ushort8)(-(short8)b), negateLane);
 }
コード例 #17
0
 /// <summary>
 ///     Gathers bytes 0, 2, 4, ..., 14 of <paramref name="x"/> into the first eight bytes of the result with a single byte shuffle.
 /// </summary>
 /// <param name="x">The 16-bit lanes to narrow.</param>
 /// <returns>The shuffled register.</returns>
 /// <exception cref="CPUFeatureCheckException">Thrown when SSSE3 is not supported.</exception>
 internal static v128 ShortToByte(ushort8 x)
 {
     if (Ssse3.IsSsse3Supported)
     {
         byte8 evenByteIndices = new byte8(0, 2, 4, 6, 8, 10, 12, 14);

         return Ssse3.shuffle_epi8(x, evenByteIndices);
     }

     throw new CPUFeatureCheckException();
 }
コード例 #18
0
 /// <summary>
 ///     Divides every lane of <paramref name="dividend"/> by a scalar and also reports the remainders.
 /// </summary>
 /// <param name="dividend">Per-lane dividends.</param>
 /// <param name="divisor">The divisor applied to every lane.</param>
 /// <param name="remainder">Receives the per-lane remainders.</param>
 /// <returns>The per-lane quotients.</returns>
 public static ushort8 divrem(ushort8 dividend, ushort divisor, out ushort8 remainder)
 {
     if (Constant.IsConstantExpression(divisor))
     {
         // Compile-time constant divisor: use the separate operators.
         remainder = dividend % divisor;

         return dividend / divisor;
     }

     // Otherwise broadcast the divisor and use the vector overload.
     return divrem(dividend, (ushort8)divisor, out remainder);
 }
コード例 #19
0
 /// <summary>
 ///     Tests, lane by lane, whether <paramref name="x"/> is a power of two.
 /// </summary>
 /// <param name="x">The lanes to test.</param>
 /// <returns>A per-lane flag.</returns>
 public static bool8 ispow2(ushort8 x)
 {
     if (Sse2.IsSse2Supported)
     {
         // A power of two is greater than zero and has no bits in common with itself minus one.
         v128 isPositive      = Operator.greater_mask_ushort(x, default(v128));
         v128 hasSingleBit    = Sse2.cmpeq_epi16(default(v128), x & (x - 1));
         ushort8 zeroOrOne    = new ushort8(1) & Sse2.and_si128(isPositive, hasSingleBit);

         return (v128)(byte8)zeroOrOne;
     }

     return new bool8(
         math.ispow2((uint)x.x0),
         math.ispow2((uint)x.x1),
         math.ispow2((uint)x.x2),
         math.ispow2((uint)x.x3),
         math.ispow2((uint)x.x4),
         math.ispow2((uint)x.x5),
         math.ispow2((uint)x.x6),
         math.ispow2((uint)x.x7));
 }
コード例 #20
0
ファイル: ushort4x2.cs プロジェクト: csritter/MaxMath
        /// <summary>
        ///     Component-wise division of two <see cref="ushort4x2"/> matrices.
        /// </summary>
        /// <param name="left">The dividends.</param>
        /// <param name="right">The divisors.</param>
        /// <returns>A matrix holding <c>left / right</c> for every component.</returns>
        public static ushort4x2 operator /(ushort4x2 left, ushort4x2 right)
        {
            if (Avx2.IsAvx2Supported)
            {
                // Both columns fit into a single 8-lane division.
                ushort8 quotients = new ushort8(left.c0, left.c1) / new ushort8(right.c0, right.c1);

                return new ushort4x2(quotients.v4_0, quotients.v4_4);
            }

            return new ushort4x2(left.c0 / right.c0, left.c1 / right.c1);
        }
コード例 #21
0
ファイル: ushort4x3.cs プロジェクト: csritter/MaxMath
        /// <summary>
        ///     Component-wise division of two <see cref="ushort4x3"/> matrices.
        /// </summary>
        /// <param name="left">The dividends.</param>
        /// <param name="right">The divisors.</param>
        /// <returns>A matrix holding <c>left / right</c> for every component.</returns>
        public static ushort4x3 operator /(ushort4x3 left, ushort4x3 right)
        {
            if (Avx2.IsAvx2Supported)
            {
                // The first two columns share one 8-lane division; the third is divided on its own.
                ushort8 quotients01 = new ushort8(left.c0, left.c1) / new ushort8(right.c0, right.c1);

                return new ushort4x3(quotients01.v4_0, quotients01.v4_4, left.c2 / right.c2);
            }

            return new ushort4x3(left.c0 / right.c0, left.c1 / right.c1, left.c2 / right.c2);
        }
コード例 #22
0
ファイル: ushort4x2.cs プロジェクト: csritter/MaxMath
        /// <summary>
        ///     Component-wise remainder of two <see cref="ushort4x2"/> matrices.
        /// </summary>
        /// <param name="left">The dividends.</param>
        /// <param name="right">The divisors.</param>
        /// <returns>A matrix holding <c>left % right</c> for every component.</returns>
        public static ushort4x2 operator %(ushort4x2 left, ushort4x2 right)
        {
            if (Avx2.IsAvx2Supported)
            {
                // Both columns fit into a single 8-lane remainder.
                ushort8 remainders = new ushort8(left.c0, left.c1) % new ushort8(right.c0, right.c1);

                return new ushort4x2(remainders.v4_0, remainders.v4_4);
            }

            return new ushort4x2(left.c0 % right.c0, left.c1 % right.c1);
        }
コード例 #23
0
        /// <summary>
        ///     Counts the set bits in each 16-bit lane of <paramref name="x"/>.
        /// </summary>
        /// <param name="x">The lanes to inspect.</param>
        /// <returns>The per-lane population count.</returns>
        public static ushort8 countbits(ushort8 x)
        {
            if (Ssse3.IsSsse3Supported)
            {
                // Count per byte first, then add the two byte counts of every lane.
                ushort8 perByteCounts  = (v128)countbits((byte16)(v128)x);
                ushort8 lowByteCounts  = perByteCounts & 0x00FF;
                ushort8 highByteCounts = perByteCounts >> 8;

                return lowByteCounts + highByteCounts;
            }

            return new ushort8(
                (ushort)math.countbits((uint)x.x0),
                (ushort)math.countbits((uint)x.x1),
                (ushort)math.countbits((uint)x.x2),
                (ushort)math.countbits((uint)x.x3),
                (ushort)math.countbits((uint)x.x4),
                (ushort)math.countbits((uint)x.x5),
                (ushort)math.countbits((uint)x.x6),
                (ushort)math.countbits((uint)x.x7));
        }
コード例 #24
0
        /// <summary>
        ///     Converts each lane of <paramref name="x"/> to a boolean after clamping it to the range 0..1.
        /// </summary>
        /// <param name="x">The lanes to convert.</param>
        /// <returns>A per-lane flag built from the clamped bytes.</returns>
        public static bool8 toboolsafe(ushort8 x)
        {
            // Both paths start from the same clamped byte vector.
            byte8 zeroOrOne = (byte8)clamp(x, 0, 1);

            if (Sse2.IsSse2Supported)
            {
                return (v128)zeroOrOne;
            }

            // No SSE2: reinterpret the bytes through a pointer cast.
            return *(bool8 *)&zeroOrOne;
        }
コード例 #25
0
ファイル: ushort4x3.cs プロジェクト: csritter/MaxMath
        /// <summary>
        ///     Component-wise remainder of two <see cref="ushort4x3"/> matrices.
        /// </summary>
        /// <param name="left">The dividends.</param>
        /// <param name="right">The divisors.</param>
        /// <returns>A matrix holding <c>left % right</c> for every component.</returns>
        public static ushort4x3 operator %(ushort4x3 left, ushort4x3 right)
        {
            if (Avx2.IsAvx2Supported)
            {
                // The first two columns share one 8-lane remainder; the third is handled on its own.
                ushort8 remainders01 = new ushort8(left.c0, left.c1) % new ushort8(right.c0, right.c1);

                return new ushort4x3(remainders01.v4_0, remainders01.v4_4, left.c2 % right.c2);
            }

            return new ushort4x3(left.c0 % right.c0, left.c1 % right.c1, left.c2 % right.c2);
        }
コード例 #26
0
ファイル: ushort4x4.cs プロジェクト: csritter/MaxMath
        /// <summary>
        ///     Component-wise remainder of two <see cref="ushort4x4"/> matrices.
        /// </summary>
        /// <param name="left">The dividends.</param>
        /// <param name="right">The divisors.</param>
        /// <returns>A matrix holding <c>left % right</c> for every component.</returns>
        public static ushort4x4 operator %(ushort4x4 left, ushort4x4 right)
        {
            if (Avx2.IsAvx2Supported)
            {
                // Two columns per 8-lane remainder.
                ushort8 remainders01 = new ushort8(left.c0, left.c1) % new ushort8(right.c0, right.c1);
                ushort8 remainders23 = new ushort8(left.c2, left.c3) % new ushort8(right.c2, right.c3);

                return new ushort4x4(remainders01.v4_0, remainders01.v4_4, remainders23.v4_0, remainders23.v4_4);
            }

            return new ushort4x4(left.c0 % right.c0, left.c1 % right.c1, left.c2 % right.c2, left.c3 % right.c3);
        }
コード例 #27
0
ファイル: ushort4x2.cs プロジェクト: csritter/MaxMath
        /// <summary>
        ///     Takes the remainder of every component of a <see cref="ushort4x2"/> matrix divided by the same scalar.
        /// </summary>
        /// <param name="left">The dividends.</param>
        /// <param name="right">The divisor applied to every component.</param>
        /// <returns>A matrix holding <c>left % right</c> for every component.</returns>
        public static ushort4x2 operator %(ushort4x2 left, ushort right)
        {
            if (Avx2.IsAvx2Supported)
            {
                if (!Constant.IsConstantExpression(right))
                {
                    // Both columns fit into a single 8-lane remainder.
                    ushort8 remainders = new ushort8(left.c0, left.c1) % right;

                    return new ushort4x2(remainders.v4_0, remainders.v4_4);
                }
            }

            // Constant divisor or no AVX2: take the remainder column by column.
            return new ushort4x2(left.c0 % right, left.c1 % right);
        }
コード例 #28
0
ファイル: ushort4x2.cs プロジェクト: csritter/MaxMath
        /// <summary>
        ///     Divides every component of a <see cref="ushort4x2"/> matrix by the same scalar.
        /// </summary>
        /// <param name="left">The dividends.</param>
        /// <param name="right">The divisor applied to every component.</param>
        /// <returns>A matrix holding <c>left / right</c> for every component.</returns>
        public static ushort4x2 operator /(ushort4x2 left, ushort right)
        {
            if (Avx2.IsAvx2Supported)
            {
                if (!Constant.IsConstantExpression(right))
                {
                    // Both columns fit into a single 8-lane division.
                    ushort8 quotients = new ushort8(left.c0, left.c1) / right;

                    return new ushort4x2(quotients.v4_0, quotients.v4_4);
                }
            }

            // Constant divisor or no AVX2: divide column by column.
            return new ushort4x2(left.c0 / right, left.c1 / right);
        }
コード例 #29
0
ファイル: Column Product.cs プロジェクト: csritter/MaxMath
        /// <summary>
        ///     Multiplies all eight lanes of <paramref name="x"/> together using 32-bit arithmetic.
        /// </summary>
        /// <param name="x">The lanes to multiply.</param>
        /// <returns>The product of all lanes as a 32-bit unsigned value.</returns>
        public static uint cprod(ushort8 x)
        {
            if (Avx2.IsAvx2Supported)
            {
                // Widen to 32 bits and multiply by a copy whose 32-bit groups are in reverse order;
                // the low 128 bits then hold four pairwise products covering all eight lanes.
                uint8 widened         = (uint8)x;
                uint8 widenedReversed = (uint8)(ushort8)Sse2.shuffle_epi32(x, Sse.SHUFFLE(0, 1, 2, 3));
                v128 partial          = Avx.mm256_castsi256_si128(widened * widenedReversed);

                // Fold four partial products into two ...
                partial = Sse4_1.mullo_epi32(partial, Sse2.shuffle_epi32(partial, Sse.SHUFFLE(0, 1, 2, 3)));

                // ... and the two into one, which ends up in the lowest 32-bit element.
                v128 total = Sse4_1.mullo_epi32(partial, Sse2.shufflelo_epi16(partial, Sse.SHUFFLE(0, 0, 3, 2)));

                return total.UInt0;
            }

            // Without AVX2: multiply the two halves lane-wise, then reduce the four products.
            return cprod((uint4)x.v4_0 * (uint4)x.v4_4);
        }
コード例 #30
0
        /// <summary>
        ///     Produces a per-lane comparison mask for the unsigned test <c>left &gt; right</c>.
        /// </summary>
        /// <param name="left">Left-hand operands.</param>
        /// <param name="right">Right-hand operands.</param>
        /// <returns>The result of the signed 16-bit greater-than compare applied to the sign-flipped operands.</returns>
        /// <exception cref="CPUFeatureCheckException">Thrown when SSE2 is not supported.</exception>
        internal static v128 greater_mask_ushort(ushort8 left, ushort8 right)
        {
            if (Sse2.IsSse2Supported)
            {
                // Only a signed compare is used here; flipping the top bit of both operands
                // maps unsigned ordering onto signed ordering.
                ushort8 signBit = 1 << 15;

                v128 flippedLeft  = Sse2.xor_si128(left, signBit);
                v128 flippedRight = Sse2.xor_si128(right, signBit);

                return Sse2.cmpgt_epi16(flippedLeft, flippedRight);
            }

            throw new CPUFeatureCheckException();
        }