예제 #1
0
        /// <summary>
        ///     Returns the sum of all 32 components of <paramref name="x"/>.
        /// </summary>
        /// <param name="x">The vector whose components x0..x31 are added together.</param>
        /// <returns>The component sum as an int.</returns>
        public static int csum(sbyte32 x)
        {
            if (Avx2.IsAvx2Supported)
            {
                // Widen both 16-component halves to 16 bit lanes, add them, then fold the two 8-lane halves.
                short16 widened = (short16)x.v16_0 + (short16)x.v16_16;
                short8  partial = widened.v8_0 + widened.v8_8;

                // Keep folding the remaining partial sums until the total ends up in lane 0.
                partial += Sse2.unpackhi_epi64(partial, partial);
                partial += Sse2.shufflelo_epi16(partial, Sse.SHUFFLE(0, 1, 2, 3));

                short8 neighbour = Sse2.shufflelo_epi16(partial, Sse.SHUFFLE(0, 0, 0, 1));

                return Sse2.add_epi16(partial, neighbour).SShort0;
            }

            if (Sse2.IsSse2Supported)
            {
                // Widen each 8-component quarter to 16 bit lanes and add the quarters pairwise.
                short8 lowerHalf = (short8)x.v8_0 + (short8)x.v8_8;
                short8 upperHalf = (short8)x.v8_16 + (short8)x.v8_24;
                short8 partial   = lowerHalf + upperHalf;

                partial += Sse2.unpackhi_epi64(partial, partial);
                partial += Sse2.shufflelo_epi16(partial, Sse.SHUFFLE(0, 1, 2, 3));

                short8 neighbour = Sse2.shufflelo_epi16(partial, Sse.SHUFFLE(0, 0, 0, 1));

                return Sse2.add_epi16(partial, neighbour).SShort0;
            }

            // Scalar fallback: the same pairwise addition tree, eight components per partial sum.
            int sum0to7   = ((x.x0 + x.x1) + (x.x2 + x.x3)) + ((x.x4 + x.x5) + (x.x6 + x.x7));
            int sum8to15  = ((x.x8 + x.x9) + (x.x10 + x.x11)) + ((x.x12 + x.x13) + (x.x14 + x.x15));
            int sum16to23 = ((x.x16 + x.x17) + (x.x18 + x.x19)) + ((x.x20 + x.x21) + (x.x22 + x.x23));
            int sum24to31 = ((x.x24 + x.x25) + (x.x26 + x.x27)) + ((x.x28 + x.x29) + (x.x30 + x.x31));

            return (sum0to7 + sum8to15) + (sum16to23 + sum24to31);
        }
예제 #2
0
        /// <summary>
        ///     Divides every component of <paramref name="left"/> by the scalar <paramref name="right"/>.
        /// </summary>
        /// <param name="left">The matrix whose columns c0..c3 are the dividends.</param>
        /// <param name="right">The divisor applied to every component.</param>
        /// <returns>A new matrix built from the column-wise quotients.</returns>
        public static short2x4 operator /(short2x4 left, short right)
        {
            // The packed paths are only taken when Constant.IsConstantExpression(right) reports false.
            if (!Constant.IsConstantExpression(right))
            {
                if (Avx2.IsAvx2Supported)
                {
                    // All four columns are packed into one short8 and divided at once.
                    short8 quotients = new short8(left.c0, left.c1, left.c2, left.c3) / right;

                    return new short2x4(quotients.v2_0, quotients.v2_2, quotients.v2_4, quotients.v2_6);
                }
                else if (Sse2.IsSse2Supported)
                {
                    // Two columns per short4, both divided by the same short4 built from the scalar.
                    short4 splat      = right;
                    short4 firstPair  = new short4(left.c0, left.c1) / splat;
                    short4 secondPair = new short4(left.c2, left.c3) / splat;

                    return new short2x4(firstPair.xy, firstPair.zw, secondPair.xy, secondPair.zw);
                }
            }

            // Column-by-column division for every remaining case.
            return new short2x4(left.c0 / right, left.c1 / right, left.c2 / right, left.c3 / right);
        }
예제 #3
0
        /// <summary>
        ///     Computes the remainder of every component of <paramref name="left"/> divided by the scalar <paramref name="right"/>.
        /// </summary>
        /// <param name="left">The matrix whose columns c0..c3 are the dividends.</param>
        /// <param name="right">The divisor applied to every component.</param>
        /// <returns>A new matrix built from the column-wise remainders.</returns>
        public static short2x4 operator %(short2x4 left, short right)
        {
            // The packed paths are only taken when Constant.IsConstantExpression(right) reports false.
            if (!Constant.IsConstantExpression(right))
            {
                if (Avx2.IsAvx2Supported)
                {
                    // All four columns are packed into one short8 and reduced at once.
                    short8 remainders = new short8(left.c0, left.c1, left.c2, left.c3) % right;

                    return new short2x4(remainders.v2_0, remainders.v2_2, remainders.v2_4, remainders.v2_6);
                }
                else if (Sse2.IsSse2Supported)
                {
                    // Two columns per short4, both reduced by the same short4 built from the scalar.
                    short4 splat      = right;
                    short4 firstPair  = new short4(left.c0, left.c1) % splat;
                    short4 secondPair = new short4(left.c2, left.c3) % splat;

                    return new short2x4(firstPair.xy, firstPair.zw, secondPair.xy, secondPair.zw);
                }
            }

            // Column-by-column remainder for every remaining case.
            return new short2x4(left.c0 % right, left.c1 % right, left.c2 % right, left.c3 % right);
        }
예제 #4
0
        /// <summary>
        ///     Computes, per component, (|x| / gcd(|x|, |y|)) * |y|.
        /// </summary>
        /// <param name="x">First operand; its absolute value is used.</param>
        /// <param name="y">Second operand; its absolute value is used.</param>
        /// <returns>The component-wise result as a ushort8.</returns>
        public static ushort8 lcm(short8 x, short8 y)
        {
            ushort8 magnitudeX = (ushort8)abs(x);
            ushort8 magnitudeY = (ushort8)abs(y);

            // Divide before multiplying, exactly as the expression is grouped.
            var commonDivisor = gcd(magnitudeX, magnitudeY);
            var reduced       = magnitudeX / commonDivisor;

            return reduced * magnitudeY;
        }
예제 #5
0
        /// <summary>
        ///     Computes the component-wise remainder of <paramref name="left"/> divided by <paramref name="right"/>.
        /// </summary>
        /// <param name="left">The matrix whose columns c0..c3 are the dividends.</param>
        /// <param name="right">The matrix whose columns c0..c3 are the divisors.</param>
        /// <returns>A new matrix built from the column-wise remainders.</returns>
        public static short3x4 operator %(short3x4 left, short3x4 right)
        {
            if (!Avx2.IsAvx2Supported)
            {
                return new short3x4(left.c0 % right.c0, left.c1 % right.c1, left.c2 % right.c2, left.c3 % right.c3);
            }

            // Pack two columns into each short8 so that two packed remainder operations cover the matrix.
            short8 numerators01 = Sse2.unpacklo_epi64(left.c0, left.c1);
            short8 numerators23 = Sse2.unpacklo_epi64(left.c2, left.c3);

            short8 denominators01 = Sse2.unpacklo_epi64(right.c0, right.c1);
            short8 denominators23 = Sse2.unpacklo_epi64(right.c2, right.c3);
#if DEBUG
            // Lanes 3 and 7 are not part of the v3_0 / v3_4 slices returned below.
            // NOTE(review): presumably forced to 1 so debug builds never divide by an unused zero lane - confirm.
            denominators01.x3 = 1;
            denominators01.x7 = 1;
            denominators23.x3 = 1;
            denominators23.x7 = 1;
#endif
            short8 remainders01 = numerators01 % denominators01;
            short8 remainders23 = numerators23 % denominators23;

            return new short3x4(remainders01.v3_0, remainders01.v3_4, remainders23.v3_0, remainders23.v3_4);
        }
예제 #6
0
파일: short8.cs 프로젝트: csritter/MaxMath
 /// <summary>
 ///     Copies the eight components x0..x7 of <paramref name="v"/> into the members of the same name.
 /// </summary>
 /// <param name="v">The vector whose components are copied.</param>
 public DebuggerProxy(short8 v)
 {
     this.x0 = v.x0;
     this.x1 = v.x1;
     this.x2 = v.x2;
     this.x3 = v.x3;
     this.x4 = v.x4;
     this.x5 = v.x5;
     this.x6 = v.x6;
     this.x7 = v.x7;
 }
예제 #7
0
파일: Max.cs 프로젝트: csritter/MaxMath
 /// <summary>
 ///     Returns the component-wise maximum of <paramref name="a"/> and <paramref name="b"/>.
 /// </summary>
 /// <param name="a">First vector.</param>
 /// <param name="b">Second vector.</param>
 /// <returns>A vector holding the larger value of each component pair.</returns>
 public static short8 max(short8 a, short8 b)
 {
     if (!Sse2.IsSse2Supported)
     {
         // Scalar fallback: one math.max call per component, narrowed back to short.
         return new short8((short)math.max(a.x0, b.x0),
                           (short)math.max(a.x1, b.x1),
                           (short)math.max(a.x2, b.x2),
                           (short)math.max(a.x3, b.x3),
                           (short)math.max(a.x4, b.x4),
                           (short)math.max(a.x5, b.x5),
                           (short)math.max(a.x6, b.x6),
                           (short)math.max(a.x7, b.x7));
     }

     return Sse2.max_epi16(a, b);
 }
예제 #8
0
파일: Sign.cs 프로젝트: csritter/MaxMath
 /// <summary>
 ///     Computes a per-component sign value for <paramref name="x"/>.
 /// </summary>
 /// <param name="x">The input vector.</param>
 /// <returns>
 ///     With SSSE3: the result of applying the sign of each component of x to a vector of ones.
 ///     Otherwise: (x >> 15) combined by bitwise OR with the top bit of -x shifted down to bit 0.
 /// </returns>
 public static short8 sign(short8 x)
 {
     if (Ssse3.IsSsse3Supported)
     {
         return Ssse3.sign_epi16(new short8(1), x);
     }

     // Shift of the signed value by 15 bits.
     var fromSignBit = x >> 15;
     // The negated value, reinterpreted as unsigned, shifted down by 15 bits.
     var fromNegated = (short8)((ushort8)(-x) >> 15);

     return fromSignBit | fromNegated;
 }
예제 #9
0
 /// <summary>
 ///     Tests whether all eight components of <paramref name="c"/> hold the same value.
 /// </summary>
 /// <param name="c">The vector to inspect.</param>
 /// <returns>true if every component equals x0; otherwise false.</returns>
 public static bool all_eq(short8 c)
 {
     if (Ssse3.IsSsse3Supported)
     {
         // Byte shuffle with the repeating control pattern (0, 1), then compare with the input.
         short8 repeated = (short8)Ssse3.shuffle_epi8(c, new v128(0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1));

         return repeated.Equals(c);
     }

     // Scalar fallback: compare x0 against every other component, without short-circuiting.
     bool sameAs1And2 = c.x0 == c.x1 & c.x0 == c.x2;
     bool sameAs3And4 = c.x0 == c.x3 & c.x0 == c.x4;
     bool sameAs5And6 = c.x0 == c.x5 & c.x0 == c.x6;
     bool sameAs7     = c.x0 == c.x7;

     return (sameAs1And2 & sameAs3And4) & (sameAs5And6 & sameAs7);
 }
예제 #10
0
파일: Any.cs 프로젝트: csritter/MaxMath
 /// <summary>
 ///     Tests whether at least one component of <paramref name="x"/> is non-zero.
 /// </summary>
 /// <param name="x">The vector to inspect.</param>
 /// <returns>true if any component differs from zero; otherwise false.</returns>
 public static bool any(short8 x)
 {
     if (!Sse2.IsSse2Supported)
     {
         return any(x != 0);
     }

     // Byte mask of the lanes that compare equal to zero, checked against a 16 bit wide mask:
     // the result is true unless the two match.
     var fullMask     = bitmask32(8 * sizeof(short));
     var zeroLaneBits = Sse2.movemask_epi8(Sse2.cmpeq_epi16(x, default(v128)));

     return fullMask != zeroLaneBits;
 }
예제 #11
0
 /// <summary>
 ///     Reinterprets <paramref name="x"/> as a half8 without converting the component values.
 /// </summary>
 /// <param name="x">The vector to reinterpret.</param>
 /// <returns>A half8 obtained through a v128 cast (SSE) or a pointer reinterpretation (fallback).</returns>
 public static half8 ashalf(short8 x)
 {
     if (!Sse.IsSseSupported)
     {
         // Read the storage of x through a half8 pointer.
         return *(half8 *)&x;
     }

     return (v128)x;
 }
예제 #12
0
파일: All.cs 프로젝트: csritter/MaxMath
 /// <summary>
 ///     Tests whether every component of <paramref name="x"/> is non-zero.
 /// </summary>
 /// <param name="x">The vector to inspect.</param>
 /// <returns>true if no component equals zero; otherwise false.</returns>
 public static bool all(short8 x)
 {
     if (!Sse2.IsSse2Supported)
     {
         return all(x != 0);
     }

     // Byte mask of the lanes that compare equal to zero; it must be empty.
     var zeroLaneBits = Sse2.movemask_epi8(Sse2.cmpeq_epi16(x, default(v128)));

     return 0 == zeroLaneBits;
 }
예제 #13
0
 /// <summary>
 ///     Combines <paramref name="a"/> and <paramref name="b"/> with an operation that alternates between lanes.
 /// </summary>
 /// <remarks>
 ///     NOTE(review): presumably even lanes yield a - b and odd lanes a + b, assuming select(x, y, mask)
 ///     picks y where the mask is true - confirm against the select implementation.
 /// </remarks>
 /// <param name="a">First operand.</param>
 /// <param name="b">Second operand.</param>
 /// <returns>The lane-wise combined vector.</returns>
 public static short8 subadd(short8 a, short8 b)
 {
     if (Ssse3.IsSsse3Supported)
     {
         // Sign control: ushort.MaxValue in even lanes, 1 in odd lanes.
         ushort8 signControl = new ushort8(ushort.MaxValue, 1, ushort.MaxValue, 1, ushort.MaxValue, 1, ushort.MaxValue, 1);

         return a + Ssse3.sign_epi16(b, signControl);
     }

     // Selection mask: false in even lanes, true in odd lanes.
     bool8 oddLanes = new bool8(false, true, false, true, false, true, false, true);

     return a - select(b, -b, oddLanes);
 }
예제 #14
0
 /// <summary>
 ///     Byte-shuffles <paramref name="x"/> using the control indices 0, 2, 4, 6, 8, 10, 12, 14.
 /// </summary>
 /// <param name="x">The vector to shuffle.</param>
 /// <returns>The shuffled register.</returns>
 /// <exception cref="CPUFeatureCheckException">Thrown when SSSE3 is not supported.</exception>
 internal static v128 ShortToByte(short8 x)
 {
     if (!Ssse3.IsSsse3Supported)
     {
         throw new CPUFeatureCheckException();
     }

     byte8 evenByteIndices = new byte8(0, 2, 4, 6, 8, 10, 12, 14);

     return Ssse3.shuffle_epi8(x, evenByteIndices);
 }
예제 #15
0
 /// <summary>
 ///     Divides <paramref name="dividend"/> by the scalar <paramref name="divisor"/>, producing quotient and remainder.
 /// </summary>
 /// <param name="dividend">The vector to divide.</param>
 /// <param name="divisor">The scalar divisor.</param>
 /// <param name="remainder">Receives the component-wise remainder.</param>
 /// <returns>The component-wise quotient.</returns>
 public static short8 divrem(short8 dividend, short divisor, out short8 remainder)
 {
     if (!Constant.IsConstantExpression(divisor))
     {
         // Forward to the short8 overload with the scalar converted to a vector.
         return divrem(dividend, (short8)divisor, out remainder);
     }

     // Otherwise use the separate % and / operators with the scalar divisor.
     remainder = dividend % divisor;

     return dividend / divisor;
 }
예제 #16
0
 /// <summary>
 ///     Tests each component of <paramref name="x"/> for being a power of two.
 /// </summary>
 /// <param name="x">The vector to inspect.</param>
 /// <returns>A bool8 with one result per component.</returns>
 public static bool8 ispow2(short8 x)
 {
     if (!Sse2.IsSse2Supported)
     {
         // Scalar fallback: one math.ispow2 call per component.
         return new bool8(math.ispow2(x.x0),
                          math.ispow2(x.x1),
                          math.ispow2(x.x2),
                          math.ispow2(x.x3),
                          math.ispow2(x.x4),
                          math.ispow2(x.x5),
                          math.ispow2(x.x6),
                          math.ispow2(x.x7));
     }

     // A lane qualifies when it is greater than zero and x & (x - 1) equals zero.
     var greaterThanZero = Sse2.cmpgt_epi16(x, default(v128));
     var singleBitSet    = Sse2.cmpeq_epi16(default(v128), x & (x - 1));
     var qualifies       = Sse2.and_si128(greaterThanZero, singleBitSet);

     // Mask the comparison result with 1 per lane before narrowing to bytes.
     return (v128)(byte8)(new short8(1) & qualifies);
 }
예제 #17
0
        /// <summary>
        ///     Computes the component-wise remainder of <paramref name="left"/> divided by <paramref name="right"/>.
        /// </summary>
        /// <param name="left">The matrix whose columns c0 and c1 are the dividends.</param>
        /// <param name="right">The matrix whose columns c0 and c1 are the divisors.</param>
        /// <returns>A new matrix built from the column-wise remainders.</returns>
        public static short4x2 operator %(short4x2 left, short4x2 right)
        {
            if (!Avx2.IsAvx2Supported)
            {
                return new short4x2(left.c0 % right.c0, left.c1 % right.c1);
            }

            // Both columns are packed into one short8 so a single packed operation covers the matrix.
            short8 numerators   = new short8(left.c0, left.c1);
            short8 denominators = new short8(right.c0, right.c1);
            short8 remainders   = numerators % denominators;

            return new short4x2(remainders.v4_0, remainders.v4_4);
        }
예제 #18
0
        /// <summary>
        ///     Rotates each component of <paramref name="x"/> left by the matching component of <paramref name="n"/>.
        /// </summary>
        /// <param name="x">The values to rotate.</param>
        /// <param name="n">The per-component rotation amounts.</param>
        /// <returns>The rotated vector.</returns>
        public static short8 rol(short8 x, short8 n)
        {
            if (!Sse2.IsSse2Supported)
            {
                // Scalar fallback: rotate every component individually.
                return new short8(rol(x.x0, n.x0),
                                  rol(x.x1, n.x1),
                                  rol(x.x2, n.x2),
                                  rol(x.x3, n.x3),
                                  rol(x.x4, n.x4),
                                  rol(x.x5, n.x5),
                                  rol(x.x6, n.x6),
                                  rol(x.x7, n.x7));
            }

            // Only the low four bits of each rotation amount are used.
            short8 amount = n & 15;

            // Left shift by the amount, OR-ed with a logical right shift by (-amount & 15).
            var shiftedUp   = shl((ushort8)x, (ushort8)amount);
            var shiftedDown = shrl((ushort8)x, (ushort8)(-amount & 15));

            return (short8)(shiftedUp | shiftedDown);
        }
예제 #19
0
        /// <summary>
        ///     Divides <paramref name="left"/> by <paramref name="right"/> component-wise.
        /// </summary>
        /// <param name="left">The matrix whose columns c0 and c1 are the dividends.</param>
        /// <param name="right">The matrix whose columns c0 and c1 are the divisors.</param>
        /// <returns>A new matrix built from the column-wise quotients.</returns>
        public static short4x2 operator /(short4x2 left, short4x2 right)
        {
            if (!Avx2.IsAvx2Supported)
            {
                return new short4x2(left.c0 / right.c0, left.c1 / right.c1);
            }

            // Both columns are packed into one short8 so a single packed operation covers the matrix.
            short8 numerators   = new short8(left.c0, left.c1);
            short8 denominators = new short8(right.c0, right.c1);
            short8 quotients    = numerators / denominators;

            return new short4x2(quotients.v4_0, quotients.v4_4);
        }
예제 #20
0
        /// <summary>
        ///     Converts <paramref name="x"/> to a bool8 after clamping every component to the range [0, 1].
        /// </summary>
        /// <param name="x">The vector to convert.</param>
        /// <returns>The clamped values, narrowed to bytes and reinterpreted as bool8.</returns>
        public static bool8 toboolsafe(short8 x)
        {
            // Both paths start from the same clamped, byte-narrowed value.
            byte8 clamped = (byte8)clamp(x, 0, 1);

            if (Sse2.IsSse2Supported)
            {
                return (v128)clamped;
            }

            // Read the storage of the byte vector through a bool8 pointer.
            return *(bool8 *)&clamped;
        }
예제 #21
0
        /// <summary>
        ///     Tests, per component, whether <paramref name="dividend"/> leaves no remainder when divided by <paramref name="divisor"/>.
        /// </summary>
        /// <param name="dividend">The values to test.</param>
        /// <param name="divisor">The divisors; every component is asserted to be non-zero.</param>
        /// <returns>A bool8 that is true wherever the remainder equals zero.</returns>
        public static bool8 isdivisible(short8 dividend, short8 divisor)
        {
            // Assert on each divisor component individually, in component order.
            Assert.AreNotEqual(0, divisor.x0);
            Assert.AreNotEqual(0, divisor.x1);
            Assert.AreNotEqual(0, divisor.x2);
            Assert.AreNotEqual(0, divisor.x3);
            Assert.AreNotEqual(0, divisor.x4);
            Assert.AreNotEqual(0, divisor.x5);
            Assert.AreNotEqual(0, divisor.x6);
            Assert.AreNotEqual(0, divisor.x7);

            var leftover = dividend % divisor;

            return leftover == 0;
        }
예제 #22
0
        /// <summary>
        ///     Produces a byte8 from <paramref name="max"/> and eight successive NextState() values.
        /// </summary>
        /// <remarks>
        ///     Each component of max is widened to 16 bits and multiplied by one NextState() result;
        ///     the upper byte of every 16 bit product forms the returned component.
        /// </remarks>
        /// <param name="max">The per-component multiplier.</param>
        /// <returns>The upper bytes of the eight products.</returns>
        public byte8 NextByte8(byte8 max)
        {
            // NextState() is invoked eight times, in argument order, on either path.
            short8 products = (short8)max * new short8(NextState(), NextState(), NextState(), NextState(), NextState(), NextState(), NextState(), NextState());

            if (Ssse3.IsSsse3Supported)
            {
                // Gather the odd-indexed bytes with a single byte shuffle.
                byte8 oddByteIndices = new byte8(1, 3, 5, 7, 9, 11, 13, 15);

                return Ssse3.shuffle_epi8(products, oddByteIndices);
            }

            // Shift the products down by eight bits and narrow them to bytes.
            return (byte8)(products >> 8);
        }
예제 #23
0
        /// <summary>
        ///     Computes the component-wise remainder of <paramref name="left"/> divided by <paramref name="right"/>.
        /// </summary>
        /// <param name="left">The matrix whose columns c0..c2 are the dividends.</param>
        /// <param name="right">The matrix whose columns c0..c2 are the divisors.</param>
        /// <returns>A new matrix built from the column-wise remainders.</returns>
        public static short4x3 operator %(short4x3 left, short4x3 right)
        {
            if (!Avx2.IsAvx2Supported)
            {
                return new short4x3(left.c0 % right.c0, left.c1 % right.c1, left.c2 % right.c2);
            }

            // The first two columns share one packed operation; the third column is handled on its own.
            short8 numerators   = new short8(left.c0, left.c1);
            short8 denominators = new short8(right.c0, right.c1);
            short8 remainders   = numerators % denominators;

            return new short4x3(remainders.v4_0, remainders.v4_4, left.c2 % right.c2);
        }
예제 #24
0
        /// <summary>
        ///     Divides <paramref name="left"/> by <paramref name="right"/> component-wise.
        /// </summary>
        /// <param name="left">The matrix whose columns c0..c2 are the dividends.</param>
        /// <param name="right">The matrix whose columns c0..c2 are the divisors.</param>
        /// <returns>A new matrix built from the column-wise quotients.</returns>
        public static short4x3 operator /(short4x3 left, short4x3 right)
        {
            if (!Avx2.IsAvx2Supported)
            {
                return new short4x3(left.c0 / right.c0, left.c1 / right.c1, left.c2 / right.c2);
            }

            // The first two columns share one packed operation; the third column is handled on its own.
            short8 numerators   = new short8(left.c0, left.c1);
            short8 denominators = new short8(right.c0, right.c1);
            short8 quotients    = numerators / denominators;

            return new short4x3(quotients.v4_0, quotients.v4_4, left.c2 / right.c2);
        }
예제 #25
0
        /// <summary>
        ///     Divides <paramref name="left"/> by <paramref name="right"/> component-wise.
        /// </summary>
        /// <param name="left">The matrix whose columns c0..c3 are the dividends.</param>
        /// <param name="right">The matrix whose columns c0..c3 are the divisors.</param>
        /// <returns>A new matrix built from the column-wise quotients.</returns>
        public static short4x4 operator /(short4x4 left, short4x4 right)
        {
            if (!Avx2.IsAvx2Supported)
            {
                return new short4x4(left.c0 / right.c0, left.c1 / right.c1, left.c2 / right.c2, left.c3 / right.c3);
            }

            // Two columns per short8: columns 0 and 1 first, then columns 2 and 3.
            short8 quotients01 = new short8(left.c0, left.c1) / new short8(right.c0, right.c1);
            short8 quotients23 = new short8(left.c2, left.c3) / new short8(right.c2, right.c3);

            return new short4x4(quotients01.v4_0, quotients01.v4_4, quotients23.v4_0, quotients23.v4_4);
        }
예제 #26
0
        /// <summary>
        ///     Returns the product of all eight components of <paramref name="x"/> as an int.
        /// </summary>
        /// <param name="x">The vector whose components are multiplied together.</param>
        /// <returns>The 32 bit component product.</returns>
        public static int cprod(short8 x)
        {
            if (!Avx2.IsAvx2Supported)
            {
                // Multiply the two 4-component halves as int4, then reduce with the int4 overload.
                var halfProducts = (int4)x.v4_0 * (int4)x.v4_4;

                return cprod(halfProducts);
            }

            // Multiply the widened input by a 32 bit-shuffled copy of itself and keep the low 128 bits.
            short8 shuffledInput = (short8)Sse2.shuffle_epi32(x, Sse.SHUFFLE(0, 1, 2, 3));
            v128   partial       = Avx.mm256_castsi256_si128((int8)x * (int8)shuffledInput);

            // Two further multiply-by-shuffled-self steps; the result is read from the lowest 32 bit lane.
            partial = Sse4_1.mullo_epi32(partial, Sse2.shuffle_epi32(partial, Sse.SHUFFLE(0, 1, 2, 3)));

            v128 swappedLow = Sse2.shufflelo_epi16(partial, Sse.SHUFFLE(0, 0, 3, 2));

            return Sse4_1.mullo_epi32(partial, swappedLow).SInt0;
        }
예제 #27
0
        /// <summary>
        ///     Computes the component-wise remainder of <paramref name="left"/> divided by <paramref name="right"/>.
        /// </summary>
        /// <param name="left">The matrix whose columns c0..c3 are the dividends.</param>
        /// <param name="right">The matrix whose columns c0..c3 are the divisors.</param>
        /// <returns>A new matrix built from the column-wise remainders.</returns>
        public static short4x4 operator %(short4x4 left, short4x4 right)
        {
            if (!Avx2.IsAvx2Supported)
            {
                return new short4x4(left.c0 % right.c0, left.c1 % right.c1, left.c2 % right.c2, left.c3 % right.c3);
            }

            // Two columns per short8: columns 0 and 1 first, then columns 2 and 3.
            short8 remainders01 = new short8(left.c0, left.c1) % new short8(right.c0, right.c1);
            short8 remainders23 = new short8(left.c2, left.c3) % new short8(right.c2, right.c3);

            return new short4x4(remainders01.v4_0, remainders01.v4_4, remainders23.v4_0, remainders23.v4_4);
        }
예제 #28
0
        /// <summary>
        ///     Computes the remainder of every component of <paramref name="left"/> divided by the scalar <paramref name="right"/>.
        /// </summary>
        /// <param name="left">The matrix whose columns c0..c2 are the dividends.</param>
        /// <param name="right">The divisor applied to every component.</param>
        /// <returns>A new matrix built from the column-wise remainders.</returns>
        public static short4x3 operator %(short4x3 left, short right)
        {
            // The packed path needs AVX2 and Constant.IsConstantExpression(right) reporting false.
            if (Avx2.IsAvx2Supported && !Constant.IsConstantExpression(right))
            {
                // The first two columns share one packed operation; the third column is handled on its own.
                short8 remainders = new short8(left.c0, left.c1) % right;

                return new short4x3(remainders.v4_0, remainders.v4_4, left.c2 % right);
            }

            return new short4x3(left.c0 % right, left.c1 % right, left.c2 % right);
        }
예제 #29
0
        /// <summary>
        ///     Divides every component of <paramref name="left"/> by the scalar <paramref name="right"/>.
        /// </summary>
        /// <param name="left">The matrix whose columns c0..c2 are the dividends.</param>
        /// <param name="right">The divisor applied to every component.</param>
        /// <returns>A new matrix built from the column-wise quotients.</returns>
        public static short4x3 operator /(short4x3 left, short right)
        {
            // The packed path needs AVX2 and Constant.IsConstantExpression(right) reporting false.
            if (Avx2.IsAvx2Supported && !Constant.IsConstantExpression(right))
            {
                // The first two columns share one packed operation; the third column is handled on its own.
                short8 quotients = new short8(left.c0, left.c1) / right;

                return new short4x3(quotients.v4_0, quotients.v4_4, left.c2 / right);
            }

            return new short4x3(left.c0 / right, left.c1 / right, left.c2 / right);
        }
예제 #30
0
        /// <summary>
        ///     Returns the dot product of <paramref name="a"/> and <paramref name="b"/> as an int.
        /// </summary>
        /// <param name="a">First vector.</param>
        /// <param name="b">Second vector.</param>
        /// <returns>The sum of the eight component-wise products.</returns>
        public static int dot(short8 a, short8 b)
        {
            if (!Sse2.IsSse2Supported)
            {
                // Scalar fallback: the same pairwise addition tree, four products per partial sum.
                int firstFour = ((a.x0 * b.x0) + (a.x1 * b.x1)) + ((a.x2 * b.x2) + (a.x3 * b.x3));
                int lastFour  = ((a.x4 * b.x4) + (a.x5 * b.x5)) + ((a.x6 * b.x6) + (a.x7 * b.x7));

                return firstFour + lastFour;
            }

            // Multiply-add of the 16 bit lanes, then two shuffle-and-add steps on the 32 bit results;
            // the total is read from the lowest 32 bit lane.
            short8 sums = Sse2.madd_epi16(a, b);

            sums = Sse2.add_epi32(sums, Sse2.shuffle_epi32(sums, Sse.SHUFFLE(0, 1, 2, 3)));

            return Sse2.add_epi32(sums, Sse2.shufflelo_epi16(sums, Sse.SHUFFLE(0, 0, 3, 2))).SInt0;
        }