예제 #1
0
        /// <summary>
        /// Tests, per component, whether <paramref name="x"/> is left unchanged when it is
        /// clamped to the range [<paramref name="min"/>, <paramref name="max"/>].
        /// </summary>
        /// <param name="x">Values to test.</param>
        /// <param name="min">Lower bounds.</param>
        /// <param name="max">Upper bounds.</param>
        /// <returns>The component-wise result of comparing the clamped value with <paramref name="x"/>.</returns>
        public static bool8 isinrange(float8 x, float8 min, float8 max)
        {
            float8 clamped = maxmath.min(maxmath.max(x, min), max);

            // Deliberately a floating point comparison. Comparing the raw bits instead,
            // i.e. asint(clamped) == asint(x), fails at +/- 0; saving one or two clock
            // cycles is not worth that risk.
            return clamped == x;
        }
예제 #2
0
        /// <summary>
        /// Searches <paramref name="v"/> for the first component that compares equal to <paramref name="x"/>.
        /// </summary>
        /// <param name="v">Vector to search.</param>
        /// <param name="x">Value to look for.</param>
        /// <returns>
        /// The zero-based index of the first matching component. The scalar fallback returns 32 when
        /// nothing matches; the SIMD paths return the trailing-zero count of an empty match mask
        /// (NOTE(review): expected to be 32 as well - confirm against math.tzcnt).
        /// </returns>
        public static int indexof(float8 v, float x)
        {
            // Only AVX intrinsics are used on the 256-bit path, so AVX (not AVX2) is the feature
            // that has to be present. This matches the guards used by the other float8 helpers.
            if (Avx.IsAvxSupported)
            {
                return math.tzcnt(Avx.mm256_movemask_ps(Avx.mm256_cmp_ps(v, new float8(x), (int)Avx.CMP.EQ_OQ)));
            }
            else if (Sse.IsSseSupported)
            {
                v128 broadcast = new v128(x);

                // Bits 0-3 come from the low half, bits 4-7 from the high half.
                int loMask = Sse.movemask_ps(Sse.cmpeq_ps(*(v128 *)&v._v4_0, broadcast));
                int hiMask = Sse.movemask_ps(Sse.cmpeq_ps(*(v128 *)&v._v4_4, broadcast));

                return math.tzcnt(loMask | (hiMask << 4));
            }
            else
            {
                for (int i = 0; i < 8; i++)
                {
                    if (v[i] == x)
                    {
                        return i;
                    }
                }

                // No component matched.
                return 32;
            }
        }
예제 #3
0
        /// <summary>
        /// Computes the sine and cosine of every component of <paramref name="x"/>.
        /// </summary>
        /// <param name="x">Input values.</param>
        /// <param name="s">Receives the sines.</param>
        /// <param name="c">Receives the cosines.</param>
        public static void sincos(float8 x, out float8 s, out float8 c)
        {
            float4 sinLower, cosLower;
            float4 sinUpper, cosUpper;

            // Each float4 half is handed to math.sincos separately.
            math.sincos(x.v4_0, out sinLower, out cosLower);
            math.sincos(x.v4_4, out sinUpper, out cosUpper);

            s = new float8(sinLower, sinUpper);
            c = new float8(cosLower, cosUpper);
        }
예제 #4
0
 /// <summary>
 /// Wraps <paramref name="x"/> by <paramref name="length"/>: computes
 /// x - floor(x / length) * length and clamps the result to [0, length].
 /// </summary>
 /// <param name="x">Values to wrap.</param>
 /// <param name="length">Wrap lengths.</param>
 public static float8 repeat(float8 x, float8 length)
 {
     // Number of whole lengths contained in x.
     float8 wholeLengths = floor(x / length);

     // x minus those whole lengths, evaluated as a multiply-add.
     float8 wrapped = mad(wholeLengths, -length, x);

     return clamp(wrapped, 0f, length);
 }
예제 #5
0
파일: Max.cs 프로젝트: csritter/MaxMath
 /// <summary>
 /// Returns the component-wise maximum of <paramref name="a"/> and <paramref name="b"/>.
 /// </summary>
 public static float8 max(float8 a, float8 b)
 {
     if (Avx.IsAvxSupported)
     {
         // All eight components through the AVX intrinsic.
         v256 widest = Avx.mm256_max_ps(a, b);

         return widest;
     }

     // Without AVX the two float4 halves are processed independently.
     float4 lower = math.max(a.v4_0, b.v4_0);
     float4 upper = math.max(a.v4_4, b.v4_4);

     return new float8(lower, upper);
 }
 /// <summary>
 /// Returns the component-wise reciprocal square root of <paramref name="x"/>.
 /// </summary>
 public static float8 rsqrt(float8 x)
 {
     if (Avx.IsAvxSupported)
     {
         // NOTE(review): the hardware rsqrt instruction is documented by Intel as an
         // approximation - confirm its precision is acceptable relative to the
         // math.rsqrt fallback below.
         v256 estimate = Avx.mm256_rsqrt_ps(x);

         return estimate;
     }

     // Without AVX the two float4 halves are processed independently.
     float4 lower = math.rsqrt(x.v4_0);
     float4 upper = math.rsqrt(x.v4_4);

     return new float8(lower, upper);
 }
 /// <summary>
 /// Component-wise step function: 1.0f where <paramref name="x"/> is greater than or equal to
 /// <paramref name="y"/>, otherwise 0.0f.
 /// </summary>
 /// <param name="y">Threshold (edge) values.</param>
 /// <param name="x">Values compared against the threshold.</param>
 public static float8 step(float8 y, float8 x)
 {
     if (Avx.IsAvxSupported)
     {
         // Mask of the components where x >= y; it selects 1.0f over 0.0f in the blend.
         v256 atOrAboveEdge = Avx.mm256_cmp_ps(x, y, (int)Avx.CMP.GE_OS);

         return Avx.mm256_blendv_ps(default(float8), new float8(1f), atOrAboveEdge);
     }
     else
     {
         // math.step takes (threshold, value) in the same order as this method. Passing the
         // arguments swapped would test y >= x and disagree with the AVX path above.
         return new float8(math.step(y.v4_0, x.v4_0), math.step(y.v4_4, x.v4_4));
     }
 }
예제 #8
0
 /// <summary>
 /// Divides <paramref name="a"/> by <paramref name="b"/> (as a multiplication with rcp(b) on the
 /// FMA path) and alternately adds/subtracts <paramref name="c"/> across components.
 /// </summary>
 /// <remarks>
 /// NOTE(review): which components add and which subtract is determined by the fmsubadd
 /// instruction and by the float4 overload - confirm both agree.
 /// </remarks>
 public static float8 dadsub(float8 a, float8 b, float8 c)
 {
     if (Fma.IsFmaSupported)
     {
         // The division is expressed as a * (1 / b) so it can be fused with the add/sub.
         var reciprocal = rcp(b);

         return Fma.mm256_fmsubadd_ps(a, reciprocal, c);
     }

     // Without FMA, defer to the float4 overload for each half.
     var lower = dadsub(a.v4_0, b.v4_0, c.v4_0);
     var upper = dadsub(a.v4_4, b.v4_4, c.v4_4);

     return new float8(lower, upper);
 }
예제 #9
0
파일: Any.cs 프로젝트: csritter/MaxMath
 /// <summary>
 /// Returns true if at least one component of <paramref name="x"/> does not compare equal to zero.
 /// </summary>
 public static bool any(float8 x)
 {
     if (Avx.IsAvxSupported)
     {
         // One bit per component that compares equal to zero.
         v256 equalsZero = Avx.mm256_cmp_ps(x, default(float8), (int)Avx.CMP.EQ_OQ);
         int zeroLanes = Avx.mm256_movemask_ps(equalsZero);

         // Unless the zero mask equals bitmask32(8), some component is non-zero.
         return zeroLanes != bitmask32(8);
     }

     return math.any(x.v4_0) | math.any(x.v4_4);
 }
예제 #10
0
 /// <summary>
 /// Divides <paramref name="dividend"/> by <paramref name="divisor"/> component-wise,
 /// implemented on the AVX path as a multiplication with the divisor's reciprocal.
 /// </summary>
 /// <param name="dividend">Numerators.</param>
 /// <param name="divisor">Denominators.</param>
 public static float8 div(float8 dividend, float8 divisor)
 {
     if (Avx.IsAvxSupported)
     {
         var reciprocal = rcp(divisor);

         return dividend * reciprocal;
     }

     // Without AVX, defer to the float4 overload for each half.
     var lower = div(dividend.v4_0, divisor.v4_0);
     var upper = div(dividend.v4_4, divisor.v4_4);

     return new float8(lower, upper);
 }
예제 #11
0
 /// <summary>
 /// Rounds every component of <paramref name="x"/> to the nearest integral value.
 /// </summary>
 public static float8 round(float8 x)
 {
     if (Avx.IsAvxSupported)
     {
         // FROUND_NINT_NOEXC: round to nearest, with floating point exceptions suppressed.
         v256 rounded = Avx.mm256_round_ps(x, (int)X86.RoundingMode.FROUND_NINT_NOEXC);

         return rounded;
     }

     // Without AVX the two float4 halves are processed independently.
     float4 lower = math.round(x.v4_0);
     float4 upper = math.round(x.v4_4);

     return new float8(lower, upper);
 }
예제 #12
0
 /// <summary>
 /// Returns the negated absolute value of every component of <paramref name="x"/>
 /// by forcing bit 31 (the sign bit) on.
 /// </summary>
 public static float8 nabs(float8 x)
 {
     if (Avx.IsAvxSupported)
     {
         // Only bit 31 set in each 32-bit element.
         v256 signBits = new v256(1 << 31);

         return Avx.mm256_or_ps(x, signBits);
     }

     // Without AVX, defer to the float4 overload for each half.
     var lower = nabs(x.v4_0);
     var upper = nabs(x.v4_4);

     return new float8(lower, upper);
 }
예제 #13
0
 /// <summary>
 /// Rounds every component of <paramref name="x"/> down to an integral value.
 /// </summary>
 public static float8 floor(float8 x)
 {
     if (Avx.IsAvxSupported)
     {
         v256 floored = Avx.mm256_floor_ps(x);

         return floored;
     }

     // Without AVX the two float4 halves are processed independently.
     float4 lower = math.floor(x.v4_0);
     float4 upper = math.floor(x.v4_4);

     return new float8(lower, upper);
 }
예제 #14
0
 /// <summary>
 /// Rounds every component of <paramref name="x"/> up to an integral value.
 /// </summary>
 public static float8 ceil(float8 x)
 {
     if (Avx.IsAvxSupported)
     {
         v256 ceiled = Avx.mm256_ceil_ps(x);

         return ceiled;
     }

     // Without AVX the two float4 halves are processed independently.
     float4 lower = math.ceil(x.v4_0);
     float4 upper = math.ceil(x.v4_4);

     return new float8(lower, upper);
 }
예제 #15
0
 /// <summary>
 /// Alternately adds and subtracts the components of <paramref name="b"/> to/from
 /// <paramref name="a"/>.
 /// </summary>
 /// <remarks>
 /// NOTE(review): which components add and which subtract is defined by madsub and by the
 /// float4 overload, neither of which is visible here.
 /// </remarks>
 public static float8 addsub(float8 a, float8 b)
 {
     if (Fma.IsFmaSupported)
     {
         // 1 * a +/- b: reuses the fused multiply add/sub helper with a unit factor.
         return madsub(1f, a, b);
     }

     // Without FMA, defer to the float4 overload for each half.
     var lower = addsub(a.v4_0, b.v4_0);
     var upper = addsub(a.v4_4, b.v4_4);

     return new float8(lower, upper);
 }
예제 #16
0
 /// <summary>
 /// Multiplies <paramref name="a"/> by <paramref name="b"/> and alternately subtracts/adds
 /// <paramref name="c"/> across components.
 /// </summary>
 /// <remarks>
 /// NOTE(review): the per-component add/subtract pattern is that of the x86 fmaddsub
 /// instruction - confirm the float4 overload uses the same pattern.
 /// </remarks>
 public static float8 msubadd(float8 a, float8 b, float8 c)
 {
     if (Fma.IsFmaSupported)
     {
         v256 fused = Fma.mm256_fmaddsub_ps(a, b, c);

         return fused;
     }

     // Without FMA, defer to the float4 overload for each half.
     var lower = msubadd(a.v4_0, b.v4_0, c.v4_0);
     var upper = msubadd(a.v4_4, b.v4_4, c.v4_4);

     return new float8(lower, upper);
 }
예제 #17
0
 /// <summary>
 /// Alternately subtracts and adds the components of <paramref name="b"/> from/to
 /// <paramref name="a"/>.
 /// </summary>
 /// <remarks>
 /// NOTE(review): the per-component subtract/add pattern is that of the x86 addsub
 /// instruction - confirm the float4 overload uses the same pattern.
 /// </remarks>
 public static float8 subadd(float8 a, float8 b)
 {
     if (Avx.IsAvxSupported)
     {
         v256 combined = Avx.mm256_addsub_ps(a, b);

         return combined;
     }

     // Without AVX, defer to the float4 overload for each half.
     var lower = subadd(a.v4_0, b.v4_0);
     var upper = subadd(a.v4_4, b.v4_4);

     return new float8(lower, upper);
 }
예제 #18
0
 /// <summary>
 /// Returns the absolute value of every component of <paramref name="x"/>.
 /// </summary>
 public static float8 abs(float8 x)
 {
     if (Avx.IsAvxSupported)
     {
         // NOTE(review): bitmask32(31) is presumably the low 31 bits set, i.e. everything
         // except the sign bit - confirm against its definition.
         v256 keepMask = new v256(maxmath.bitmask32(31));

         return Avx.mm256_and_ps(x, keepMask);
     }

     // Without AVX the two float4 halves are processed independently.
     float4 lower = math.abs(x.v4_0);
     float4 upper = math.abs(x.v4_4);

     return new float8(lower, upper);
 }
예제 #19
0
파일: All.cs 프로젝트: csritter/MaxMath
 /// <summary>
 /// Returns true if no component of <paramref name="x"/> compares equal to zero.
 /// </summary>
 public static bool all(float8 x)
 {
     if (Avx.IsAvxSupported)
     {
         // One bit per component that compares equal to zero.
         v256 equalsZero = Avx.mm256_cmp_ps(x, default(float8), (int)Avx.CMP.EQ_OQ);
         int zeroLanes = Avx.mm256_movemask_ps(equalsZero);

         // An empty zero mask means every component is non-zero.
         return zeroLanes == 0;
     }

     return math.all(x.v4_0) & math.all(x.v4_4);
 }
예제 #20
0
 /// <summary>
 /// Reinterprets <paramref name="x"/> as a <see cref="uint8"/> without converting the values.
 /// </summary>
 public static uint8 asuint(float8 x)
 {
     if (Avx.IsAvxSupported)
     {
         // Go through the untyped 256-bit register representation.
         v256 raw = (v256)x;

         return raw;
     }

     // Without AVX, reinterpret the memory of the local copy.
     uint8 *reinterpreted = (uint8 *)&x;

     return *reinterpreted;
 }
예제 #21
0
파일: Cubic.cs 프로젝트: csritter/MaxMath
        /// <summary>
        /// Interpolates from <paramref name="from"/> to <paramref name="to"/> using the cubic
        /// weight 3t^2 - 2t^3 instead of <paramref name="t"/> itself.
        /// </summary>
        /// <param name="from">Start values.</param>
        /// <param name="to">End values.</param>
        /// <param name="t">Interpolation parameter shared by all components.</param>
        public static float8 smoothlerp(float8 from, float8 to, float t)
        {
            // (-2t^2, 3t); its component sum multiplied by t yields 3t^2 - 2t^3.
            float2 terms = t * new float2(-2f * t, 3f);

            t = t * math.csum(terms);

            // from - t * from, i.e. the share that "from" contributes.
            var fromShare = mad(-t, from, from);

            return mad(t, to, fromShare);
        }
예제 #22
0
 /// <summary>
 /// Folds the two 128-bit halves of <paramref name="x"/> together with XOR and forwards the
 /// result to the v128 overload.
 /// </summary>
 /// <exception cref="CPUFeatureCheckException">Thrown when AVX is not supported.</exception>
 internal static int v256(float8 x)
 {
     if (Avx.IsAvxSupported)
     {
         var lowerHalf = Avx.mm256_castps256_ps128(x);
         var upperHalf = Avx.mm256_extractf128_ps(x, 1);

         return v128(Sse2.xor_si128(lowerHalf, upperHalf));
     }

     // There is no non-AVX implementation; callers are expected to check the feature first.
     throw new CPUFeatureCheckException();
 }
예제 #23
0
        /// <summary>
        /// Returns the dot product of <paramref name="x"/> and <paramref name="y"/>.
        /// </summary>
        public static float dot(float8 x, float8 y)
        {
            if (Avx.IsAvxSupported)
            {
                // Dot product within each 128-bit half (mask 255 = all inputs, all outputs).
                v256 perHalf = Avx.mm256_dp_ps(x, y, 255);

                // Add the two partial results; only the lowest element is needed.
                v128 lowerHalf = Avx.mm256_castps256_ps128(perHalf);
                v128 upperHalf = Avx.mm256_extractf128_ps(perHalf, 1);

                return Sse.add_ss(lowerHalf, upperHalf).Float0;
            }

            float lower = math.dot(x.v4_0, y.v4_0);
            float upper = math.dot(x.v4_4, y.v4_4);

            return lower + upper;
        }
예제 #24
0
        /// <summary>
        /// Returns NextFloat8() scaled by (<paramref name="max"/> - <paramref name="min"/>) and
        /// offset by <paramref name="min"/>.
        /// </summary>
        /// <param name="min">Per-component lower bounds.</param>
        /// <param name="max">Per-component upper bounds; asserted not to be smaller than <paramref name="min"/>.</param>
        public float8 NextFloat8(float8 min, float8 max)
        {
            // Every component of the range must be non-negative.
            Assert.IsNotSmaller(max.x0, min.x0);
            Assert.IsNotSmaller(max.x1, min.x1);
            Assert.IsNotSmaller(max.x2, min.x2);
            Assert.IsNotSmaller(max.x3, min.x3);
            Assert.IsNotSmaller(max.x4, min.x4);
            Assert.IsNotSmaller(max.x5, min.x5);
            Assert.IsNotSmaller(max.x6, min.x6);
            Assert.IsNotSmaller(max.x7, min.x7);

            float8 range = max - min;

            // random * range + min
            return maxmath.mad(NextFloat8(), range, min);
        }
예제 #25
0
        /// <summary>
        /// Divides <paramref name="dividend"/> by <paramref name="divisor"/> and splits the result
        /// into its integral part (returned) and a remainder.
        /// </summary>
        /// <param name="dividend">Numerators.</param>
        /// <param name="divisor">Denominators.</param>
        /// <param name="remainder">Receives divisor * (fractional part of the division result).</param>
        /// <param name="fastApproximate">When true the division goes through div() instead of the / operator.</param>
        /// <returns>The integral part of the division result, as produced by modf.</returns>
        public static float8 divrem(float8 dividend, float8 divisor, out float8 remainder, bool fastApproximate = false)
        {
            float8 ratio;

            if (fastApproximate)
            {
                ratio = div(dividend, divisor);
            }
            else
            {
                ratio = dividend / divisor;
            }

            // modf returns the fractional part and writes the integral part to "quotient".
            float8 quotient;
            remainder = divisor * modf(ratio, out quotient);

            return quotient;
        }
예제 #26
0
        /// <summary>
        /// Divides sixteen unsigned 16-bit values by running both 8-element halves through
        /// vdiv_ushort_AVX and packing the results back to 16 bits.
        /// </summary>
        /// <exception cref="CPUFeatureCheckException">Thrown when AVX2 is not supported.</exception>
        internal static ushort16 vdiv_ushort(ushort16 dividend, ushort16 divisor)
        {
            if (Avx2.IsAvx2Supported)
            {
                float8 lowerQuotients = vdiv_ushort_AVX(dividend.v8_0, divisor.v8_0);
                float8 upperQuotients = vdiv_ushort_AVX(dividend.v8_8, divisor.v8_8);

                // The pack works per 128-bit lane and so interleaves the two inputs...
                v256 packed = Avx2.mm256_packus_epi32((uint8)lowerQuotients, (uint8)upperQuotients);

                // ...which is undone by swapping the two middle 64-bit blocks.
                return Avx2.mm256_permute4x64_epi64(packed, Sse.SHUFFLE(3, 1, 2, 0));
            }

            // There is no non-AVX2 implementation; callers are expected to check the feature first.
            throw new CPUFeatureCheckException();
        }
예제 #27
0
        /// <summary>
        /// Three-way comparison of <paramref name="x"/> and <paramref name="y"/>, component-wise.
        /// </summary>
        /// <returns>
        /// NOTE(review): with comparison masks being all ones (-1), each component is 1 where
        /// x is greater, -1 where y is greater and 0 otherwise - confirm the float4 overload agrees.
        /// </returns>
        public static int8 compareto(float8 x, float8 y)
        {
            if (Avx.IsAvxSupported)
            {
                int8 greaterMask = Avx.mm256_cmp_ps(x, y, (int)Avx.CMP.GT_OS);
                int8 lessMask    = Avx.mm256_cmp_ps(y, x, (int)Avx.CMP.GT_OS);

                // Negate the "greater" mask and add the "less" mask.
                return (0 - greaterMask) + lessMask;
            }

            // Without AVX, defer to the float4 overload for each half.
            var lower = compareto(x.v4_0, y.v4_0);
            var upper = compareto(x.v4_4, y.v4_4);

            return new int8(lower, upper);
        }
예제 #28
0
        /// <summary>
        /// Returns the sum of all eight components of <paramref name="x"/>.
        /// </summary>
        public static float csum(float8 x)
        {
            if (Avx.IsAvxSupported)
            {
                // 8 -> 4: add the upper 128-bit half onto the lower one.
                v128 four = Sse.add_ps(Avx.mm256_castps256_ps128(x),
                                       Avx.mm256_extractf128_ps(x, 1));

                // 4 -> 2: add the element-reversed vector onto itself.
                v128 reversed = Sse2.shuffle_epi32(four, Sse.SHUFFLE(0, 1, 2, 3));
                v128 two = Sse.add_ps(four, reversed);

                // 2 -> 1: move the second element into the lowest position and add it.
                v128 second = Sse2.shufflelo_epi16(two, Sse.SHUFFLE(0, 0, 3, 2));

                return Sse.add_ss(two, second).Float0;
            }

            // Without AVX, add the halves and sum the remaining four components.
            float4 halves = x.v4_0 + x.v4_4;

            return math.csum(halves);
        }
예제 #29
0
파일: Sign.cs 프로젝트: csritter/MaxMath
        /// <summary>
        /// Returns the sign of every component of <paramref name="x"/>: 1 for values greater than
        /// zero, -1 for values less than zero. On the AVX path, components that compare equal to
        /// zero are returned unchanged.
        /// </summary>
        public static float8 sign(float8 x)
        {
            if (Avx.IsAvxSupported)
            {
                // 0x3F80_0000 is the IEEE 754 single precision bit pattern of 1.0f.
                v256 exp = new v256(math.asfloat(0x3F80_0000));

                // Per-component comparison masks against zero.
                float8 zeroMask     = Avx.mm256_cmp_ps(x, default(v256), (int)Avx.CMP.EQ_OQ);
                float8 negativeMask = Avx.mm256_cmp_ps(x, default(v256), (int)Avx.CMP.LT_OS);
                float8 positiveMask = Avx.mm256_cmp_ps(x, default(v256), (int)Avx.CMP.GT_OS);

                // Turn the masks into 1.0f where the comparison held and 0.0f elsewhere.
                negativeMask = Avx.mm256_and_ps(negativeMask, exp);
                positiveMask = Avx.mm256_and_ps(positiveMask, exp);


                // (positive - negative) yields 1, -1 or 0; where x compares equal to zero,
                // x itself is selected instead.
                return(Avx.mm256_blendv_ps(positiveMask - negativeMask, x, zeroMask));
            }
            else
            {
                // Without AVX the two float4 halves are processed independently.
                return(new float8(math.sign(x.v4_0), math.sign(x.v4_4)));
            }
        }
예제 #30
0
        /// <summary>
        /// Divides eight unsigned 16-bit values in floating point, using the hardware reciprocal
        /// of the divisor followed by a correction step. The result is returned as floats.
        /// </summary>
        /// <param name="dividend">Numerators.</param>
        /// <param name="divisor">Denominators; every component is asserted to be non-zero.</param>
        /// <exception cref="CPUFeatureCheckException">Thrown when AVX is not supported.</exception>
        private static float8 vdiv_ushort_AVX(ushort8 dividend, ushort8 divisor)
        {
            // Division by zero is a caller error.
            Assert.AreNotEqual(divisor.x0, 0);
            Assert.AreNotEqual(divisor.x1, 0);
            Assert.AreNotEqual(divisor.x2, 0);
            Assert.AreNotEqual(divisor.x3, 0);
            Assert.AreNotEqual(divisor.x4, 0);
            Assert.AreNotEqual(divisor.x5, 0);
            Assert.AreNotEqual(divisor.x6, 0);
            Assert.AreNotEqual(divisor.x7, 0);

            if (Avx.IsAvxSupported)
            {
                // Widen both operands to single precision floats.
                float8 dividend_f32 = dividend;
                float8 divisor_f32  = divisor;

                // Hardware reciprocal of the divisor.
                float8 divisor_f32_rcp = Avx.mm256_rcp_ps(divisor_f32);


                // Correction term: PRECISION_ADJUSTMENT_FACTOR - (rcp * divisor).
                // NOTE(review): PRECISION_ADJUSTMENT_FACTOR is defined elsewhere; both branches
                // appear to treat it as a float bit pattern - confirm.
                float8 precisionLossCompensation;

                if (Fma.IsFmaSupported)
                {
                    // Fused negated multiply-add: -(rcp * divisor) + factor.
                    precisionLossCompensation = Fma.mm256_fnmadd_ps(divisor_f32_rcp, divisor_f32, new v256(PRECISION_ADJUSTMENT_FACTOR));
                }
                else
                {
                    // Same expression without the fused instruction.
                    precisionLossCompensation = maxmath.mad(-divisor_f32_rcp, divisor_f32, math.asfloat(PRECISION_ADJUSTMENT_FACTOR));
                }

                // Apply the correction to the reciprocal, then multiply by the dividend.
                precisionLossCompensation *= divisor_f32_rcp;
                precisionLossCompensation *= dividend_f32;

                return(precisionLossCompensation);
            }
            else
            {
                // There is no non-AVX implementation; callers are expected to check the feature first.
                throw new CPUFeatureCheckException();
            }
        }