/// <summary>
/// Tests, per lane, whether <paramref name="x"/> is unchanged by clamping it between
/// <paramref name="min"/> and <paramref name="max"/>.
/// </summary>
/// <param name="x">Values to test.</param>
/// <param name="min">Lower bounds.</param>
/// <param name="max">Upper bounds.</param>
/// <returns>A mask that is true where the clamped value compares equal to <paramref name="x"/>.</returns>
public static bool8 isinrange(float8 x, float8 min, float8 max)
{
    // A bitwise comparison, i.e.
    //     asint(maxmath.min(maxmath.max(x, min), max)) == asint(x)
    // fails at +/- 0. Saving one or two clock cycles is not worth the risk,
    // so the comparison is done on the floating point values.
    return maxmath.min(maxmath.max(x, min), max) == x;
}
/// <summary>
/// Searches the lanes of <paramref name="v"/> for a value equal to <paramref name="x"/>.
/// </summary>
/// <param name="v">Vector to search.</param>
/// <param name="x">Value to look for.</param>
/// <returns>
/// The trailing-zero count of the per-lane equality mask on the SIMD paths, i.e. the lowest
/// matching lane index. The scalar path returns the first matching index, or 32 when no lane matches.
/// </returns>
public static int indexof(float8 v, float x)
{
    if (Avx2.IsAvx2Supported)
    {
        int equalMask = Avx.mm256_movemask_ps(Avx.mm256_cmp_ps(v, new float8(x), (int)Avx.CMP.EQ_OQ));

        return math.tzcnt(equalMask);
    }

    if (Sse.IsSseSupported)
    {
        v128 needle = new v128(x);

        // Compare each 128-bit half separately, then merge the two 4-bit masks into one 8-bit mask.
        int lowMask = Sse.movemask_ps(Sse.cmpeq_ps(*(v128 *)&v._v4_0, needle));
        int highMask = Sse.movemask_ps(Sse.cmpeq_ps(*(v128 *)&v._v4_4, needle));

        return math.tzcnt(lowMask | (highMask << 4));
    }

    for (int lane = 0; lane < 8; lane++)
    {
        if (v[lane] == x)
        {
            return lane;
        }
    }

    // Not found: same sentinel value the scalar path has always returned.
    return 32;
}
/// <summary>
/// Computes sine and cosine of every lane of <paramref name="x"/> by delegating to
/// <c>math.sincos</c> on each 4-lane half.
/// </summary>
/// <param name="x">Input values.</param>
/// <param name="s">Receives the sines.</param>
/// <param name="c">Receives the cosines.</param>
public static void sincos(float8 x, out float8 s, out float8 c)
{
    float4 sinLow, cosLow;
    float4 sinHigh, cosHigh;

    math.sincos(x.v4_0, out sinLow, out cosLow);
    math.sincos(x.v4_4, out sinHigh, out cosHigh);

    s = new float8(sinLow, sinHigh);
    c = new float8(cosLow, cosHigh);
}
/// <summary>
/// Wraps <paramref name="x"/> by <paramref name="length"/>: subtracts <c>floor(x / length) * length</c>
/// from <paramref name="x"/> and clamps the result between 0 and <paramref name="length"/>.
/// </summary>
/// <param name="x">Values to wrap.</param>
/// <param name="length">Period lengths.</param>
/// <returns>The wrapped values.</returns>
public static float8 repeat(float8 x, float8 length)
{
    float8 wholePeriods = floor(x / length);
    float8 wrapped = mad(wholePeriods, -length, x);

    return clamp(wrapped, 0f, length);
}
/// <summary>
/// Returns the lane-wise maximum of <paramref name="a"/> and <paramref name="b"/>.
/// </summary>
/// <param name="a">First operand.</param>
/// <param name="b">Second operand.</param>
/// <returns>The AVX max when available, otherwise <c>math.max</c> applied to each 4-lane half.</returns>
public static float8 max(float8 a, float8 b)
{
    if (Avx.IsAvxSupported)
    {
        return Avx.mm256_max_ps(a, b);
    }

    float4 low = math.max(a.v4_0, b.v4_0);
    float4 high = math.max(a.v4_4, b.v4_4);

    return new float8(low, high);
}
/// <summary>
/// Returns the lane-wise reciprocal square root of <paramref name="x"/>.
/// </summary>
/// <param name="x">Input values.</param>
/// <returns>The result of the AVX rsqrt intrinsic when available, otherwise <c>math.rsqrt</c> on each half.</returns>
public static float8 rsqrt(float8 x)
{
    // NOTE(review): the hardware rsqrt intrinsic is presumably an approximation, so the two
    // paths may differ in the low bits - confirm this is acceptable for callers.
    if (Avx.IsAvxSupported)
    {
        return Avx.mm256_rsqrt_ps(x);
    }

    float4 low = math.rsqrt(x.v4_0);
    float4 high = math.rsqrt(x.v4_4);

    return new float8(low, high);
}
/// <summary>
/// Lane-wise step function: yields 1 where <paramref name="x"/> is greater than or equal to
/// <paramref name="y"/> and 0 elsewhere.
/// </summary>
/// <param name="y">Threshold (edge) values.</param>
/// <param name="x">Values compared against the threshold.</param>
/// <returns>A vector of 0s and 1s.</returns>
public static float8 step(float8 y, float8 x)
{
    if (Avx.IsAvxSupported)
    {
        // Select 1 where x >= y, 0 otherwise.
        return Avx.mm256_blendv_ps(default(float8), new float8(1f), Avx.mm256_cmp_ps(x, y, (int)Avx.CMP.GE_OS));
    }
    else
    {
        // math.step takes the threshold first. Pass (y, x) so this path tests x >= y exactly
        // like the AVX path; the arguments used to be swapped here, which tested y >= x instead.
        return new float8(math.step(y.v4_0, x.v4_0), math.step(y.v4_4, x.v4_4));
    }
}
/// <summary>
/// Divide-then-alternately-add/subtract. With FMA this multiplies <paramref name="a"/> by
/// <c>rcp(b)</c> and combines with <paramref name="c"/> through <c>Fma.mm256_fmsubadd_ps</c>;
/// otherwise it defers to the 4-lane <c>dadsub</c> overload on each half.
/// </summary>
/// <param name="a">Dividends.</param>
/// <param name="b">Divisors.</param>
/// <param name="c">Values alternately added/subtracted.</param>
/// <returns>The combined result.</returns>
public static float8 dadsub(float8 a, float8 b, float8 c)
{
    if (Fma.IsFmaSupported)
    {
        return Fma.mm256_fmsubadd_ps(a, rcp(b), c);
    }

    float4 low = dadsub(a.v4_0, b.v4_0, c.v4_0);
    float4 high = dadsub(a.v4_4, b.v4_4, c.v4_4);

    return new float8(low, high);
}
/// <summary>
/// Returns true if at least one lane of <paramref name="x"/> does not compare equal to zero.
/// </summary>
/// <param name="x">Vector to test.</param>
/// <returns>True unless every lane compares equal to zero.</returns>
public static bool any(float8 x)
{
    if (Avx.IsAvxSupported)
    {
        // One bit per lane, set where the lane compares equal to zero.
        int zeroLanes = Avx.mm256_movemask_ps(Avx.mm256_cmp_ps(x, default(float8), (int)Avx.CMP.EQ_OQ));

        // Anything other than "all 8 lanes are zero" means some lane is non-zero.
        return bitmask32(8) != zeroLanes;
    }

    return math.any(x.v4_0) | math.any(x.v4_4);
}
/// <summary>
/// Lane-wise division implemented as a multiplication by <c>rcp(divisor)</c> on AVX hardware,
/// and through the 4-lane <c>div</c> overload otherwise.
/// </summary>
/// <param name="dividend">Numerators.</param>
/// <param name="divisor">Denominators.</param>
/// <returns>The quotients.</returns>
public static float8 div(float8 dividend, float8 divisor)
{
    if (Avx.IsAvxSupported)
    {
        return dividend * rcp(divisor);
    }

    float4 low = div(dividend.v4_0, divisor.v4_0);
    float4 high = div(dividend.v4_4, divisor.v4_4);

    return new float8(low, high);
}
/// <summary>
/// Rounds every lane of <paramref name="x"/> to an integral value.
/// </summary>
/// <param name="x">Values to round.</param>
/// <returns>
/// The AVX round with mode <c>FROUND_NINT_NOEXC</c> when available, otherwise <c>math.round</c> on each half.
/// </returns>
public static float8 round(float8 x)
{
    if (Avx.IsAvxSupported)
    {
        return Avx.mm256_round_ps(x, (int)X86.RoundingMode.FROUND_NINT_NOEXC);
    }

    float4 low = math.round(x.v4_0);
    float4 high = math.round(x.v4_4);

    return new float8(low, high);
}
/// <summary>
/// Returns the negated absolute value of every lane of <paramref name="x"/>.
/// </summary>
/// <param name="x">Input values.</param>
/// <returns>
/// On AVX, <paramref name="x"/> with bit 31 of each lane forced on; otherwise the 4-lane <c>nabs</c> of each half.
/// </returns>
public static float8 nabs(float8 x)
{
    if (Avx.IsAvxSupported)
    {
        // OR-ing in bit 31 sets the sign bit of every 32-bit lane.
        return Avx.mm256_or_ps(x, new v256(1 << 31));
    }

    return new float8(nabs(x.v4_0), nabs(x.v4_4));
}
/// <summary>
/// Rounds every lane of <paramref name="x"/> down via the AVX floor intrinsic, or
/// <c>math.floor</c> on each half when AVX is unavailable.
/// </summary>
/// <param name="x">Input values.</param>
/// <returns>The floored values.</returns>
public static float8 floor(float8 x)
{
    if (Avx.IsAvxSupported)
    {
        return Avx.mm256_floor_ps(x);
    }

    float4 low = math.floor(x.v4_0);
    float4 high = math.floor(x.v4_4);

    return new float8(low, high);
}
/// <summary>
/// Rounds every lane of <paramref name="x"/> up via the AVX ceil intrinsic, or
/// <c>math.ceil</c> on each half when AVX is unavailable.
/// </summary>
/// <param name="x">Input values.</param>
/// <returns>The ceiled values.</returns>
public static float8 ceil(float8 x)
{
    if (Avx.IsAvxSupported)
    {
        return Avx.mm256_ceil_ps(x);
    }

    float4 low = math.ceil(x.v4_0);
    float4 high = math.ceil(x.v4_4);

    return new float8(low, high);
}
/// <summary>
/// Alternating add/subtract of <paramref name="a"/> and <paramref name="b"/>. With FMA this is
/// expressed as <c>madsub(1f, a, b)</c>; otherwise the 4-lane <c>addsub</c> is applied to each half.
/// </summary>
/// <param name="a">Left operand.</param>
/// <param name="b">Right operand.</param>
/// <returns>The combined result.</returns>
public static float8 addsub(float8 a, float8 b)
{
    if (Fma.IsFmaSupported)
    {
        return madsub(1f, a, b);
    }

    float4 low = addsub(a.v4_0, b.v4_0);
    float4 high = addsub(a.v4_4, b.v4_4);

    return new float8(low, high);
}
/// <summary>
/// Multiplies <paramref name="a"/> by <paramref name="b"/> and alternately subtracts/adds
/// <paramref name="c"/>, using <c>Fma.mm256_fmaddsub_ps</c> when FMA is available and the
/// 4-lane <c>msubadd</c> on each half otherwise.
/// </summary>
/// <param name="a">First factor.</param>
/// <param name="b">Second factor.</param>
/// <param name="c">Values alternately subtracted/added.</param>
/// <returns>The combined result.</returns>
public static float8 msubadd(float8 a, float8 b, float8 c)
{
    if (Fma.IsFmaSupported)
    {
        return Fma.mm256_fmaddsub_ps(a, b, c);
    }

    float4 low = msubadd(a.v4_0, b.v4_0, c.v4_0);
    float4 high = msubadd(a.v4_4, b.v4_4, c.v4_4);

    return new float8(low, high);
}
/// <summary>
/// Alternating subtract/add of <paramref name="a"/> and <paramref name="b"/>, using
/// <c>Avx.mm256_addsub_ps</c> when AVX is available and the 4-lane <c>subadd</c> on each half otherwise.
/// </summary>
/// <param name="a">Left operand.</param>
/// <param name="b">Right operand.</param>
/// <returns>The combined result.</returns>
public static float8 subadd(float8 a, float8 b)
{
    if (Avx.IsAvxSupported)
    {
        return Avx.mm256_addsub_ps(a, b);
    }

    float4 low = subadd(a.v4_0, b.v4_0);
    float4 high = subadd(a.v4_4, b.v4_4);

    return new float8(low, high);
}
/// <summary>
/// Returns the absolute value of every lane of <paramref name="x"/>.
/// </summary>
/// <param name="x">Input values.</param>
/// <returns>
/// On AVX, <paramref name="x"/> AND-ed with <c>bitmask32(31)</c> in every lane; otherwise
/// <c>math.abs</c> of each half.
/// </returns>
public static float8 abs(float8 x)
{
    if (Avx.IsAvxSupported)
    {
        // Keep only the bits selected by the 31-bit mask, which drops the sign bit of each lane.
        return Avx.mm256_and_ps(x, new v256(maxmath.bitmask32(31)));
    }

    float4 low = math.abs(x.v4_0);
    float4 high = math.abs(x.v4_4);

    return new float8(low, high);
}
/// <summary>
/// Returns true if no lane of <paramref name="x"/> compares equal to zero.
/// </summary>
/// <param name="x">Vector to test.</param>
/// <returns>True when every lane is non-zero.</returns>
public static bool all(float8 x)
{
    if (Avx.IsAvxSupported)
    {
        // One bit per lane, set where the lane compares equal to zero.
        int zeroLanes = Avx.mm256_movemask_ps(Avx.mm256_cmp_ps(x, default(float8), (int)Avx.CMP.EQ_OQ));

        return 0 == zeroLanes;
    }

    return math.all(x.v4_0) & math.all(x.v4_4);
}
/// <summary>
/// Reinterprets the storage of <paramref name="x"/> as a <c>uint8</c> without numeric conversion.
/// </summary>
/// <param name="x">Vector to reinterpret.</param>
/// <returns>
/// The same data viewed as <c>uint8</c>: through a <c>v256</c> cast on AVX, through a pointer cast otherwise.
/// </returns>
public static uint8 asuint(float8 x)
{
    if (Avx.IsAvxSupported)
    {
        return (v256)x;
    }

    return *(uint8 *)&x;
}
/// <summary>
/// Interpolates from <paramref name="from"/> to <paramref name="to"/> using the cubic weight
/// <c>t * (3t - 2t^2)</c> instead of <paramref name="t"/> itself.
/// </summary>
/// <param name="from">Start values.</param>
/// <param name="to">End values.</param>
/// <param name="t">Interpolation parameter (used as given; no clamping is applied here).</param>
/// <returns><c>from + weight * (to - from)</c>, evaluated as two multiply-adds.</returns>
public static float8 smoothlerp(float8 from, float8 to, float t)
{
    // terms = (-2t^2, 3t); their sum times t gives the weight 3t^2 - 2t^3.
    float2 terms = t * new float2(-2f * t, 3f);
    float weight = t * math.csum(terms);

    return mad(weight, to, mad(-weight, from, from));
}
/// <summary>
/// Reduces a 256-bit vector to an <c>int</c> by XOR-ing its two 128-bit halves and passing the
/// result to the <c>v128</c> overload.
/// </summary>
/// <param name="x">Vector to reduce.</param>
/// <returns>The value produced by the <c>v128</c> overload for the XOR of both halves.</returns>
/// <exception cref="CPUFeatureCheckException">Thrown when AVX is not supported.</exception>
internal static int v256(float8 x)
{
    if (Avx.IsAvxSupported)
    {
        var lowerHalf = Avx.mm256_castps256_ps128(x);
        var upperHalf = Avx.mm256_extractf128_ps(x, 1);

        return v128(Sse2.xor_si128(lowerHalf, upperHalf));
    }

    throw new CPUFeatureCheckException();
}
/// <summary>
/// Returns the dot product of <paramref name="x"/> and <paramref name="y"/> over all eight lanes.
/// </summary>
/// <param name="x">First vector.</param>
/// <param name="y">Second vector.</param>
/// <returns>The sum of the lane-wise products.</returns>
public static float dot(float8 x, float8 y)
{
    if (Avx.IsAvxSupported)
    {
        // The dp instruction works per 128-bit half, so the two partial results are added afterwards.
        var partialDots = Avx.mm256_dp_ps(x, y, 255);
        var total = Sse.add_ss(Avx.mm256_castps256_ps128(partialDots), Avx.mm256_extractf128_ps(partialDots, 1));

        return total.Float0;
    }

    return math.dot(x.v4_0, y.v4_0) + math.dot(x.v4_4, y.v4_4);
}
/// <summary>
/// Produces a <c>float8</c> by scaling the result of the parameterless <c>NextFloat8()</c> by
/// <c>max - min</c> and offsetting it by <paramref name="min"/>.
/// </summary>
/// <param name="min">Per-lane lower bounds.</param>
/// <param name="max">Per-lane upper bounds; asserted to be not smaller than <paramref name="min"/> in every lane.</param>
/// <returns><c>NextFloat8() * (max - min) + min</c>.</returns>
public float8 NextFloat8(float8 min, float8 max)
{
    // Validate every lane individually: max must not be below min.
    Assert.IsNotSmaller(max.x0, min.x0);
    Assert.IsNotSmaller(max.x1, min.x1);
    Assert.IsNotSmaller(max.x2, min.x2);
    Assert.IsNotSmaller(max.x3, min.x3);
    Assert.IsNotSmaller(max.x4, min.x4);
    Assert.IsNotSmaller(max.x5, min.x5);
    Assert.IsNotSmaller(max.x6, min.x6);
    Assert.IsNotSmaller(max.x7, min.x7);

    return maxmath.mad(NextFloat8(), max - min, min);
}
/// <summary>
/// Divides <paramref name="dividend"/> by <paramref name="divisor"/>, returning the integral part
/// of the quotient and writing <c>divisor * fractionalPart</c> to <paramref name="remainder"/>.
/// </summary>
/// <param name="dividend">Numerators.</param>
/// <param name="divisor">Denominators.</param>
/// <param name="remainder">Receives the divisor multiplied by the fractional part of the quotient.</param>
/// <param name="fastApproximate">When true the quotient comes from <c>div</c>; otherwise from the <c>/</c> operator.</param>
/// <returns>The integral part of the quotient as reported by <c>modf</c>.</returns>
public static float8 divrem(float8 dividend, float8 divisor, out float8 remainder, bool fastApproximate = false)
{
    float8 ratio = fastApproximate ? div(dividend, divisor)
                                   : dividend / divisor;

    float8 quotient;
    remainder = divisor * modf(ratio, out quotient);

    return quotient;
}
/// <summary>
/// Divides sixteen unsigned 16-bit lanes by running <c>vdiv_ushort_AVX</c> on each 8-lane half
/// and packing the converted results back into a single vector.
/// </summary>
/// <param name="dividend">Numerators.</param>
/// <param name="divisor">Denominators.</param>
/// <returns>The packed quotients.</returns>
/// <exception cref="CPUFeatureCheckException">Thrown when AVX2 is not supported.</exception>
internal static ushort16 vdiv_ushort(ushort16 dividend, ushort16 divisor)
{
    if (Avx2.IsAvx2Supported)
    {
        float8 lowQuotients = vdiv_ushort_AVX(dividend.v8_0, divisor.v8_0);
        float8 highQuotients = vdiv_ushort_AVX(dividend.v8_8, divisor.v8_8);

        var packed = Avx2.mm256_packus_epi32((uint8)lowQuotients, (uint8)highQuotients);

        // Reorder the 64-bit quarters after the pack (order 0, 2, 1, 3).
        return Avx2.mm256_permute4x64_epi64(packed, Sse.SHUFFLE(3, 1, 2, 0));
    }

    throw new CPUFeatureCheckException();
}
/// <summary>
/// Lane-wise three-way comparison of <paramref name="x"/> and <paramref name="y"/>.
/// </summary>
/// <param name="x">Left operand.</param>
/// <param name="y">Right operand.</param>
/// <returns>
/// On AVX, the negated "x greater than y" mask plus the "y greater than x" mask; otherwise the
/// 4-lane <c>compareto</c> of each half.
/// </returns>
public static int8 compareto(float8 x, float8 y)
{
    if (Avx.IsAvxSupported)
    {
        int8 greaterMask = Avx.mm256_cmp_ps(x, y, (int)Avx.CMP.GT_OS);
        int8 lessMask = Avx.mm256_cmp_ps(y, x, (int)Avx.CMP.GT_OS);

        // Negating the "greater" mask and adding the "less" mask folds both into one signed result.
        return (0 - greaterMask) + lessMask;
    }

    return new int8(compareto(x.v4_0, y.v4_0), compareto(x.v4_4, y.v4_4));
}
/// <summary>
/// Returns the sum of all eight lanes of <paramref name="x"/>.
/// </summary>
/// <param name="x">Vector to sum.</param>
/// <returns>The horizontal sum.</returns>
public static float csum(float8 x)
{
    if (Avx.IsAvxSupported)
    {
        // Step 1: fold the upper 128-bit half onto the lower one (8 -> 4 partial sums).
        v128 halves = Sse.add_ps(Avx.mm256_castps256_ps128(x), Avx.mm256_extractf128_ps(x, 1));

        // Step 2: add the lane-reversed copy (4 -> 2 distinct partial sums).
        v128 pairs = Sse.add_ps(halves, Sse2.shuffle_epi32(halves, Sse.SHUFFLE(0, 1, 2, 3)));

        // Step 3: bring the second float down to lane 0 via a 16-bit shuffle and add it to lane 0.
        v128 secondToFront = Sse2.shufflelo_epi16(pairs, Sse.SHUFFLE(0, 0, 3, 2));

        return Sse.add_ss(pairs, secondToFront).Float0;
    }

    return math.csum(x.v4_0 + x.v4_4);
}
/// <summary>
/// Lane-wise sign of <paramref name="x"/>.
/// </summary>
/// <param name="x">Input values.</param>
/// <returns>
/// On AVX: 1 where the lane is greater than zero, -1 where it is less than zero, and the input
/// lane itself where it compares equal to zero. Otherwise <c>math.sign</c> of each half.
/// </returns>
public static float8 sign(float8 x)
{
    if (Avx.IsAvxSupported)
    {
        // 0x3F80_0000 is the bit pattern of 1.0f; AND-ing it with a comparison mask yields 1 or 0.
        v256 one = new v256(math.asfloat(0x3F80_0000));

        float8 isZero = Avx.mm256_cmp_ps(x, default(v256), (int)Avx.CMP.EQ_OQ);
        float8 isNegative = Avx.mm256_cmp_ps(x, default(v256), (int)Avx.CMP.LT_OS);
        float8 isPositive = Avx.mm256_cmp_ps(x, default(v256), (int)Avx.CMP.GT_OS);

        float8 negativeOnes = Avx.mm256_and_ps(isNegative, one);
        float8 positiveOnes = Avx.mm256_and_ps(isPositive, one);

        // Lanes equal to zero pass the input through unchanged.
        return Avx.mm256_blendv_ps(positiveOnes - negativeOnes, x, isZero);
    }

    return new float8(math.sign(x.v4_0), math.sign(x.v4_4));
}
/// <summary>
/// Divides eight unsigned 16-bit lanes in floating point: starts from the hardware reciprocal of
/// the divisor, applies one correction step using <c>PRECISION_ADJUSTMENT_FACTOR</c>, and
/// multiplies by the dividend.
/// </summary>
/// <param name="dividend">Numerators.</param>
/// <param name="divisor">Denominators; every lane is asserted to be non-zero.</param>
/// <returns>The quotients as <c>float8</c>; the caller converts them back to integers.</returns>
/// <exception cref="CPUFeatureCheckException">Thrown when AVX is not supported.</exception>
private static float8 vdiv_ushort_AVX(ushort8 dividend, ushort8 divisor)
{
    Assert.AreNotEqual(divisor.x0, 0);
    Assert.AreNotEqual(divisor.x1, 0);
    Assert.AreNotEqual(divisor.x2, 0);
    Assert.AreNotEqual(divisor.x3, 0);
    Assert.AreNotEqual(divisor.x4, 0);
    Assert.AreNotEqual(divisor.x5, 0);
    Assert.AreNotEqual(divisor.x6, 0);
    Assert.AreNotEqual(divisor.x7, 0);

    if (Avx.IsAvxSupported)
    {
        float8 numerator = dividend;
        float8 denominator = divisor;
        float8 approxReciprocal = Avx.mm256_rcp_ps(denominator);

        // correction = FACTOR - approxReciprocal * denominator
        float8 quotient;
        if (Fma.IsFmaSupported)
        {
            quotient = Fma.mm256_fnmadd_ps(approxReciprocal, denominator, new v256(PRECISION_ADJUSTMENT_FACTOR));
        }
        else
        {
            quotient = maxmath.mad(-approxReciprocal, denominator, math.asfloat(PRECISION_ADJUSTMENT_FACTOR));
        }

        // Multiplication order is kept as-is: first the reciprocal, then the dividend.
        quotient *= approxReciprocal;
        quotient *= numerator;

        return quotient;
    }

    throw new CPUFeatureCheckException();
}