/// <summary>
/// Applies <c>Avx.AndNot</c> to two 256-bit vectors of any element type, or
/// defers to <c>SoftwareFallbacks.AndNot_Software</c> when AVX is unavailable.
/// </summary>
/// <typeparam name="T">Element type of both vectors.</typeparam>
/// <param name="left">Forwarded unchanged as the first operand.</param>
/// <param name="right">Forwarded unchanged as the second operand.</param>
/// <returns>The result reinterpreted back to <c>Vector256&lt;T&gt;</c>.</returns>
public static Vector256<T> AndNot<T>(Vector256<T> left, Vector256<T> right) where T : struct
{
    // No AVX means no hardware path at all, regardless of T.
    if (!Avx.IsSupported)
    {
        return SoftwareFallbacks.AndNot_Software(left, right);
    }

    // Only double is routed through the double-precision overload.
    if (typeof(T) == typeof(double))
    {
        Vector256<double> asDouble = Avx.AndNot(left.AsDouble(), right.AsDouble());
        return asDouble.As<double, T>();
    }

    // float and every remaining element type are reinterpreted as single-precision
    // lanes; the operation is bitwise, so only the bit pattern matters.
    Vector256<float> asSingle = Avx.AndNot(left.AsSingle(), right.AsSingle());
    return asSingle.As<float, T>();
}
/// <summary>
/// Per-lane logarithm approximation for eight single-precision values, built from
/// an exponent/mantissa split followed by a polynomial in the reduced argument.
/// </summary>
/// <param name="value">Input lanes.</param>
/// <returns>
/// The approximation, OR-ed with the mask produced by comparing <paramref name="value"/>
/// against zero (less-than-or-equal), so lanes flagged by that comparison carry the mask bits.
/// </returns>
/// <remarks>
/// NOTE(review): the constants used here (MinNormPos, MantMask, Ox7, Sqrthf, LogP0..LogP8,
/// LogQ1, LogQ2, Point5, One) are defined outside this method; their exact values are
/// assumed to match their names - confirm against the declarations.
/// </remarks>
public static Vector256<float> Log(Vector256<float> value)
{
    // Remember which lanes are <= 0 before the input is clamped.
    Vector256<float> nonPositive = Compare(value, Vector256<float>.Zero, FloatComparisonMode.LessThanOrEqualOrderedNonSignaling);

    // Clamp from below, then pull the exponent field out of the raw bits.
    Vector256<float> clamped = Max(value, MinNormPos.AsSingle());
    Vector256<int> exponentBits = Avx2.Subtract(Avx2.ShiftRightLogical(clamped.AsInt32(), 23), Ox7);

    // Mask the clamped bits and OR in Point5 to form the reduced mantissa.
    Vector256<float> mantissa = Or(And(clamped, MantMask.AsSingle()), Point5);

    // Lanes whose mantissa is below Sqrthf get the mantissa added back and the exponent lowered by one.
    Vector256<float> belowThreshold = Compare(mantissa, Sqrthf, FloatComparisonMode.LessThanOrderedNonSignaling);
    Vector256<float> addBack = And(mantissa, belowThreshold);
    Vector256<float> exponent = Subtract(Add(ConvertToVector256Single(exponentBits), One), And(One, belowThreshold));
    Vector256<float> reduced = Add(Subtract(mantissa, One), addBack);

    Vector256<float> reducedSquared = Multiply(reduced, reduced);

    // Horner evaluation over LogP0..LogP8.
    Vector256<float> poly = LogP0;
    poly = Add(Multiply(poly, reduced), LogP1);
    poly = Add(Multiply(poly, reduced), LogP2);
    poly = Add(Multiply(poly, reduced), LogP3);
    poly = Add(Multiply(poly, reduced), LogP4);
    poly = Add(Multiply(poly, reduced), LogP5);
    poly = Add(Multiply(poly, reduced), LogP6);
    poly = Add(Multiply(poly, reduced), LogP7);
    poly = Add(Multiply(poly, reduced), LogP8);
    poly = Multiply(Multiply(poly, reduced), reducedSquared);

    // Fold in the exponent contribution (split across LogQ1 and LogQ2) and the -0.5 * x^2 term.
    poly = Add(poly, Multiply(exponent, LogQ1));
    poly = Subtract(poly, Multiply(reducedSquared, Point5));
    Vector256<float> result = Add(Add(reduced, poly), Multiply(exponent, LogQ2));

    return Or(result, nonPositive);
}
/// <summary>
/// Per-lane exponential approximation for eight single-precision values: the input is
/// clamped, split into an integer part and a remainder, the remainder goes through a
/// polynomial, and the result is scaled by a power of two built from the integer part.
/// </summary>
/// <param name="value">Input lanes; clamped to [MinValue, MaxValue] before use.</param>
/// <returns>The approximated exponential of each lane.</returns>
/// <remarks>
/// NOTE(review): Log2, C1, C2, P0..P4, Point5, One, Ox7, MinValue and MaxValue are declared
/// outside this method; their values are assumed from their names - confirm.
/// </remarks>
public static Vector256<float> Exp(Vector256<float> value)
{
    Vector256<float> clamped = Max(Min(value, MaxValue), MinValue);

    // Integer part: floor(clamped * Log2 + 0.5).
    Vector256<float> whole = Floor(Add(Multiply(clamped, Log2), Point5));

    // Remainder: clamped - whole * C1 - whole * C2, subtracted in two steps.
    Vector256<float> highPart = Multiply(whole, C1);
    Vector256<float> lowPart = Multiply(whole, C2);
    Vector256<float> remainder = Subtract(Subtract(clamped, highPart), lowPart);
    Vector256<float> remainderSquared = Multiply(remainder, remainder);

    // Horner evaluation over P0..P4 with Point5 as the last coefficient.
    Vector256<float> poly = P0;
    poly = Add(Multiply(poly, remainder), P1);
    poly = Add(Multiply(poly, remainder), P2);
    poly = Add(Multiply(poly, remainder), P3);
    poly = Add(Multiply(poly, remainder), P4);
    poly = Add(Multiply(poly, remainder), Point5);
    poly = Add(Add(Multiply(poly, remainderSquared), remainder), One);

    // Build the scale factor by adding Ox7 to the integer part and shifting it
    // into bits 23 and above of each 32-bit lane.
    Vector256<int> scaleBits = Avx2.ShiftLeftLogical(Avx2.Add(ConvertToVector256Int32(whole), Ox7), 23);

    return Multiply(poly, scaleBits.AsSingle());
}
/// <summary>
/// Adds two 256-bit vectors lane by lane, picking the intrinsic overload that matches
/// <typeparamref name="T"/>: <c>Avx2.Add</c> for the eight integer element types and
/// <c>Avx.Add</c> for <c>float</c> and <c>double</c>.
/// </summary>
/// <typeparam name="T">Element type of both vectors.</typeparam>
/// <param name="left">First addend.</param>
/// <param name="right">Second addend.</param>
/// <returns>The lane-wise sum, reinterpreted as <c>Vector256&lt;T&gt;</c>.</returns>
/// <exception cref="NotSupportedException">
/// <typeparamref name="T"/> is none of the ten handled element types.
/// </exception>
public static Vector256<T> Vector256Add<T>(Vector256<T> left, Vector256<T> right) where T : struct
{
    // 8-bit lanes
    if (typeof(T) == typeof(byte))
    {
        Vector256<byte> sum = Avx2.Add(left.AsByte(), right.AsByte());
        return sum.As<byte, T>();
    }

    if (typeof(T) == typeof(sbyte))
    {
        Vector256<sbyte> sum = Avx2.Add(left.AsSByte(), right.AsSByte());
        return sum.As<sbyte, T>();
    }

    // 16-bit lanes
    if (typeof(T) == typeof(short))
    {
        Vector256<short> sum = Avx2.Add(left.AsInt16(), right.AsInt16());
        return sum.As<short, T>();
    }

    if (typeof(T) == typeof(ushort))
    {
        Vector256<ushort> sum = Avx2.Add(left.AsUInt16(), right.AsUInt16());
        return sum.As<ushort, T>();
    }

    // 32-bit integer lanes
    if (typeof(T) == typeof(int))
    {
        Vector256<int> sum = Avx2.Add(left.AsInt32(), right.AsInt32());
        return sum.As<int, T>();
    }

    if (typeof(T) == typeof(uint))
    {
        Vector256<uint> sum = Avx2.Add(left.AsUInt32(), right.AsUInt32());
        return sum.As<uint, T>();
    }

    // 64-bit integer lanes
    if (typeof(T) == typeof(long))
    {
        Vector256<long> sum = Avx2.Add(left.AsInt64(), right.AsInt64());
        return sum.As<long, T>();
    }

    if (typeof(T) == typeof(ulong))
    {
        Vector256<ulong> sum = Avx2.Add(left.AsUInt64(), right.AsUInt64());
        return sum.As<ulong, T>();
    }

    // Floating-point lanes use the AVX (not AVX2) overloads.
    if (typeof(T) == typeof(float))
    {
        Vector256<float> sum = Avx.Add(left.AsSingle(), right.AsSingle());
        return sum.As<float, T>();
    }

    if (typeof(T) == typeof(double))
    {
        Vector256<double> sum = Avx.Add(left.AsDouble(), right.AsDouble());
        return sum.As<double, T>();
    }

    throw new NotSupportedException();
}
// exp256_ps: exponential over a __m256 argument, written with Avx2.* and Fma.* calls.
// NOTE(review): on this single physical line the leading `//` turns the ENTIRE definition
// into a comment, not only the [MethodImpl] attribute, so no exp256_ps is compiled from
// this line as it stands - confirm whether only the attribute was meant to be disabled.
// Steps in the text below, in order:
//   1. x = V clamped from above by exp_hi and from below by exp_lo.
//   2. fx = x * cLOG2EF + 0.5; tmp = Floor(fx); lanes where tmp > fx (ordered compare) have
//      the compare mask AND-ed with `one` subtracted from tmp, giving the final fx.
//   3. x = x - fx * cexp_C1 - fx * cexp_C2 (two separate subtractions); z = x * x.
//   4. y runs through Fma.MultiplyAdd over cexp_p0..cexp_p5, then y = y * z + x, then y = y + one.
//   5. fx is converted to int32, 0x7f is added, the result is shifted left by 23 bits and
//      reinterpreted as float (pow2n); the return value is y * pow2n.
// The initial `__m256.Zero` assigned to tmp is overwritten before it is read.
//[MethodImpl(MethodImplOptions.AggressiveInlining)] public static __m256 exp256_ps(__m256 V) { __m256 x = V; __m256 tmp = __m256.Zero; __m256 one = SET(1.0f); x = Avx2.Min(x, exp_hi); x = Avx2.Max(x, exp_lo); __m256 fx = Avx2.Multiply(x, cLOG2EF); fx = Avx2.Add(fx, SET(0.5f)); tmp = Avx2.Floor(fx); var mask = Avx2.Compare(tmp, fx, FloatComparisonMode.OrderedGreaterThanSignaling); mask = Avx2.And(mask, one); fx = Avx2.Subtract(tmp, mask); tmp = Avx2.Multiply(fx, cexp_C1); __m256 z = Avx2.Multiply(fx, cexp_C2); x = Avx2.Subtract(x, tmp); x = Avx2.Subtract(x, z); z = Avx2.Multiply(x, x); __m256 y = cexp_p0; y = Fma.MultiplyAdd(y, x, cexp_p1); y = Fma.MultiplyAdd(y, x, cexp_p2); y = Fma.MultiplyAdd(y, x, cexp_p3); y = Fma.MultiplyAdd(y, x, cexp_p4); y = Fma.MultiplyAdd(y, x, cexp_p5); y = Fma.MultiplyAdd(y, z, x); y = Avx2.Add(y, one); var imm0 = Avx2.ConvertToVector256Int32(fx); var F7 = Vector256.Create((int)0x7f); imm0 = Avx2.Add(imm0, F7); imm0 = Avx2.ShiftLeftLogical(imm0, 23); __m256 pow2n = Vector256.AsSingle(imm0); y = Avx2.Multiply(y, pow2n); return(y); }
/// <summary>
/// Evaluates <c>EXP_C2 * V + EXP_C1</c> per lane with a fused multiply-add, truncates each
/// lane to a 32-bit integer, and returns those integer bits reinterpreted as floats.
/// </summary>
/// <param name="V">Input lanes.</param>
/// <returns>The reinterpreted bit pattern of the truncated affine transform.</returns>
/// <remarks>
/// NOTE(review): presumably a bit-trick exponential approximation, going by the name;
/// EXP_C1 and EXP_C2 are declared elsewhere - confirm their values.
/// </remarks>
public static __m256 fast_exp256_ps(__m256 V)
{
    var affine = Fma.MultiplyAdd(EXP_C2, V, EXP_C1);
    var truncatedBits = Avx2.ConvertToVector256Int32WithTruncation(affine);
    return Vector256.AsSingle(truncatedBits);
}