/// <summary>
/// Computes <c>a * b + c</c> lane by lane on 256-bit vectors.
/// </summary>
/// <param name="a">First multiplicand.</param>
/// <param name="b">Second multiplicand.</param>
/// <param name="c">Addend.</param>
/// <returns>
/// For <c>int</c>/<c>uint</c>: the low 32 bits of each product plus <paramref name="c"/>
/// (<c>Avx2.MultiplyLow</c> followed by <c>Avx2.Add</c>).
/// For <c>float</c>/<c>double</c>: the result of <c>Fma.MultiplyAdd</c>.
/// </returns>
/// <exception cref="NotSupportedException">
/// <c>T</c> is not <c>int</c>, <c>uint</c>, <c>float</c> or <c>double</c>.
/// </exception>
internal static Vector256<T> MultiplyAddVector256(Vector256<T> a, Vector256<T> b, Vector256<T> c)
{
    if (typeof(T) == typeof(int))
    {
        // The 256-bit MultiplyLow already yields all eight 32-bit products, so the
        // upper half does not need to be recomputed with a separate 128-bit multiply.
        Vector256<int> product = Avx2.MultiplyLow(a.As<T, int>(), b.As<T, int>());
        return Avx2.Add(product, c.As<T, int>()).As<int, T>();
    }

    if (typeof(T) == typeof(uint))
    {
        // Same reasoning as the signed case: one 256-bit multiply covers every lane.
        Vector256<uint> product = Avx2.MultiplyLow(a.As<T, uint>(), b.As<T, uint>());
        return Avx2.Add(product, c.As<T, uint>()).As<uint, T>();
    }

    if (typeof(T) == typeof(float))
    {
        return Fma.MultiplyAdd(a.As<T, float>(), b.As<T, float>(), c.As<T, float>()).As<float, T>();
    }

    if (typeof(T) == typeof(double))
    {
        return Fma.MultiplyAdd(a.As<T, double>(), b.As<T, double>(), c.As<T, double>()).As<double, T>();
    }

    throw new NotSupportedException();
}
/// <summary>
/// Forwards to <c>Avx.Shuffle</c> for the two floating-point element types.
/// </summary>
/// <param name="va">First source vector.</param>
/// <param name="vb">Second source vector.</param>
/// <param name="control">Immediate selector byte passed unchanged to <c>Avx.Shuffle</c>.</param>
/// <returns>The shuffled vector, reinterpreted back as <c>Vector256&lt;T&gt;</c>.</returns>
/// <exception cref="NotSupportedException"><c>T</c> is neither <c>float</c> nor <c>double</c>.</exception>
internal static Vector256<T> ShuffleVector256(Vector256<T> va, Vector256<T> vb, byte control)
{
    if (typeof(T) == typeof(float))
    {
        Vector256<float> shuffled = Avx.Shuffle(va.As<T, float>(), vb.As<T, float>(), control);
        return shuffled.As<float, T>();
    }
    else if (typeof(T) == typeof(double))
    {
        Vector256<double> shuffled = Avx.Shuffle(va.As<T, double>(), vb.As<T, double>(), control);
        return shuffled.As<double, T>();
    }
    else
    {
        throw new NotSupportedException();
    }
}
/// <summary>
/// Forwards to the 32-bit-element <c>Avx2.Shuffle</c> overload that takes an immediate control byte.
/// </summary>
/// <param name="va">Source vector.</param>
/// <param name="control">Immediate selector byte passed unchanged to <c>Avx2.Shuffle</c>.</param>
/// <returns>The shuffled vector, reinterpreted back as <c>Vector256&lt;T&gt;</c>.</returns>
/// <exception cref="NotSupportedException"><c>T</c> is neither <c>int</c> nor <c>uint</c>.</exception>
internal static Vector256<T> ShuffleVector256(Vector256<T> va, byte control)
{
    if (typeof(T) == typeof(int))
    {
        Vector256<int> shuffled = Avx2.Shuffle(va.As<T, int>(), control);
        return shuffled.As<int, T>();
    }
    else if (typeof(T) == typeof(uint))
    {
        Vector256<uint> shuffled = Avx2.Shuffle(va.As<T, uint>(), control);
        return shuffled.As<uint, T>();
    }
    else
    {
        throw new NotSupportedException();
    }
}
/// <summary>
/// Forwards to the byte-element <c>Avx2.Shuffle</c> overload, where the second vector
/// is the per-byte selector.
/// </summary>
/// <param name="va">Source bytes.</param>
/// <param name="vb">Selector vector passed unchanged as the shuffle mask.</param>
/// <returns>The shuffled vector, reinterpreted back as <c>Vector256&lt;T&gt;</c>.</returns>
/// <exception cref="NotSupportedException"><c>T</c> is neither <c>sbyte</c> nor <c>byte</c>.</exception>
internal static Vector256<T> ShuffleVector256(Vector256<T> va, Vector256<T> vb)
{
    if (typeof(T) == typeof(sbyte))
    {
        Vector256<sbyte> shuffled = Avx2.Shuffle(va.As<T, sbyte>(), vb.As<T, sbyte>());
        return shuffled.As<sbyte, T>();
    }
    else if (typeof(T) == typeof(byte))
    {
        Vector256<byte> shuffled = Avx2.Shuffle(va.As<T, byte>(), vb.As<T, byte>());
        return shuffled.As<byte, T>();
    }
    else
    {
        throw new NotSupportedException();
    }
}
/// <summary>
/// Fast polynomial approximation of the natural logarithm for eight floats at once.
/// Absolute error bounded by 1e-4.
/// </summary>
/// <param name="x">Input values.</param>
/// <returns>
/// The approximated logarithm per lane; lanes where <paramref name="x"/> compares
/// less than zero produce NaN.
/// </returns>
public static Vector256<float> Log(Vector256<float> x)
{
    // According to BenchmarkDotNet, isolating all the constants up-front
    // yields nearly 10% speed-up.
    const float OffsetForNonNegative = -89.970756366f;
    const float OffsetForNegative = float.NaN; // behavior of MathF.Log() on negative numbers
    const float C1 = 3.529304993f;
    const float C2 = -2.461222105f;
    const float C3 = 1.130626167f;
    const float C4 = -0.288739945f;
    const float C5 = 3.110401639e-2f;
    const float Ln2 = 0.6931471805f;
    const int MantissaMask = 0x7FFFFF;
    const int OneBits = 0x3F800000;

    Vector256<int> rawBits = x.As<float, int>();

    // Bits above the 23-bit mantissa (raw exponent field for positive inputs), converted to float.
    Vector256<float> exponent = Avx.ConvertToVector256Single(Avx2.ShiftRightArithmetic(rawBits, 23));

    // offset = x < 0 ? NaN : -89.970756366f
    Vector256<float> isNegative = Avx.Compare(x, Vector256<float>.Zero, FloatComparisonMode.OrderedLessThanNonSignaling);
    Vector256<float> offset = Avx.BlendVariable(
        Vector256.Create(OffsetForNonNegative),
        Vector256.Create(OffsetForNegative),
        isNegative);

    // Keep the mantissa bits and force the exponent bits of 1.0f.
    Vector256<float> mantissa = Avx2.Or(
        Avx2.And(rawBits, Vector256.Create(MantissaMask)),
        Vector256.Create(OneBits)).As<int, float>();

    // Horner evaluation, innermost term first:
    // m * (C1 + m * (C2 + m * (C3 + m * (C4 + m * C5))))
    Vector256<float> poly = Avx.Multiply(mantissa, Vector256.Create(C5));
    poly = Avx.Multiply(mantissa, Avx.Add(Vector256.Create(C4), poly));
    poly = Avx.Multiply(mantissa, Avx.Add(Vector256.Create(C3), poly));
    poly = Avx.Multiply(mantissa, Avx.Add(Vector256.Create(C2), poly));
    poly = Avx.Multiply(mantissa, Avx.Add(Vector256.Create(C1), poly));

    // ... + (offset + 0.6931471805f * exponent)
    Vector256<float> scaledExponent = Avx.Multiply(Vector256.Create(Ln2), exponent);
    return Avx.Add(poly, Avx.Add(offset, scaledExponent));
}
/// <summary>
/// Approximates the exponential function for eight floats at once using range
/// reduction followed by a polynomial.
/// </summary>
/// <param name="value">Input values; clamped to [<c>MinValue</c>, <c>MaxValue</c>] first.</param>
/// <returns>The approximated exponential per lane.</returns>
/// <remarks>
/// NOTE(review): <c>Log2</c>, <c>C1</c>, <c>C2</c>, <c>P0</c>..<c>P4</c> and <c>Ox7</c> are
/// declared elsewhere; they look like log2(e), a two-part split of ln(2), the polynomial
/// coefficients and the exponent bias 0x7F respectively - confirm against their definitions.
/// </remarks>
public static Vector256<float> Exp(Vector256<float> value)
{
    // Clamp the argument before reducing it.
    Vector256<float> clamped = Max(Min(value, MaxValue), MinValue);

    // n = floor(clamped * Log2 + 0.5)
    Vector256<float> n = Floor(Add(Multiply(clamped, Log2), Point5));

    // r = clamped - n * C1 - n * C2 (subtracted in two steps)
    Vector256<float> nTimesC1 = Multiply(n, C1);
    Vector256<float> nTimesC2 = Multiply(n, C2);
    Vector256<float> r = Subtract(Subtract(clamped, nTimesC1), nTimesC2);
    Vector256<float> rSquared = Multiply(r, r);

    // Horner evaluation of the polynomial in r.
    Vector256<float> poly = Add(Multiply(P0, r), P1);
    poly = Add(Multiply(poly, r), P2);
    poly = Add(Multiply(poly, r), P3);
    poly = Add(Multiply(poly, r), P4);
    poly = Add(Multiply(poly, r), Point5);
    poly = Add(Add(Multiply(poly, rSquared), r), One);

    // Build the scale factor by placing (n + Ox7) into the float exponent field.
    Vector256<int> scaleBits = Avx2.ShiftLeftLogical(Avx2.Add(ConvertToVector256Int32(n), Ox7), 23);

    return Multiply(poly, scaleBits.As<float>());
}
/// <summary>
/// Approximates the natural logarithm for eight floats at once by splitting each
/// input into exponent and mantissa and evaluating a polynomial on the mantissa.
/// </summary>
/// <param name="value">Input values.</param>
/// <returns>
/// The approximated logarithm per lane. Lanes where <paramref name="value"/> is less than
/// or equal to zero are OR-ed with an all-ones comparison mask.
/// </returns>
/// <remarks>
/// NOTE(review): <c>MinNormPos</c>, <c>MantMask</c>, <c>Sqrthf</c>, <c>LogP0</c>..<c>LogP8</c>,
/// <c>LogQ1</c>, <c>LogQ2</c> and <c>Ox7</c> are declared elsewhere; their values are not
/// visible here.
/// </remarks>
public static Vector256<float> Log(Vector256<float> value)
{
    // Remember which lanes are <= 0; they are patched into the result at the end.
    Vector256<float> nonPositive = Compare(value, Vector256<float>.Zero, FloatComparisonMode.LessThanOrEqualOrderedNonSignaling);

    Vector256<float> m = Max(value, MinNormPos.As<float>());

    // Exponent field, minus Ox7, converted to float and incremented by one.
    Vector256<int> exponentBits = Avx2.Subtract(Avx2.ShiftRightLogical(m.As<int>(), 23), Ox7);

    // Keep the bits selected by MantMask and OR in 0.5.
    m = Or(And(m, MantMask.As<float>()), Point5);

    Vector256<float> e = Add(ConvertToVector256Single(exponentBits), One);

    // Where m < Sqrthf: e -= 1 and m = m + m - 1; elsewhere m = m - 1.
    Vector256<float> belowSqrtHalf = Compare(m, Sqrthf, FloatComparisonMode.LessThanOrderedNonSignaling);
    Vector256<float> extra = And(m, belowSqrtHalf);
    e = Subtract(e, And(One, belowSqrtHalf));
    m = Add(Subtract(m, One), extra);

    Vector256<float> mSquared = Multiply(m, m);

    // Horner evaluation of the polynomial in m.
    Vector256<float> poly = Add(Multiply(LogP0, m), LogP1);
    poly = Add(Multiply(poly, m), LogP2);
    poly = Add(Multiply(poly, m), LogP3);
    poly = Add(Multiply(poly, m), LogP4);
    poly = Add(Multiply(poly, m), LogP5);
    poly = Add(Multiply(poly, m), LogP6);
    poly = Add(Multiply(poly, m), LogP7);
    poly = Add(Multiply(poly, m), LogP8);
    poly = Multiply(Multiply(poly, m), mSquared);

    poly = Add(poly, Multiply(e, LogQ1));
    poly = Subtract(poly, Multiply(mSquared, Point5));

    Vector256<float> result = Add(Add(m, poly), Multiply(e, LogQ2));
    return Or(result, nonPositive);
}
/// <summary>
/// Writes a 256-bit vector to memory through the <c>Avx.Store</c> overload that
/// matches the element type <c>T</c>.
/// </summary>
/// <param name="address">Destination pointer; passed to <c>Avx.Store</c> after a cast to the concrete element type.</param>
/// <param name="vector256">Vector to store.</param>
/// <exception cref="NotSupportedException">
/// <c>T</c> is not one of the ten primitive element types handled below.
/// </exception>
public static void Store(T* address, Vector256<T> vector256)
{
    if (typeof(T) == typeof(sbyte))
    {
        Avx.Store((sbyte*)address, vector256.As<T, sbyte>());
        return;
    }

    if (typeof(T) == typeof(byte))
    {
        Avx.Store((byte*)address, vector256.As<T, byte>());
        return;
    }

    if (typeof(T) == typeof(short))
    {
        Avx.Store((short*)address, vector256.As<T, short>());
        return;
    }

    if (typeof(T) == typeof(ushort))
    {
        Avx.Store((ushort*)address, vector256.As<T, ushort>());
        return;
    }

    if (typeof(T) == typeof(int))
    {
        Avx.Store((int*)address, vector256.As<T, int>());
        return;
    }

    if (typeof(T) == typeof(uint))
    {
        Avx.Store((uint*)address, vector256.As<T, uint>());
        return;
    }

    if (typeof(T) == typeof(long))
    {
        Avx.Store((long*)address, vector256.As<T, long>());
        return;
    }

    if (typeof(T) == typeof(ulong))
    {
        Avx.Store((ulong*)address, vector256.As<T, ulong>());
        return;
    }

    if (typeof(T) == typeof(float))
    {
        Avx.Store((float*)address, vector256.As<T, float>());
        return;
    }

    if (typeof(T) == typeof(double))
    {
        Avx.Store((double*)address, vector256.As<T, double>());
        return;
    }

    throw new NotSupportedException();
}
/// <summary>
/// Forwards to <c>Avx2.Permute4x64</c> for the 64-bit element types.
/// </summary>
/// <param name="va">Source vector.</param>
/// <param name="control">Immediate selector byte passed unchanged to <c>Avx2.Permute4x64</c>.</param>
/// <returns>The permuted vector, reinterpreted back as <c>Vector256&lt;T&gt;</c>.</returns>
/// <exception cref="NotSupportedException">
/// <c>T</c> is not <c>long</c>, <c>ulong</c> or <c>double</c>.
/// </exception>
internal static Vector256<T> Permute4X64Vector256(Vector256<T> va, byte control)
{
    if (typeof(T) == typeof(long))
    {
        Vector256<long> permuted = Avx2.Permute4x64(va.As<T, long>(), control);
        return permuted.As<long, T>();
    }
    else if (typeof(T) == typeof(ulong))
    {
        Vector256<ulong> permuted = Avx2.Permute4x64(va.As<T, ulong>(), control);
        return permuted.As<ulong, T>();
    }
    else if (typeof(T) == typeof(double))
    {
        Vector256<double> permuted = Avx2.Permute4x64(va.As<T, double>(), control);
        return permuted.As<double, T>();
    }
    else
    {
        throw new NotSupportedException();
    }
}
/// <summary>
/// Bitwise AND of two 256-bit vectors, dispatched on the element type <c>T</c>.
/// Integer element types go through <c>Avx2.And</c>; <c>float</c> and <c>double</c>
/// go through <c>Avx.And</c>.
/// </summary>
/// <param name="va">Left operand.</param>
/// <param name="vb">Right operand.</param>
/// <returns>The AND of both operands, reinterpreted back as <c>Vector256&lt;T&gt;</c>.</returns>
/// <exception cref="NotSupportedException">
/// <c>T</c> is not one of the ten primitive element types handled below.
/// </exception>
public static Vector256<T> And(Vector256<T> va, Vector256<T> vb)
{
    if (typeof(T) == typeof(sbyte))
    {
        Vector256<sbyte> lhs = va.As<T, sbyte>();
        Vector256<sbyte> rhs = vb.As<T, sbyte>();
        return Avx2.And(lhs, rhs).As<sbyte, T>();
    }

    if (typeof(T) == typeof(byte))
    {
        Vector256<byte> lhs = va.As<T, byte>();
        Vector256<byte> rhs = vb.As<T, byte>();
        return Avx2.And(lhs, rhs).As<byte, T>();
    }

    if (typeof(T) == typeof(short))
    {
        Vector256<short> lhs = va.As<T, short>();
        Vector256<short> rhs = vb.As<T, short>();
        return Avx2.And(lhs, rhs).As<short, T>();
    }

    if (typeof(T) == typeof(ushort))
    {
        Vector256<ushort> lhs = va.As<T, ushort>();
        Vector256<ushort> rhs = vb.As<T, ushort>();
        return Avx2.And(lhs, rhs).As<ushort, T>();
    }

    if (typeof(T) == typeof(int))
    {
        Vector256<int> lhs = va.As<T, int>();
        Vector256<int> rhs = vb.As<T, int>();
        return Avx2.And(lhs, rhs).As<int, T>();
    }

    if (typeof(T) == typeof(uint))
    {
        Vector256<uint> lhs = va.As<T, uint>();
        Vector256<uint> rhs = vb.As<T, uint>();
        return Avx2.And(lhs, rhs).As<uint, T>();
    }

    if (typeof(T) == typeof(long))
    {
        Vector256<long> lhs = va.As<T, long>();
        Vector256<long> rhs = vb.As<T, long>();
        return Avx2.And(lhs, rhs).As<long, T>();
    }

    if (typeof(T) == typeof(ulong))
    {
        Vector256<ulong> lhs = va.As<T, ulong>();
        Vector256<ulong> rhs = vb.As<T, ulong>();
        return Avx2.And(lhs, rhs).As<ulong, T>();
    }

    if (typeof(T) == typeof(float))
    {
        Vector256<float> lhs = va.As<T, float>();
        Vector256<float> rhs = vb.As<T, float>();
        return Avx.And(lhs, rhs).As<float, T>();
    }

    if (typeof(T) == typeof(double))
    {
        Vector256<double> lhs = va.As<T, double>();
        Vector256<double> rhs = vb.As<T, double>();
        return Avx.And(lhs, rhs).As<double, T>();
    }

    throw new NotSupportedException();
}
/// <summary>
/// Forwards to <c>Permute2x128</c>, dispatched on the element type <c>T</c>.
/// Integer element types use the <c>Avx2</c> overloads; <c>float</c> and <c>double</c>
/// use the <c>Avx</c> overloads.
/// </summary>
/// <param name="va">First source vector.</param>
/// <param name="vb">Second source vector.</param>
/// <param name="control">Immediate selector byte passed unchanged to <c>Permute2x128</c>.</param>
/// <returns>The permuted vector, reinterpreted back as <c>Vector256&lt;T&gt;</c>.</returns>
/// <exception cref="NotSupportedException">
/// <c>T</c> is not one of the ten primitive element types handled below.
/// </exception>
internal static Vector256<T> Permute2X128Vector256(Vector256<T> va, Vector256<T> vb, byte control)
{
    if (typeof(T) == typeof(sbyte))
    {
        return Avx2.Permute2x128(va.As<T, sbyte>(), vb.As<T, sbyte>(), control).As<sbyte, T>();
    }
    else if (typeof(T) == typeof(byte))
    {
        return Avx2.Permute2x128(va.As<T, byte>(), vb.As<T, byte>(), control).As<byte, T>();
    }
    else if (typeof(T) == typeof(short))
    {
        return Avx2.Permute2x128(va.As<T, short>(), vb.As<T, short>(), control).As<short, T>();
    }
    else if (typeof(T) == typeof(ushort))
    {
        return Avx2.Permute2x128(va.As<T, ushort>(), vb.As<T, ushort>(), control).As<ushort, T>();
    }
    else if (typeof(T) == typeof(int))
    {
        return Avx2.Permute2x128(va.As<T, int>(), vb.As<T, int>(), control).As<int, T>();
    }
    else if (typeof(T) == typeof(uint))
    {
        return Avx2.Permute2x128(va.As<T, uint>(), vb.As<T, uint>(), control).As<uint, T>();
    }
    else if (typeof(T) == typeof(long))
    {
        return Avx2.Permute2x128(va.As<T, long>(), vb.As<T, long>(), control).As<long, T>();
    }
    else if (typeof(T) == typeof(ulong))
    {
        return Avx2.Permute2x128(va.As<T, ulong>(), vb.As<T, ulong>(), control).As<ulong, T>();
    }
    else if (typeof(T) == typeof(float))
    {
        return Avx.Permute2x128(va.As<T, float>(), vb.As<T, float>(), control).As<float, T>();
    }
    else if (typeof(T) == typeof(double))
    {
        return Avx.Permute2x128(va.As<T, double>(), vb.As<T, double>(), control).As<double, T>();
    }
    else
    {
        throw new NotSupportedException();
    }
}
/// <summary>
/// Forwards to <c>UnpackHigh</c>, dispatched on the element type <c>T</c>.
/// Integer element types use the <c>Avx2</c> overloads; <c>float</c> and <c>double</c>
/// use the <c>Avx</c> overloads.
/// </summary>
/// <param name="value">First operand of <c>UnpackHigh</c>.</param>
/// <param name="data">Second operand of <c>UnpackHigh</c>.</param>
/// <returns>The interleaved vector, reinterpreted back as <c>Vector256&lt;T&gt;</c>.</returns>
/// <exception cref="NotSupportedException">
/// <c>T</c> is not one of the ten primitive element types handled below.
/// </exception>
internal static Vector256<T> UnpackHighVector256(Vector256<T> value, Vector256<T> data)
{
    if (typeof(T) == typeof(sbyte))
    {
        Vector256<sbyte> first = value.As<T, sbyte>();
        Vector256<sbyte> second = data.As<T, sbyte>();
        return Avx2.UnpackHigh(first, second).As<sbyte, T>();
    }

    if (typeof(T) == typeof(byte))
    {
        Vector256<byte> first = value.As<T, byte>();
        Vector256<byte> second = data.As<T, byte>();
        return Avx2.UnpackHigh(first, second).As<byte, T>();
    }

    if (typeof(T) == typeof(short))
    {
        Vector256<short> first = value.As<T, short>();
        Vector256<short> second = data.As<T, short>();
        return Avx2.UnpackHigh(first, second).As<short, T>();
    }

    if (typeof(T) == typeof(ushort))
    {
        Vector256<ushort> first = value.As<T, ushort>();
        Vector256<ushort> second = data.As<T, ushort>();
        return Avx2.UnpackHigh(first, second).As<ushort, T>();
    }

    if (typeof(T) == typeof(int))
    {
        Vector256<int> first = value.As<T, int>();
        Vector256<int> second = data.As<T, int>();
        return Avx2.UnpackHigh(first, second).As<int, T>();
    }

    if (typeof(T) == typeof(uint))
    {
        Vector256<uint> first = value.As<T, uint>();
        Vector256<uint> second = data.As<T, uint>();
        return Avx2.UnpackHigh(first, second).As<uint, T>();
    }

    if (typeof(T) == typeof(long))
    {
        Vector256<long> first = value.As<T, long>();
        Vector256<long> second = data.As<T, long>();
        return Avx2.UnpackHigh(first, second).As<long, T>();
    }

    if (typeof(T) == typeof(ulong))
    {
        Vector256<ulong> first = value.As<T, ulong>();
        Vector256<ulong> second = data.As<T, ulong>();
        return Avx2.UnpackHigh(first, second).As<ulong, T>();
    }

    if (typeof(T) == typeof(float))
    {
        Vector256<float> first = value.As<T, float>();
        Vector256<float> second = data.As<T, float>();
        return Avx.UnpackHigh(first, second).As<float, T>();
    }

    if (typeof(T) == typeof(double))
    {
        Vector256<double> first = value.As<T, double>();
        Vector256<double> second = data.As<T, double>();
        return Avx.UnpackHigh(first, second).As<double, T>();
    }

    throw new NotSupportedException();
}
/// <summary>
/// Shuffles and lane-swaps the first 32 bytes of <paramref name="bytes"/> in place
/// using a single 256-bit AVX2 register.
/// </summary>
/// <param name="bytes">
/// Buffer to modify. Exactly 32 bytes starting at index 0 are read and written;
/// any bytes beyond that are left untouched.
/// </param>
/// <exception cref="ArgumentException">
/// <paramref name="bytes"/> is shorter than 32 bytes, so a full 256-bit load/store
/// would run past the end of the span.
/// </exception>
/// <remarks>
/// NOTE(review): <c>ReverseMaskVec</c> is declared elsewhere; it presumably reverses the
/// byte order inside each 128-bit lane, which combined with the lane swap below yields a
/// full 32-byte reversal - confirm against its definition.
/// </remarks>
public static void Avx2Reverse256InPlace(Span<byte> bytes)
{
    // The load and store below always touch a full vector; refuse shorter buffers
    // instead of reading/writing out of bounds (or through a null pointer when empty).
    if (bytes.Length < Vector256<byte>.Count)
    {
        throw new ArgumentException("At least 32 bytes are required for a 256-bit in-place reversal.", nameof(bytes));
    }

    fixed (byte* inputPointer = bytes)
    {
        Vector256<byte> inputVector = Avx2.LoadVector256(inputPointer);

        // The byte shuffle operates independently within each 128-bit lane.
        Vector256<byte> resultVector = Avx2.Shuffle(inputVector, ReverseMaskVec);

        // 0b01_00_11_10 selects qwords 2,3,0,1, i.e. swaps the two 128-bit lanes.
        resultVector = Avx2.Permute4x64(resultVector.As<byte, ulong>(), 0b01001110).As<ulong, byte>();

        Avx2.Store(inputPointer, resultVector);
    }
}
/// <summary>
/// Shuffles the first 32 bytes of the <c>_a</c> array with <c>_shuffleMask</c>, swaps the
/// two 128-bit lanes of the result and writes it back over the same 32 bytes.
/// </summary>
/// <remarks>
/// NOTE(review): no length check is performed here; this assumes <c>_a</c> holds at
/// least 32 bytes - confirm where the field is initialised.
/// </remarks>
public void Avx2Version()
{
    unsafe
    {
        fixed (byte* buffer = _a)
        {
            // Per-lane byte shuffle driven by the mask field.
            Vector256<byte> shuffled = Avx2.Shuffle(Avx2.LoadVector256(buffer), _shuffleMask);

            // 0b01_00_11_10 selects qwords 2,3,0,1, i.e. swaps the two 128-bit lanes.
            Vector256<ulong> laneSwapped = Avx2.Permute4x64(shuffled.As<byte, ulong>(), 0b01001110);

            Avx2.Store(buffer, laneSwapped.As<ulong, byte>());
        }
    }
}
/// <summary>
/// Reinterprets <paramref name="selector"/> as <c>Vector256&lt;T&gt;</c> and combines it with
/// <paramref name="vector"/> through <c>AndNot</c>.
/// </summary>
/// <typeparam name="T">Element type of the data vector.</typeparam>
/// <typeparam name="U">Element type of the selector vector.</typeparam>
/// <param name="vector">Data vector.</param>
/// <param name="selector">Bit mask; passed as the first <c>AndNot</c> operand.</param>
/// <returns>The result of <c>AndNot(selector, vector)</c>.</returns>
/// <remarks>
/// NOTE(review): assuming <c>AndNot(a, b)</c> follows the AVX convention <c>(~a) &amp; b</c>,
/// this keeps the bits of <paramref name="vector"/> where the selector is clear - confirm
/// against the <c>AndNot</c> helper.
/// </remarks>
public static Vector256<T> SelectWhereFalse<T, U>(Vector256<T> vector, Vector256<U> selector)
    where T : struct
    where U : struct
{
    Vector256<T> mask = selector.As<U, T>();
    return AndNot(mask, vector);
}
/// <summary>
/// Bitwise blend of two vectors driven by a selector mask:
/// <c>Or(And(selector, right), AndNot(selector, left))</c>.
/// </summary>
/// <typeparam name="T">Element type of the data vectors.</typeparam>
/// <typeparam name="U">Element type of the selector vector.</typeparam>
/// <param name="left">Vector combined with the selector through <c>AndNot</c>.</param>
/// <param name="right">Vector combined with the selector through <c>And</c>.</param>
/// <param name="selector">Bit mask, reinterpreted as <c>Vector256&lt;T&gt;</c>.</param>
/// <returns>The OR of both masked vectors.</returns>
/// <remarks>
/// NOTE(review): assuming <c>AndNot(a, b)</c> follows the AVX convention <c>(~a) &amp; b</c>,
/// the result takes bits from <paramref name="right"/> where the selector is set and from
/// <paramref name="left"/> where it is clear - confirm against the <c>AndNot</c> helper.
/// </remarks>
public static Vector256<T> Select<T, U>(Vector256<T> left, Vector256<T> right, Vector256<U> selector)
    where T : struct
    where U : struct
{
    Vector256<T> mask = selector.As<U, T>();
    Vector256<T> takenFromRight = And(mask, right);
    Vector256<T> takenFromLeft = AndNot(mask, left);
    return Or(takenFromRight, takenFromLeft);
}
/// <summary>
/// Scans <paramref name="data"/> in blocks of eight ints, viewing each int as two shorts,
/// and looks for an int whose first short (even short index) equals
/// <paramref name="searchValue"/>.
/// </summary>
/// <param name="data">Values to search; the length must be a multiple of 8 (checked with <c>Debug.Assert</c>) and non-zero.</param>
/// <param name="searchValue">Short value to look for at even short positions.</param>
/// <param name="startIndex">First acceptable int index; scanning starts at the block containing it.</param>
/// <param name="maxIndex">Last acceptable int index (inclusive).</param>
/// <returns>
/// The int index of the match when exactly one recognised mask pattern is found and the
/// index lies in [<paramref name="startIndex"/>, <paramref name="maxIndex"/>];
/// -1 when the matched index lies outside that range;
/// -2 when the comparison mask of the stopping block is not one of the eight
/// <c>ThreePowN</c> patterns (for example a match at an odd short position).
/// </returns>
/// <remarks>
/// NOTE(review): when no block matches at all the mask stays 0, which also falls through
/// to -2 (assuming none of the <c>ThreePowN</c> constants is 0) - confirm callers expect
/// -2 rather than -1 for "not found".
/// </remarks>
static unsafe int FindIndexOfShortAtEvenIndexHavingValue(Span<int> data, short searchValue, int startIndex, int maxIndex)
{
    // For convenience/efficiency we require arrays to be divisible by 8
    Debug.Assert(data.Length % 8 == 0);

    // Comparison pattern: searchValue at every even short position, short.MinValue at
    // every odd one. Built directly in a register instead of via a stack buffer.
    Vector256<short> targetData = Vector256.Create(
        searchValue, short.MinValue, searchValue, short.MinValue,
        searchValue, short.MinValue, searchValue, short.MinValue,
        searchValue, short.MinValue, searchValue, short.MinValue,
        searchValue, short.MinValue, searchValue, short.MinValue);

    int numBlocksProcessed = startIndex / 8;
    int maskMove = 0;

    fixed (int* pStartData = &data[0])
    {
        short* pStartDataShort = (short*)pStartData;

        // Each iteration examines 16 shorts, i.e. 8 ints.
        for (int i = numBlocksProcessed * 8 * 2; i < data.Length * 2; i += 8 * 2)
        {
            // Load this set of values to examine
            Vector256<short> vValues = Avx.LoadVector256(&pStartDataShort[i]);

            // Compare for equality
            Vector256<short> vEQ = Avx2.CompareEqual(vValues, targetData);

            // Get resulting equality mask so we can tell which index within block had target value
            maskMove = Avx2.MoveMask(vEQ.As<short, byte>());
            if (maskMove != 0)
            {
                break;
            }

            numBlocksProcessed++;
        }
    }

    // Translate mask into which index
    int indexInBlock;
    if (maskMove <= ThreePow3)
    {
        indexInBlock = maskMove switch
        {
            ThreePow0 => 0,
            ThreePow1 => 1,
            ThreePow2 => 2,
            ThreePow3 => 3,
            _ => -2 // false accidental match at odd index
        };
    }
    else
    {
        indexInBlock = maskMove switch
        {
            ThreePow4 => 4,
            ThreePow5 => 5,
            ThreePow6 => 6,
            ThreePow7 => 7,
            _ => -2 // false accidental match at odd index
        };
    }

    if (indexInBlock == -2)
    {
        return -2;
    }

    int index = numBlocksProcessed * 8 + indexInBlock;

    // A hit before startIndex (same block) or beyond maxIndex is reported as out of range.
    return (index < startIndex || index > maxIndex) ? -1 : index;
}
/// <summary>
/// Rotates every 64-bit lane by 32 bits by swapping the two 32-bit halves of each lane.
/// </summary>
/// <param name="x">Input vector; read only, never written through the reference.</param>
/// <returns>A new vector with the halves of each 64-bit lane exchanged.</returns>
private static Vector256<ulong> ror64_32_avx(ref Vector256<ulong> x)
{
    // Within each 128-bit lane pick dwords 1,0,3,2: adjacent 32-bit pairs trade places.
    const byte SwapAdjacentDwords = 0b_10_11_00_01;

    Vector256<uint> dwords = x.As<uint>();
    return Avx2.Shuffle(dwords, SwapAdjacentDwords).As<ulong>();
}
/// <summary>
/// Returns the <c>vymm</c> byte vector reinterpreted as a vector of <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">Unmanaged element type to view the 256 bits as.</typeparam>
/// <returns>The same 256 bits typed as <c>Vector256&lt;T&gt;</c>.</returns>
public Vector256<T> Vec<T>() where T : unmanaged
{
    return vymm.As<byte, T>();
}