/// <summary>
/// Field scenario driven from outside: ANDs the enclosing type's <c>_fld1</c> and <c>_fld2</c>
/// with <c>Avx.And</c>, stores the result in the caller's output buffer and hands the
/// inputs plus that buffer to the caller's <c>ValidateResult</c>.
/// </summary>
/// <param name="testClass">Test instance supplying the output buffer and the validator.</param>
public void RunStructFldScenario(SimpleBinaryOpTest__AndSingle testClass)
{
    var anded = Avx.And(_fld1, _fld2);

    Unsafe.Write(testClass._dataTable.outArrayPtr, anded);
    testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
}
/// <summary>
/// Writes the absolute value of every element of <paramref name="a"/> into the matching
/// slot of <paramref name="s"/>, processing eight floats per iteration with AVX and
/// delegating any trailing elements (fewer than eight) to <c>AbsNaive</c>.
/// </summary>
/// <param name="a">Source values.</param>
/// <param name="s">Destination; must be at least as long as <paramref name="a"/>.</param>
/// <exception cref="ArgumentException">
/// <paramref name="s"/> is shorter than <paramref name="a"/>.
/// </exception>
private static void AbsAvx(ReadOnlySpan<float> a, Span<float> s)
{
    // The vector loop stores through a raw pointer, so an undersized destination would
    // be written out of bounds. Reject it before touching memory.
    if (s.Length < a.Length)
    {
        throw new ArgumentException("Destination span is shorter than the source span.", nameof(s));
    }

    var remainder = a.Length & 7;
    var length = a.Length - remainder;

    // 0x7FFFFFFF in every lane: all bits set except the sign bit. Built as a constant so
    // the routine needs only AVX (the previous shift-based construction required AVX2).
    var mask = Vector256.Create(int.MaxValue).AsSingle();

    fixed (float* ptr = a)
    {
        fixed (float* ptrS = s)
        {
            for (var i = 0; i < length; i += 8)
            {
                var j = Avx.LoadVector256(ptr + i);
                // Clearing the sign bit yields |x| for IEEE-754 floats.
                Avx.Store(ptrS + i, Avx.And(mask, j));
            }
        }
    }

    if (remainder != 0)
    {
        AbsNaive(a, s, length, a.Length);
    }
}
/// <summary>
/// Bitwise AND of two 256-bit vectors of any element type.
/// </summary>
/// <remarks>
/// Without AVX the call is forwarded to <c>SoftwareFallbacks.And_Software</c>. With AVX,
/// <c>double</c> vectors go through the double-precision overload of <c>Avx.And</c>;
/// <c>float</c> and every other element type are reinterpreted as <c>float</c> vectors
/// and use the single-precision overload.
/// </remarks>
/// <typeparam name="T">Element type of the vectors.</typeparam>
/// <param name="left">Left operand.</param>
/// <param name="right">Right operand.</param>
/// <returns>The AND of <paramref name="left"/> and <paramref name="right"/>.</returns>
public static Vector256<T> And<T>(Vector256<T> left, Vector256<T> right) where T : struct
{
    if (!Avx.IsSupported)
    {
        return SoftwareFallbacks.And_Software(left, right);
    }

    if (typeof(T) == typeof(double))
    {
        Vector256<double> andedDoubles = Avx.And(left.AsDouble(), right.AsDouble());
        return andedDoubles.As<double, T>();
    }

    // float, and any other T, take the single-precision path.
    Vector256<float> andedSingles = Avx.And(left.AsSingle(), right.AsSingle());
    return andedSingles.As<float, T>();
}
/// <summary>
/// Instance-field scenario: ANDs <c>_fld1</c> and <c>_fld2</c> with <c>Avx.And</c>,
/// stores the result in the output buffer and passes inputs and buffer to
/// <c>ValidateResult</c>.
/// </summary>
public void RunFldScenario()
{
    var anded = Avx.And(_fld1, _fld2);

    Unsafe.Write(_dataTable.outArrayPtr, anded);
    ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
}
/// <summary>
/// Local-object field scenario: creates a fresh <c>SimpleBinaryOpTest__AndDouble</c>,
/// ANDs its <c>_fld1</c> and <c>_fld2</c> with <c>Avx.And</c>, stores the result in this
/// instance's output buffer and passes inputs and buffer to <c>ValidateResult</c>.
/// </summary>
public void RunLclFldScenario()
{
    var local = new SimpleBinaryOpTest__AndDouble();
    var anded = Avx.And(local._fld1, local._fld2);

    Unsafe.Write(_dataTable.outArrayPtr, anded);
    ValidateResult(local._fld1, local._fld2, _dataTable.outArrayPtr);
}
/// <summary>
/// Local-struct field scenario: obtains a <c>TestStruct</c> from <c>TestStruct.Create()</c>,
/// ANDs its <c>_fld1</c> and <c>_fld2</c> with <c>Avx.And</c>, stores the result in the
/// output buffer and passes inputs and buffer to <c>ValidateResult</c>.
/// </summary>
public void RunStructLclFldScenario()
{
    var local = TestStruct.Create();
    var anded = Avx.And(local._fld1, local._fld2);

    Unsafe.Write(_dataTable.outArrayPtr, anded);
    ValidateResult(local._fld1, local._fld2, _dataTable.outArrayPtr);
}
/// <summary>
/// Class-field scenario: announces itself to the test framework, ANDs <c>_fld1</c> and
/// <c>_fld2</c> with <c>Avx.And</c>, stores the result in the output buffer and passes
/// inputs and buffer to <c>ValidateResult</c>.
/// </summary>
public void RunClassFldScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));

    var anded = Avx.And(_fld1, _fld2);

    Unsafe.Write(_dataTable.outArrayPtr, anded);
    ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
}
/// <summary>
/// Local-variable scenario using aligned loads: reads both double-precision operands
/// from the input buffers with <c>Avx.LoadAlignedVector256</c>, ANDs them, stores the
/// result in the output buffer and passes operands and buffer to <c>ValidateResult</c>.
/// </summary>
public void RunLclVarScenario_LoadAligned()
{
    Vector256<double> lhs = Avx.LoadAlignedVector256((double*)_dataTable.inArray1Ptr);
    Vector256<double> rhs = Avx.LoadAlignedVector256((double*)_dataTable.inArray2Ptr);
    Vector256<double> anded = Avx.And(lhs, rhs);

    Unsafe.Write(_dataTable.outArrayPtr, anded);
    ValidateResult(lhs, rhs, _dataTable.outArrayPtr);
}
/// <summary>
/// Local-variable scenario using <c>Unsafe.Read</c>: reads both double-precision operands
/// from the input buffers, ANDs them with <c>Avx.And</c>, stores the result in the output
/// buffer and passes operands and buffer to <c>ValidateResult</c>.
/// </summary>
public void RunLclVarScenario_UnsafeRead()
{
    Vector256<double> lhs = Unsafe.Read<Vector256<double>>(_dataTable.inArray1Ptr);
    Vector256<double> rhs = Unsafe.Read<Vector256<double>>(_dataTable.inArray2Ptr);
    Vector256<double> anded = Avx.And(lhs, rhs);

    Unsafe.Write(_dataTable.outArrayPtr, anded);
    ValidateResult(lhs, rhs, _dataTable.outArrayPtr);
}
/// <summary>
/// Basic scenario using aligned loads: loads both double-precision operands from the
/// input buffers with <c>Avx.LoadAlignedVector256</c>, ANDs them, stores the result in
/// the output buffer and passes the three buffer pointers to <c>ValidateResult</c>.
/// </summary>
public void RunBasicScenario_LoadAligned()
{
    Vector256<double> lhs = Avx.LoadAlignedVector256((double*)_dataTable.inArray1Ptr);
    Vector256<double> rhs = Avx.LoadAlignedVector256((double*)_dataTable.inArray2Ptr);
    Vector256<double> anded = Avx.And(lhs, rhs);

    Unsafe.Write(_dataTable.outArrayPtr, anded);
    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
}
/// <summary>
/// Class-variable scenario: ANDs <c>_clsVar1</c> and <c>_clsVar2</c> with <c>Avx.And</c>,
/// stores the result in the output buffer and passes inputs and buffer to
/// <c>ValidateResult</c>.
/// </summary>
public void RunClsVarScenario()
{
    var anded = Avx.And(_clsVar1, _clsVar2);

    Unsafe.Write(_dataTable.outArrayPtr, anded);
    ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
}
/// <summary>
/// Local-struct field scenario: announces itself to the test framework, obtains a
/// <c>TestStruct</c> from <c>TestStruct.Create()</c>, ANDs its <c>_fld1</c> and
/// <c>_fld2</c> with <c>Avx.And</c>, stores the result in the output buffer and passes
/// inputs and buffer to <c>ValidateResult</c>.
/// </summary>
public void RunStructLclFldScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));

    var local = TestStruct.Create();
    var anded = Avx.And(local._fld1, local._fld2);

    Unsafe.Write(_dataTable.outArrayPtr, anded);
    ValidateResult(local._fld1, local._fld2, _dataTable.outArrayPtr);
}
/// <summary>
/// Local-object field scenario: announces itself to the test framework, creates a fresh
/// <c>SimpleBinaryOpTest__AndSingle</c>, ANDs its <c>_fld1</c> and <c>_fld2</c> with
/// <c>Avx.And</c>, stores the result in this instance's output buffer and passes inputs
/// and buffer to <c>ValidateResult</c>.
/// </summary>
public void RunClassLclFldScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));

    var local = new SimpleBinaryOpTest__AndSingle();
    var anded = Avx.And(local._fld1, local._fld2);

    Unsafe.Write(_dataTable.outArrayPtr, anded);
    ValidateResult(local._fld1, local._fld2, _dataTable.outArrayPtr);
}
/// <summary>
/// Basic scenario using <c>Unsafe.Read</c>: reads both double-precision operands from the
/// input buffers, ANDs them with <c>Avx.And</c>, stores the result in the output buffer
/// and passes the three buffer pointers to <c>ValidateResult</c>.
/// </summary>
public void RunBasicScenario_UnsafeRead()
{
    Vector256<double> lhs = Unsafe.Read<Vector256<double>>(_dataTable.inArray1Ptr);
    Vector256<double> rhs = Unsafe.Read<Vector256<double>>(_dataTable.inArray2Ptr);
    Vector256<double> anded = Avx.And(lhs, rhs);

    Unsafe.Write(_dataTable.outArrayPtr, anded);
    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
}
/// <summary>
/// Cross product of two 3D vectors stored in the X, Y, Z elements of 256-bit double
/// vectors. The W element of the result is masked with <c>DoubleConstants.MaskW</c>.
/// Falls back to <c>CrossProduct3D_Software</c> when AVX2 is not available.
/// </summary>
/// <param name="left">Left operand A = (x, y, z, _).</param>
/// <param name="right">Right operand B = (x, y, z, _).</param>
/// <returns>The cross product A x B with W masked.</returns>
public static Vector256<double> CrossProduct3D(Vector256<double> left, Vector256<double> right)
{
    if (!Avx2.IsSupported)
    {
        return CrossProduct3D_Software(left, right);
    }

    // The cross product of A(x, y, z, _) and B(x, y, z, _) is
    //
    //   X = (Ay * Bz) - (Az * By)
    //   Y = (Az * Bx) - (Ax * Bz)
    //   Z = (Ax * By) - (Ay * Bx)
    //
    // The left-hand products of all three subtractions are (Ay, Az, Ax, _) * (Bz, Bx, By, _)
    // and the right-hand products are (Az, Ax, Ay, _) * (By, Bz, Bx, _). The last element is
    // irrelevant because this is a Vector3 operation. Both products come from permuting the
    // inputs and multiplying; subtracting them gives the result, and W is then masked
    // because the Vector3 representation requires it.

    // A: x, y, z, _ -> y, z, x, _    B: x, y, z, _ -> z, x, y, _
    Vector256<double> leftYzx = Avx2.Permute4x64(left, ShuffleValues.YZXW);
    Vector256<double> rightZxy = Avx2.Permute4x64(right, ShuffleValues.ZXYW);

    // A: x, y, z, _ -> z, x, y, _    B: x, y, z, _ -> y, z, x, _
    Vector256<double> leftZxy = Avx2.Permute4x64(left, ShuffleValues.ZXYW);
    Vector256<double> rightYzx = Avx2.Permute4x64(right, ShuffleValues.YZXW);

    Vector256<double> minuend = Avx.Multiply(leftYzx, rightZxy);
    Vector256<double> subtrahend = Avx.Multiply(leftZxy, rightYzx);
    Vector256<double> unmaskedW = Avx.Subtract(minuend, subtrahend);

    // TODO reuse vectors (minimal register usage) - potentially prevent any stack spilling
    return Avx.And(unmaskedW, DoubleConstants.MaskW);
}
/// <summary>
/// Local-variable scenario using aligned loads: announces itself to the test framework,
/// reads both single-precision operands from the input buffers with
/// <c>Avx.LoadAlignedVector256</c>, ANDs them, stores the result in the output buffer and
/// passes operands and buffer to <c>ValidateResult</c>.
/// </summary>
public void RunLclVarScenario_LoadAligned()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));

    Vector256<float> lhs = Avx.LoadAlignedVector256((float*)_dataTable.inArray1Ptr);
    Vector256<float> rhs = Avx.LoadAlignedVector256((float*)_dataTable.inArray2Ptr);
    Vector256<float> anded = Avx.And(lhs, rhs);

    Unsafe.Write(_dataTable.outArrayPtr, anded);
    ValidateResult(lhs, rhs, _dataTable.outArrayPtr);
}
/// <summary>
/// Local-variable scenario using <c>Unsafe.Read</c>: announces itself to the test
/// framework, reads both double-precision operands from the input buffers, ANDs them with
/// <c>Avx.And</c>, stores the result in the output buffer and passes operands and buffer
/// to <c>ValidateResult</c>.
/// </summary>
public void RunLclVarScenario_UnsafeRead()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));

    Vector256<double> first = Unsafe.Read<Vector256<double>>(_dataTable.inArray1Ptr);
    Vector256<double> second = Unsafe.Read<Vector256<double>>(_dataTable.inArray2Ptr);
    Vector256<double> anded = Avx.And(first, second);

    Unsafe.Write(_dataTable.outArrayPtr, anded);
    ValidateResult(first, second, _dataTable.outArrayPtr);
}
/// <summary>
/// Local-variable scenario using unaligned loads: announces itself to the test framework,
/// reads both single-precision operands from the input buffers with
/// <c>Avx.LoadVector256</c>, ANDs them, stores the result in the output buffer and passes
/// operands and buffer to <c>ValidateResult</c>.
/// </summary>
public void RunLclVarScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));

    Vector256<float> first = Avx.LoadVector256((float*)_dataTable.inArray1Ptr);
    Vector256<float> second = Avx.LoadVector256((float*)_dataTable.inArray2Ptr);
    Vector256<float> anded = Avx.And(first, second);

    Unsafe.Write(_dataTable.outArrayPtr, anded);
    ValidateResult(first, second, _dataTable.outArrayPtr);
}
/// <summary>
/// Local-variable scenario using <c>Unsafe.Read</c>: announces itself to the test
/// framework, reads both single-precision operands from the input buffers, ANDs them with
/// <c>Avx.And</c>, stores the result in the output buffer and passes operands and buffer
/// to <c>ValidateResult</c>.
/// </summary>
public void RunLclVarScenario_UnsafeRead()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));

    Vector256<float> lhs = Unsafe.Read<Vector256<float>>(_dataTable.inArray1Ptr);
    Vector256<float> rhs = Unsafe.Read<Vector256<float>>(_dataTable.inArray2Ptr);
    Vector256<float> anded = Avx.And(lhs, rhs);

    Unsafe.Write(_dataTable.outArrayPtr, anded);
    ValidateResult(lhs, rhs, _dataTable.outArrayPtr);
}
/// <summary>
/// Basic scenario using aligned loads: announces itself to the test framework, loads both
/// double-precision operands from the input buffers with <c>Avx.LoadAlignedVector256</c>,
/// ANDs them, stores the result in the output buffer and passes the three buffer pointers
/// to <c>ValidateResult</c>.
/// </summary>
public void RunBasicScenario_LoadAligned()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));

    Vector256<double> lhs = Avx.LoadAlignedVector256((double*)_dataTable.inArray1Ptr);
    Vector256<double> rhs = Avx.LoadAlignedVector256((double*)_dataTable.inArray2Ptr);
    Vector256<double> anded = Avx.And(lhs, rhs);

    Unsafe.Write(_dataTable.outArrayPtr, anded);
    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
}
/// <summary>
/// Basic scenario using <c>Unsafe.Read</c>: announces itself to the test framework, reads
/// both single-precision operands from the input buffers, ANDs them with <c>Avx.And</c>,
/// stores the result in the output buffer and passes the three buffer pointers to
/// <c>ValidateResult</c>.
/// </summary>
public void RunBasicScenario_UnsafeRead()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));

    Vector256<float> lhs = Unsafe.Read<Vector256<float>>(_dataTable.inArray1Ptr);
    Vector256<float> rhs = Unsafe.Read<Vector256<float>>(_dataTable.inArray2Ptr);
    Vector256<float> anded = Avx.And(lhs, rhs);

    Unsafe.Write(_dataTable.outArrayPtr, anded);
    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
}
/// <summary>
/// Local-struct field scenario with explicit loads: announces itself to the test
/// framework, obtains a <c>TestStruct</c> from <c>TestStruct.Create()</c>, loads its
/// <c>_fld1</c> and <c>_fld2</c> through their addresses with <c>Avx.LoadVector256</c>,
/// ANDs them, stores the result in the output buffer and passes the fields and buffer to
/// <c>ValidateResult</c>.
/// </summary>
public void RunStructLclFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario_Load));

    var local = TestStruct.Create();

    Vector256<float> lhs = Avx.LoadVector256((float*)(&local._fld1));
    Vector256<float> rhs = Avx.LoadVector256((float*)(&local._fld2));
    Vector256<float> anded = Avx.And(lhs, rhs);

    Unsafe.Write(_dataTable.outArrayPtr, anded);
    ValidateResult(local._fld1, local._fld2, _dataTable.outArrayPtr);
}
/// <summary>
/// Field scenario with explicit loads, driven from outside: pins <c>_fld1</c> and
/// <c>_fld2</c>, loads them with <c>Avx.LoadVector256</c>, ANDs them, stores the result
/// in the caller's output buffer and hands the fields plus that buffer to the caller's
/// <c>ValidateResult</c>.
/// </summary>
/// <param name="testClass">Test instance supplying the output buffer and the validator.</param>
public void RunStructFldScenario_Load(SimpleBinaryOpTest__AndSingle testClass)
{
    fixed (Vector256<float>* pinnedLeft = &_fld1)
    fixed (Vector256<float>* pinnedRight = &_fld2)
    {
        Vector256<float> lhs = Avx.LoadVector256((float*)pinnedLeft);
        Vector256<float> rhs = Avx.LoadVector256((float*)pinnedRight);
        Vector256<float> anded = Avx.And(lhs, rhs);

        Unsafe.Write(testClass._dataTable.outArrayPtr, anded);
        testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
    }
}
/// <summary>
/// Class-field scenario with explicit loads: announces itself to the test framework, pins
/// <c>_fld1</c> and <c>_fld2</c>, loads them with <c>Avx.LoadVector256</c>, ANDs them,
/// stores the result in the output buffer and passes the fields and buffer to
/// <c>ValidateResult</c>.
/// </summary>
public void RunClassFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario_Load));

    fixed (Vector256<float>* pinnedLeft = &_fld1)
    fixed (Vector256<float>* pinnedRight = &_fld2)
    {
        Vector256<float> lhs = Avx.LoadVector256((float*)pinnedLeft);
        Vector256<float> rhs = Avx.LoadVector256((float*)pinnedRight);
        Vector256<float> anded = Avx.And(lhs, rhs);

        Unsafe.Write(_dataTable.outArrayPtr, anded);
        ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
    }
}
/// <summary>
/// Class-variable scenario with explicit loads: announces itself to the test framework,
/// pins <c>_clsVar1</c> and <c>_clsVar2</c>, loads them with <c>Avx.LoadVector256</c>,
/// ANDs them, stores the result in the output buffer and passes the variables and buffer
/// to <c>ValidateResult</c>.
/// </summary>
public void RunClsVarScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario_Load));

    fixed (Vector256<double>* pinnedLeft = &_clsVar1)
    fixed (Vector256<double>* pinnedRight = &_clsVar2)
    {
        Vector256<double> lhs = Avx.LoadVector256((double*)pinnedLeft);
        Vector256<double> rhs = Avx.LoadVector256((double*)pinnedRight);
        Vector256<double> anded = Avx.And(lhs, rhs);

        Unsafe.Write(_dataTable.outArrayPtr, anded);
        ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
    }
}
/// <summary>
/// Converts the bytes of <paramref name="valueText"/> into <c>BitState</c> values stored in
/// reverse order: the state for text position <c>i</c> is written to
/// <c>bits[bits.Length - 1 - i]</c>.
/// </summary>
/// <param name="valueText">Text bytes to convert, one byte per bit state.</param>
/// <param name="bitAlloc">Allocator asked for <c>valueText.Length</c> bit states.</param>
/// <returns>
/// The allocated memory, plus a flag that is <c>true</c> when no produced state had its
/// <c>0b10</c> bit set.
/// </returns>
/// <remarks>
/// Vector path (taken when <c>Ssse3.IsSupported</c> and there are at least
/// <c>Vector128&lt;byte&gt;.Count</c> states): each iteration loads one vector of text,
/// shuffles it with <c>shuffleIdxs</c>, derives a "first bit" (masked by
/// <c>onlyFirstBit</c>, from the byte OR-ed with itself shifted right by 1) and a "second
/// bit" (masked by <c>onlySecondBit</c>, from the byte shifted right by 5), ORs them and
/// stores the vector at the mirrored position counted from the end of <c>bits</c>. The
/// second-bit vectors are OR-accumulated and folded into the validity flag afterwards.
/// Scalar path: the remaining positions go through <c>ToBitState</c> one at a time.
/// NOTE(review): <c>shuffleIdxs</c> presumably reverses the byte order inside each vector
/// so that the vector and scalar paths agree - confirm against its definition.
/// NOTE(review): the shifts are done on 32-bit lanes, so bits cross byte boundaries; this
/// looks like it relies on the <c>onlyFirstBit</c>/<c>onlySecondBit</c> masks - confirm.
/// NOTE(review): the guard checks <c>Ssse3</c> while the calls are spelled <c>Avx.*</c> on
/// 128-bit operands; presumably these bind to the inherited 128-bit overloads - confirm.
/// NOTE(review): the scalar loop indexes <c>valueText</c> up to <c>bits.Length</c>, which
/// assumes <c>GetBits(n).Span</c> has exactly <c>n</c> elements - confirm.
/// </remarks>
internal static unsafe (UnsafeMemory <BitState> bits, bool isValidBinary) ToBitStates(ReadOnlySpan <byte> valueText, BitAllocator bitAlloc) { UnsafeMemory <BitState> bitsMem = bitAlloc.GetBits(valueText.Length); Span <BitState> bits = bitsMem.Span; ulong isValidBinary = 0; int index = 0; if (Ssse3.IsSupported && bits.Length >= Vector128 <byte> .Count) { int vecBitCount = bits.Length / Vector128 <byte> .Count; fixed(BitState *bitsPtr = bits) { fixed(byte *textPtr = valueText) { Vector128 <ulong> isValidBin = Vector128 <ulong> .Zero; for (; index < vecBitCount; index++) { var charText = Avx.LoadVector128(textPtr + index * Vector128 <byte> .Count); var byteText = Avx.Shuffle(charText, shuffleIdxs); var firstBit = Avx.And(onlyFirstBit, Avx.Or(byteText, Avx.ShiftRightLogical(byteText.AsInt32(), 1).AsByte())); var secondBit = Avx.And(onlySecondBit, Avx.ShiftRightLogical(byteText.AsInt32(), 5).AsByte()); var bytesAsBitStates = Avx.Or(firstBit, secondBit); Avx.Store((byte *)bitsPtr + bits.Length - (index + 1) * Vector128 <byte> .Count, bytesAsBitStates); isValidBin = Avx.Or(isValidBin, secondBit.AsUInt64()); } isValidBinary = isValidBin.GetElement(0) | isValidBin.GetElement(1); } } index *= Vector128 <byte> .Count; } for (; index < bits.Length; index++) { BitState bit = ToBitState(valueText[index]); bits[bits.Length - index - 1] = bit; isValidBinary |= (uint)bit & 0b10; } return(bitsMem, isValidBinary == 0); }
/// <summary>
/// Local-object field scenario with explicit loads: announces itself to the test
/// framework, creates a fresh <c>SimpleBinaryOpTest__AndSingle</c>, pins its <c>_fld1</c>
/// and <c>_fld2</c>, loads them with <c>Avx.LoadVector256</c>, ANDs them, stores the
/// result in this instance's output buffer and passes the fields and buffer to
/// <c>ValidateResult</c>.
/// </summary>
public void RunClassLclFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario_Load));

    var local = new SimpleBinaryOpTest__AndSingle();

    fixed (Vector256<float>* pinnedLeft = &local._fld1)
    fixed (Vector256<float>* pinnedRight = &local._fld2)
    {
        Vector256<float> lhs = Avx.LoadVector256((float*)pinnedLeft);
        Vector256<float> rhs = Avx.LoadVector256((float*)pinnedRight);
        Vector256<float> anded = Avx.And(lhs, rhs);

        Unsafe.Write(_dataTable.outArrayPtr, anded);
        ValidateResult(local._fld1, local._fld2, _dataTable.outArrayPtr);
    }
}
/// <summary>
/// Multiplies the operands element-wise, sums the X, Y and Z products, and returns that
/// sum in every element, with both the products and the final vector masked by
/// <c>MaskWDouble</c>. This variant combines the two 128-bit halves with a half swap
/// (<c>Permute2x128</c>) followed by an add.
/// </summary>
/// <param name="left">Left operand (X, Y, Z, W).</param>
/// <param name="right">Right operand (X, Y, Z, W).</param>
/// <returns>The summed products, masked by <c>MaskWDouble</c>.</returns>
public Vector256<double> Permute(Vector256<double> left, Vector256<double> right)
{
    // Element-wise products with W set to zero: (X, Y, Z, 0).
    Vector256<double> products = Avx.And(Avx.Multiply(left, right), MaskWDouble);

    // Pairwise sums within each half: (X + Y, X + Y, Z + 0, Z + 0).
    Vector256<double> pairSums = Avx.HorizontalAdd(products, products);

    // Swap the two halves and add to the unswapped vector, so every element holds the
    // full sum.
    Vector256<double> swappedHalves = Avx.Permute2x128(pairSums, pairSums, 0b_0000_0001);
    Vector256<double> total = Avx.Add(pairSums, swappedHalves);

    // Set W to zero.
    return Avx.And(total, MaskWDouble);
}
/// <summary>
/// Slab-style intersection of <paramref name="ray"/> with the box described by
/// <c>Minimum</c> and <c>Maximum</c>, computed with AVX/SSE2 intrinsics.
/// </summary>
/// <param name="ray">Ray whose <c>Origin</c> and <c>Direction</c> convert to 256-bit double vectors.</param>
/// <returns>
/// The near and far values along the ray, or <c>(NaN, NaN)</c> when near is greater than
/// far or far is less than zero.
/// </returns>
public (double near, double far) IntersectAVX(Ray ray)
{
    Vector256<double> rayOrigin = (Vector256<double>)ray.Origin;
    Vector256<double> rayDirection = (Vector256<double>)ray.Direction;
    Vector256<double> zero = new Vector256<double>();
    Vector256<double> boxMin = (Vector256<double>)Minimum;
    Vector256<double> boxMax = (Vector256<double>)Maximum;

    // Replace slabs that won't be checked (0 direction axis) with infinity so that NaN
    // doesn't propagate. A slab is replaced only when the origin lies inside it.
    Vector256<double> directionIsZero = Avx.Compare(rayDirection, zero, FloatComparisonMode.OrderedEqualNonSignaling);
    Vector256<double> originAboveMin = Avx.Compare(rayOrigin, boxMin, FloatComparisonMode.OrderedGreaterThanOrEqualNonSignaling);
    Vector256<double> originBelowMax = Avx.Compare(rayOrigin, boxMax, FloatComparisonMode.OrderedLessThanOrEqualNonSignaling);
    Vector256<double> infiniteSlabMask = Avx.And(directionIsZero, Avx.And(originAboveMin, originBelowMax));

    boxMin = Avx.BlendVariable(boxMin, SIMDHelpers.BroadcastScalar4(double.NegativeInfinity), infiniteSlabMask);
    boxMax = Avx.BlendVariable(boxMax, SIMDHelpers.BroadcastScalar4(double.PositiveInfinity), infiniteSlabMask);

    // Flip slabs on axes whose direction is negative. Using the direction itself as the
    // blend mask selects on the most significant bit, the sign (probably includes -0).
    Vector256<double> entrySlab = Avx.BlendVariable(boxMin, boxMax, rayDirection);
    Vector256<double> exitSlab = Avx.BlendVariable(boxMax, boxMin, rayDirection);

    // From here on the direction vector holds the reciprocal direction.
    rayDirection = Avx.Divide(Vector256.Create(1D), rayDirection);

    Vector256<double> nearPerAxis = Avx.Multiply(Avx.Subtract(entrySlab, rayOrigin), rayDirection);
    Vector256<double> farPerAxis = Avx.Multiply(Avx.Subtract(exitSlab, rayOrigin), rayDirection);

    // Reduce four lanes to one: the largest near and the smallest far.
    Vector128<double> nearest = Sse2.Max(nearPerAxis.GetLower(), nearPerAxis.GetUpper());
    nearest = Sse2.MaxScalar(nearest, SIMDHelpers.Swap(nearest));

    Vector128<double> farthest = Sse2.Min(farPerAxis.GetLower(), farPerAxis.GetUpper());
    farthest = Sse2.MinScalar(farthest, SIMDHelpers.Swap(farthest));

    bool nearBeyondFar = Sse2.CompareScalarOrderedGreaterThan(nearest, farthest);
    bool farBelowZero = Sse2.CompareScalarOrderedLessThan(farthest, new Vector128<double>());

    // Both comparisons are always evaluated (non-short-circuit OR).
    if (nearBeyondFar | farBelowZero)
    {
        return (double.NaN, double.NaN);
    }

    return (nearest.ToScalar(), farthest.ToScalar());
}
/// <summary>
/// Multiplies the operands element-wise, sums the X, Y and Z products, and returns that
/// sum in every element, with both the products and the final vector masked by
/// <c>MaskWDouble</c>. This variant reduces with two horizontal adds.
/// </summary>
/// <remarks>
/// The previous implementation shuffled with <c>Avx.Shuffle</c> between the two horizontal
/// adds. For 256-bit double vectors that shuffle never moves data across the 128-bit
/// halves, so the second horizontal add doubled each half's partial sum, giving
/// (2(X+Y), 2(X+Y), 2Z, 2Z), instead of combining them. The halves are now swapped with
/// <c>Avx.Permute2x128</c> before the first horizontal add, which needs only AVX.
/// </remarks>
/// <param name="left">Left operand (X, Y, Z, W).</param>
/// <param name="right">Right operand (X, Y, Z, W).</param>
/// <returns>The summed products, masked by <c>MaskWDouble</c>.</returns>
public Vector256<double> DoubleHadd(Vector256<double> left, Vector256<double> right)
{
    Vector256<double> mul = Avx.Multiply(left, right);

    // Set W to zero: (X, Y, Z, 0).
    Vector256<double> products = Avx.And(mul, MaskWDouble);

    // Swap the 128-bit halves: (Z, 0, X, Y).
    Vector256<double> swapped = Avx.Permute2x128(products, products, 0b_0000_0001);

    // First horizontal add interleaves pair sums of both inputs per half:
    // (X + Y, Z + 0, Z + 0, X + Y).
    Vector256<double> pairSums = Avx.HorizontalAdd(products, swapped);

    // Second horizontal add completes the sum in every element:
    // (X + Y + Z, X + Y + Z, Z + X + Y, Z + X + Y).
    Vector256<double> result = Avx.HorizontalAdd(pairSums, pairSums);

    // Set W to zero.
    result = Avx.And(result, MaskWDouble);
    return result;
}