/// <summary>
/// Packs the digits of <paramref name="arr"/> between <c>FirstZeros(arr)</c> and <c>Digits(arr)</c>
/// into 256-bit vectors, four 32-bit digits per vector, each digit converted to a 64-bit lane by
/// <c>Avx2.ConvertToVector256Int64</c> and then reinterpreted as <c>UInt32</c> lanes.
/// </summary>
/// <param name="arr">Source digits.</param>
/// <returns>The packed vectors and, as <c>shift</c>, the value returned by <c>FirstZeros(arr)</c>.</returns>
private static unsafe (Vector256<UInt32>[], int shift) ToVector(UInt32[] arr)
{
    int skipped = FirstZeros(arr);
    int digits = Digits(arr);

    int perVector = Vector256<UInt64>.Count;
    int span = digits - skipped;

    Vector256<UInt32>[] packed = new Vector256<UInt32>[(span + perVector - 1) / perVector];

    // When the span is an exact multiple of the vector width every vector is loaded in full;
    // otherwise the final vector is fetched through a mask so no digit past `digits` is read.
    int fullVectors = (span % perVector == 0) ? packed.Length : packed.Length - 1;

    fixed (UInt32* src = arr)
    fixed (Vector256<UInt32>* dst = packed)
    {
        int slot = 0;
        int offset = skipped;

        while (slot < fullVectors)
        {
            Vector128<UInt32> chunk = Avx.LoadVector128(src + offset);
            dst[slot] = Avx2.ConvertToVector256Int64(chunk).AsUInt32();
            slot++;
            offset += perVector;
        }

        if (fullVectors != packed.Length)
        {
            // Partial tail: digits - offset digits remain.
            // NOTE(review): Mask128.LSV presumably yields a mask selecting that many low lanes - confirm.
            Vector128<UInt32> tail = Avx2.MaskLoad(src + offset, Mask128.LSV((uint)(digits - offset)));
            dst[slot] = Avx2.ConvertToVector256Int64(tail).AsUInt32();
        }
    }

    return (packed, skipped);
}
/// <summary>
/// Finds the smallest element of <paramref name="x"/> using two independent 128-bit running minima
/// (eight elements per iteration) followed by a scalar pass over the remaining elements.
/// </summary>
/// <param name="x">Values to scan.</param>
/// <returns>
/// The smallest element, or <see cref="int.MaxValue"/> when <paramref name="x"/> is empty.
/// NOTE(review): relies on <c>MinBranchFree(a, b)</c> returning the smaller of its two arguments - confirm.
/// </returns>
public static unsafe int AVXVecMinIndependent(int[] x)
{
    int len = x.Length;
    var min1 = Vector128.Create(int.MaxValue);
    var min2 = Vector128.Create(int.MaxValue);

    fixed (int* pSource = x)
    {
        int i = 0;
        int lastBlockIndex = len - (len % 8);

        while (i < lastBlockIndex)
        {
            min1 = Avx.Min(min1, Avx.LoadVector128(pSource + i));
            min2 = Avx.Min(min2, Avx.LoadVector128(pSource + i + 4));
            i += 8;
        }

        // Fold the two accumulators lane-wise, then reduce across all four lanes.
        // (Previously lane 0 of each accumulator was *added*, which overflowed for
        // short inputs and ignored the minima held in lanes 1-3.)
        var min = Avx.Min(min1, min2);
        int minValue = min.ToScalar();
        for (int lane = 1; lane < Vector128<int>.Count; lane++)
        {
            minValue = MinBranchFree(minValue, min.GetElement(lane));
        }

        // Scalar tail for the len % 8 elements not covered by the vector loop.
        while (i < len)
        {
            minValue = MinBranchFree(minValue, pSource[i]);
            i += 1;
        }

        return minValue;
    }
}
// Loads the same 4-float buffer four times, sums the loads pairwise, stores the sum back and
// checks that element 0 equals 100 (4 * 25). Returns true on success, or when neither Avx nor
// AdvSimd is supported; returns false after printing a failure message otherwise.
// NOTE(review): the name suggests the redundant loads exist so the JIT can CSE them - confirm
// before restructuring any of these statements.
unsafe static bool TestXmm_CanCSE() {
    const int VecLen = 4;
    // `result` is never read in this method.
    int result = -1;
    var mem = stackalloc float [VecLen];
    var memSpan = new Span <float> (mem, VecLen);
    // Single-iteration loop: the body executes exactly once.
    for (int i = 0; i < 1; i++) {
        if (Avx.IsSupported) {
            Vector128 <float> x1, x2, x3, x4;
            Vector128 <float> x5, x6, x7;
            memSpan.Fill(25);
            // Four loads from the same address.
            x1 = Avx.LoadVector128(mem);
            x2 = Avx.LoadVector128(mem);
            x3 = Avx.LoadVector128(mem);
            x4 = Avx.LoadVector128(mem);
            // (x1 + x2) + (x3 + x4): every lane becomes 4 * 25 = 100.
            x5 = Avx.Add(x1, x2);
            x6 = Avx.Add(x3, x4);
            x7 = Avx.Add(x5, x6);
            Avx.Store(mem, x7);
            WriteArray(mem, VecLen);
        } else if (AdvSimd.IsSupported) {
            // Same sequence as above using the AdvSimd API.
            Vector128 <float> x1, x2, x3, x4;
            Vector128 <float> x5, x6, x7;
            memSpan.Fill(25);
            x1 = AdvSimd.LoadVector128(mem);
            x2 = AdvSimd.LoadVector128(mem);
            x3 = AdvSimd.LoadVector128(mem);
            x4 = AdvSimd.LoadVector128(mem);
            x5 = AdvSimd.Add(x1, x2);
            x6 = AdvSimd.Add(x3, x4);
            x7 = AdvSimd.Add(x5, x6);
            AdvSimd.Store(mem, x7);
            WriteArray(mem, VecLen);
        } else {
            // Nothing to test on this hardware; reported as a pass.
            Console.WriteLine("Hardware Intrinsics not supported");
            return(true);
        }
    }
    // Only element 0 of the stored vector is verified.
    if (mem[0] != 100.00) {
        Console.WriteLine("XMM_CanCSE Test Failed");
        return(false);
    }
    return(true);
}
/// <summary>
/// Finds the smallest element of <paramref name="x"/> using a 128-bit running minimum
/// (four elements per iteration) followed by a scalar pass over the remaining elements.
/// </summary>
/// <param name="x">Values to scan.</param>
/// <returns>
/// The smallest element, or <see cref="int.MaxValue"/> when <paramref name="x"/> is empty.
/// NOTE(review): relies on <c>MinBranchFree(a, b)</c> returning the smaller of its two arguments - confirm.
/// </returns>
public static unsafe int AVXVecMin(int[] x)
{
    int len = x.Length;
    var min = Vector128.Create(int.MaxValue);

    fixed (int* pSource = x)
    {
        int i = 0;
        int lastBlockIndex = len - (len % 4);

        while (i < lastBlockIndex)
        {
            min = Avx.Min(min, Avx.LoadVector128(pSource + i));
            i += 4;
        }

        // Reduce across all four lanes. (Previously only lane 0 was read, so a minimum
        // located at an index with index % 4 != 0 inside the vectorized range was lost.)
        int minValue = min.ToScalar();
        for (int lane = 1; lane < Vector128<int>.Count; lane++)
        {
            minValue = MinBranchFree(minValue, min.GetElement(lane));
        }

        // Scalar tail for the len % 4 elements not covered by the vector loop.
        while (i < len)
        {
            minValue = MinBranchFree(minValue, pSource[i]);
            i += 1;
        }

        return minValue;
    }
}
/// <summary>
/// Runs a fixed amount of 128-bit XOR work split across <paramref name="cores"/> parallel workers.
/// Each worker performs 1048576 / cores iterations on its own buffers; nothing is returned.
/// </summary>
/// <param name="cores">Number of parallel workers (Parallel.For runs indices 1..cores).</param>
public unsafe void IntrinsicsAVX(int cores)
{
    Parallel.For(1, cores + 1, worker =>
    {
        int rounds = 1048576 / cores;

        // Two 16-element buffers filled with constant bit patterns.
        long[] patternA = new long[16];
        long[] patternB = new long[16];
        int slot = 0;
        while (slot < patternA.Length)
        {
            patternA[slot] = 0x5555555555555555;
            patternB[slot] = 0x2AAAAAAAAAAAAAAA;
            slot++;
        }

        fixed (long* pA = patternA)
        fixed (long* pB = patternB)
        {
            Vector128<long> acc = Avx.LoadVector128(pA);
            Vector128<long> other = Avx.LoadVector128(pB);

            // Two Xor calls per iteration are deliberate: the original author's note says the
            // intent is to have two calls per step ("to reach 256 bytes").
            for (int remaining = rounds; remaining > 0; remaining--)
            {
                Vector128<long> inner = Avx.Xor(acc, other);
                acc = Avx.Xor(acc, inner).AsInt64();
            }
        }
    });
}
/// <summary>
/// Local-variable scenario: loads both operands from the data table into locals, passes them to
/// <c>Avx.PermuteVar</c>, writes the result to the output buffer and validates it.
/// </summary>
public void RunLclVarScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));

    Single* valuesPtr = (Single*)(_dataTable.inArray1Ptr);
    var values = Avx.LoadVector128(valuesPtr);

    Int32* controlPtr = (Int32*)(_dataTable.inArray2Ptr);
    var control = Avx.LoadVector128(controlPtr);

    var permuted = Avx.PermuteVar(values, control);
    Unsafe.Write(_dataTable.outArrayPtr, permuted);

    ValidateResult(values, control, _dataTable.outArrayPtr);
}
/// <summary>
/// Local-variable scenario: loads a 256-bit and a 128-bit <c>Int16</c> operand into locals, calls
/// <c>Avx.InsertVector128</c> with index 1, writes the result to the output buffer and validates it.
/// </summary>
public void RunLclVarScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));

    Int16* widePtr = (Int16*)(_dataTable.inArray1Ptr);
    var wide = Avx.LoadVector256(widePtr);

    Int16* narrowPtr = (Int16*)(_dataTable.inArray2Ptr);
    var narrow = Avx.LoadVector128(narrowPtr);

    var combined = Avx.InsertVector128(wide, narrow, 1);
    Unsafe.Write(_dataTable.outArrayPtr, combined);

    ValidateResult(wide, narrow, _dataTable.outArrayPtr);
}
/// <summary>
/// Local-variable scenario: loads both <c>Double</c> operands into locals, passes them to
/// <c>Avx.MaskStore</c> targeting the output buffer, then validates the buffer contents.
/// </summary>
public void RunLclVarScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));

    Double* firstPtr = (Double*)(_dataTable.inArray1Ptr);
    var first = Avx.LoadVector128(firstPtr);

    Double* secondPtr = (Double*)(_dataTable.inArray2Ptr);
    var second = Avx.LoadVector128(secondPtr);

    // MaskStore writes straight to memory, so there is no result value to copy out.
    Double* destination = (Double*)_dataTable.outArrayPtr;
    Avx.MaskStore(destination, first, second);

    ValidateResult(first, second, _dataTable.outArrayPtr);
}
/// <summary>
/// Basic scenario: feeds two loads from the data table directly into <c>Avx.PermuteVar</c>
/// (<c>Double</c> data, <c>Int64</c> control), writes the result to the output buffer and validates it.
/// </summary>
public void RunBasicScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));

    Double* valuesPtr = (Double*)(_dataTable.inArray1Ptr);
    Int64* controlPtr = (Int64*)(_dataTable.inArray2Ptr);

    // The loads stay nested in the call so no vector local is introduced for the operands.
    var permuted = Avx.PermuteVar(Avx.LoadVector128(valuesPtr), Avx.LoadVector128(controlPtr));

    Unsafe.Write(_dataTable.outArrayPtr, permuted);
    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
}
/// <summary>
/// Local-variable scenario: loads the <c>Int32</c>, <c>Byte</c> and <c>SByte</c> operands into locals,
/// passes them to <c>AvxVnni.MultiplyWideningAndAdd</c>, writes the result to the output buffer and
/// validates it.
/// </summary>
public void RunLclVarScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));

    Int32* addendPtr = (Int32*)(_dataTable.inArray0Ptr);
    var addend = Avx.LoadVector128(addendPtr);

    Byte* unsignedPtr = (Byte*)(_dataTable.inArray1Ptr);
    var unsignedBytes = Avx.LoadVector128(unsignedPtr);

    SByte* signedPtr = (SByte*)(_dataTable.inArray2Ptr);
    var signedBytes = Avx.LoadVector128(signedPtr);

    var accumulated = AvxVnni.MultiplyWideningAndAdd(addend, unsignedBytes, signedBytes);
    Unsafe.Write(_dataTable.outArrayPtr, accumulated);

    ValidateResult(addend, unsignedBytes, signedBytes, _dataTable.outArrayPtr);
}
/// <summary>
/// Basic scenario: feeds two <c>Double</c> loads from the data table directly into
/// <c>Avx.MaskStore</c> targeting the output buffer, then validates the buffer contents.
/// </summary>
public void RunBasicScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));

    Double* destination = (Double*)_dataTable.outArrayPtr;
    Double* firstPtr = (Double*)(_dataTable.inArray1Ptr);
    Double* secondPtr = (Double*)(_dataTable.inArray2Ptr);

    // The loads stay nested in the call so no vector local is introduced for the operands.
    Avx.MaskStore(destination, Avx.LoadVector128(firstPtr), Avx.LoadVector128(secondPtr));

    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
}
/// <summary>
/// Struct-local-field scenario: creates a local <c>TestStruct</c>, loads its two fields through
/// their addresses into <c>Avx.PermuteVar</c>, writes the result to the output buffer and validates it.
/// </summary>
public void RunStructLclFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario_Load));

    var local = TestStruct.Create();

    Double* valuesPtr = (Double*)(&local._fld1);
    Int64* controlPtr = (Int64*)(&local._fld2);

    var permuted = Avx.PermuteVar(Avx.LoadVector128(valuesPtr), Avx.LoadVector128(controlPtr));

    Unsafe.Write(_dataTable.outArrayPtr, permuted);
    ValidateResult(local._fld1, local._fld2, _dataTable.outArrayPtr);
}
/// <summary>
/// Basic scenario: feeds three loads (<c>Int32</c>, <c>Byte</c>, <c>SByte</c>) from the data table
/// directly into <c>AvxVnni.MultiplyWideningAndAdd</c>, writes the result to the output buffer and
/// validates it.
/// </summary>
public void RunBasicScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));

    Int32* addendPtr = (Int32*)(_dataTable.inArray0Ptr);
    Byte* unsignedPtr = (Byte*)(_dataTable.inArray1Ptr);
    SByte* signedPtr = (SByte*)(_dataTable.inArray2Ptr);

    // The loads stay nested in the call so no vector local is introduced for the operands.
    var accumulated = AvxVnni.MultiplyWideningAndAdd(
        Avx.LoadVector128(addendPtr),
        Avx.LoadVector128(unsignedPtr),
        Avx.LoadVector128(signedPtr));

    Unsafe.Write(_dataTable.outArrayPtr, accumulated);
    ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
}
/// <summary>
/// Reflection scenario: resolves <c>Avx.PermuteVar(Vector128&lt;Double&gt;, Vector128&lt;Int64&gt;)</c>
/// via reflection, invokes it with operands loaded from the data table, writes the unboxed result
/// to the output buffer and validates it.
/// </summary>
public void RunReflectionScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));

    var permuteVar = typeof(Avx).GetMethod(
        nameof(Avx.PermuteVar),
        new Type[] { typeof(Vector128<Double>), typeof(Vector128<Int64>) });

    var arguments = new object[]
    {
        Avx.LoadVector128((Double*)(_dataTable.inArray1Ptr)),
        Avx.LoadVector128((Int64*)(_dataTable.inArray2Ptr))
    };

    var boxed = permuteVar.Invoke(null, arguments);

    Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Double>)(boxed));
    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
}
/// <summary>
/// Narrows four 64-bit lanes to four 32-bit lanes by keeping the low 32 bits of each lane.
/// </summary>
/// <param name="l">Vector whose lanes are to be truncated.</param>
/// <returns>A 128-bit vector holding the low halves of the four input lanes, in order.</returns>
public static unsafe Vector128<int> AsInt(this Vector256<long> l)
{
    // Control 0b10_00_10_00 (= 136) selects 32-bit elements 0, 2, 0, 2 within each 128-bit half,
    // i.e. the low word of both 64-bit lanes, twice over.
    const byte SelectLowWords = 0b10_00_10_00;
    Vector256<int> lowWords = Avx2.Shuffle(l.AsInt32(), SelectLowWords);

    int* scratch = stackalloc int[8];
    Avx2.Store(scratch, lowWords);

    // scratch now holds (a, b, a, b, c, d, c, d); the window starting at index 2 is (a, b, c, d).
    return Avx.LoadVector128(&scratch[2]);
}
/// <summary>
/// Basic scenario: feeds a 256-bit and a 128-bit <c>Byte</c> load from the data table directly into
/// <c>Avx.InsertVector128</c> with index 1, writes the result to the output buffer and validates it.
/// </summary>
public void RunBasicScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));

    Byte* widePtr = (Byte*)(_dataTable.inArray1Ptr);
    Byte* narrowPtr = (Byte*)(_dataTable.inArray2Ptr);

    // The loads stay nested in the call so no vector local is introduced for the operands.
    var combined = Avx.InsertVector128(Avx.LoadVector256(widePtr), Avx.LoadVector128(narrowPtr), 1);

    Unsafe.Write(_dataTable.outArrayPtr, combined);
    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
}
/// <summary>
/// Reflection scenario: resolves <c>Avx.MaskStore(Double*, Vector128&lt;Double&gt;, Vector128&lt;Double&gt;)</c>
/// via reflection and invokes it with the boxed output pointer and operands loaded from the data
/// table, then validates the output buffer.
/// </summary>
public void RunReflectionScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));

    var maskStore = typeof(Avx).GetMethod(
        nameof(Avx.MaskStore),
        new Type[] { typeof(Double*), typeof(Vector128<Double>), typeof(Vector128<Double>) });

    // Pointers cannot be boxed directly, hence Pointer.Box for the destination argument.
    var arguments = new object[]
    {
        Pointer.Box(_dataTable.outArrayPtr, typeof(Double*)),
        Avx.LoadVector128((Double*)(_dataTable.inArray1Ptr)),
        Avx.LoadVector128((Double*)(_dataTable.inArray2Ptr))
    };

    maskStore.Invoke(null, arguments);

    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
}
private static void LoadMessage( int round, ulong *block, int *permutations, Vector256 <ulong> ffMask, Span <Vector256 <ulong> > permutedMsg) { Debug.Assert(permutedMsg.Length == 4); for (int i = 0; i < 4; i++) { var offset = round * 16 + i * Vector128 <int> .Count; var permutation = Avx.LoadVector128(permutations + offset); permutedMsg[i] = Avx2.GatherMaskVector256( source: default, // what does this do?
/// <summary>
/// Reflection scenario: resolves the <c>Vector128</c> overload of
/// <c>AvxVnni.MultiplyWideningAndAdd</c> via reflection, invokes it with operands loaded from the
/// data table, writes the unboxed result to the output buffer and validates it.
/// </summary>
public void RunReflectionScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));

    var multiplyAdd = typeof(AvxVnni).GetMethod(
        nameof(AvxVnni.MultiplyWideningAndAdd),
        new Type[] { typeof(Vector128<Int32>), typeof(Vector128<Byte>), typeof(Vector128<SByte>) });

    var arguments = new object[]
    {
        Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)),
        Avx.LoadVector128((Byte*)(_dataTable.inArray1Ptr)),
        Avx.LoadVector128((SByte*)(_dataTable.inArray2Ptr))
    };

    var boxed = multiplyAdd.Invoke(null, arguments);

    Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Int32>)(boxed));
    ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
}
/// <summary>
/// Struct-field scenario: pins this struct's two fields, loads them into <c>Avx.PermuteVar</c>,
/// writes the result to <paramref name="testClass"/>'s output buffer and validates it there.
/// </summary>
/// <param name="testClass">Test instance that owns the output buffer and the validator.</param>
public void RunStructFldScenario_Load(SimpleBinaryOpTest__PermuteVarDouble testClass)
{
    fixed (Vector128<Double>* pinnedValues = &_fld1)
    fixed (Vector128<Int64>* pinnedControl = &_fld2)
    {
        Double* valuesPtr = (Double*)(pinnedValues);
        Int64* controlPtr = (Int64*)(pinnedControl);

        var permuted = Avx.PermuteVar(Avx.LoadVector128(valuesPtr), Avx.LoadVector128(controlPtr));

        Unsafe.Write(testClass._dataTable.outArrayPtr, permuted);
        testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
    }
}
/// <summary>
/// Reflection scenario: resolves <c>Avx.InsertVector128(Vector256&lt;Int16&gt;, Vector128&lt;Int16&gt;, byte)</c>
/// via reflection, invokes it with operands loaded from the data table and index 1, writes the
/// unboxed result to the output buffer and validates it.
/// </summary>
public void RunReflectionScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));

    var insert = typeof(Avx).GetMethod(
        nameof(Avx.InsertVector128),
        new Type[] { typeof(Vector256<Int16>), typeof(Vector128<Int16>), typeof(byte) });

    // The index must be boxed as a byte to match the resolved signature.
    var arguments = new object[]
    {
        Avx.LoadVector256((Int16*)(_dataTable.inArray1Ptr)),
        Avx.LoadVector128((Int16*)(_dataTable.inArray2Ptr)),
        (byte)1
    };

    var boxed = insert.Invoke(null, arguments);

    Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Int16>)(boxed));
    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
}
/// <summary>
/// Class-variable scenario: pins <c>_clsVar1</c> and <c>_clsVar2</c>, loads them into
/// <c>Avx.PermuteVar</c>, writes the result to the output buffer and validates it.
/// </summary>
public void RunClsVarScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario_Load));

    fixed (Vector128<Single>* pinnedValues = &_clsVar1)
    fixed (Vector128<Int32>* pinnedControl = &_clsVar2)
    {
        Single* valuesPtr = (Single*)(pinnedValues);
        Int32* controlPtr = (Int32*)(pinnedControl);

        var permuted = Avx.PermuteVar(Avx.LoadVector128(valuesPtr), Avx.LoadVector128(controlPtr));

        Unsafe.Write(_dataTable.outArrayPtr, permuted);
        ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
    }
}
/// <summary>
/// Class-field scenario: pins this instance's <c>_fld1</c> and <c>_fld2</c>, loads them into
/// <c>Avx.PermuteVar</c>, writes the result to the output buffer and validates it.
/// </summary>
public void RunClassFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario_Load));

    fixed (Vector128<Double>* pinnedValues = &_fld1)
    fixed (Vector128<Int64>* pinnedControl = &_fld2)
    {
        Double* valuesPtr = (Double*)(pinnedValues);
        Int64* controlPtr = (Int64*)(pinnedControl);

        var permuted = Avx.PermuteVar(Avx.LoadVector128(valuesPtr), Avx.LoadVector128(controlPtr));

        Unsafe.Write(_dataTable.outArrayPtr, permuted);
        ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
    }
}
// Converts the bytes of valueText into BitState values, written in reverse order (the first input
// byte lands in the last output slot), and reports whether any converted value had bit 0b10 set.
//   valueText - input bytes to convert.
//   bitAlloc  - allocator asked for valueText.Length entries.
// Returns the allocated memory and isValidBinary, which is true when no converted value had
// bit 0b10 set.
// NOTE(review): the vector path stores raw bytes through a byte* view of the BitState span, which
// assumes BitState is one byte wide - confirm.
internal static unsafe (UnsafeMemory <BitState> bits, bool isValidBinary) ToBitStates(ReadOnlySpan <byte> valueText, BitAllocator bitAlloc) {
    UnsafeMemory <BitState> bitsMem = bitAlloc.GetBits(valueText.Length);
    Span <BitState> bits = bitsMem.Span;
    // Despite the name, this accumulates the 0b10 bits seen so far; zero means "valid".
    ulong isValidBinary = 0;
    int index = 0;
    // Vector path: only when Ssse3 is available and there is at least one full 16-byte block.
    // The Avx.* calls below take 128-bit operands.
    if (Ssse3.IsSupported && bits.Length >= Vector128 <byte> .Count) {
        // Number of full 16-byte blocks (not a count of bits).
        int vecBitCount = bits.Length / Vector128 <byte> .Count;
        fixed(BitState *bitsPtr = bits) {
            fixed(byte *textPtr = valueText) {
                Vector128 <ulong> isValidBin = Vector128 <ulong> .Zero;
                for (; index < vecBitCount; index++) {
                    var charText = Avx.LoadVector128(textPtr + index * Vector128 <byte> .Count);
                    // NOTE(review): shuffleIdxs presumably reverses the 16 bytes so the block matches
                    // the reversed output order used by the scalar loop - confirm.
                    var byteText = Avx.Shuffle(charText, shuffleIdxs);
                    // The shifts operate on 32-bit lanes, so bits can cross byte boundaries;
                    // presumably onlyFirstBit / onlySecondBit mask those stray bits away - confirm.
                    var firstBit = Avx.And(onlyFirstBit, Avx.Or(byteText, Avx.ShiftRightLogical(byteText.AsInt32(), 1).AsByte()));
                    var secondBit = Avx.And(onlySecondBit, Avx.ShiftRightLogical(byteText.AsInt32(), 5).AsByte());
                    var bytesAsBitStates = Avx.Or(firstBit, secondBit);
                    // Block `index` from the front of the text is written `index + 1` blocks from the end.
                    Avx.Store((byte *)bitsPtr + bits.Length - (index + 1) * Vector128 <byte> .Count, bytesAsBitStates);
                    isValidBin = Avx.Or(isValidBin, secondBit.AsUInt64());
                }
                // Fold both 64-bit halves of the accumulator into the scalar flag.
                isValidBinary = isValidBin.GetElement(0) | isValidBin.GetElement(1);
            }
        }
        // Convert the block count into an element index for the scalar tail.
        index *= Vector128 <byte> .Count;
    }
    // Scalar path: handles the remaining elements (or all of them when the vector path was skipped).
    for (; index < bits.Length; index++) {
        BitState bit = ToBitState(valueText[index]);
        bits[bits.Length - index - 1] = bit;
        isValidBinary |= (uint)bit & 0b10;
    }
    return(bitsMem, isValidBinary == 0);
}
/// <summary>
/// Builds the <c>lstable</c> and <c>mstable</c> lookup tables, each with <c>MM128UInt32s</c> entries.
/// Entry <c>n</c> of <c>lstable</c> is a four-word window read from a fixed pattern at offset
/// <c>MM128UInt32s - 1 - n</c>; <c>mstable[n]</c> is its bitwise complement.
/// </summary>
static unsafe Mask128()
{
    lstable = new Vector128<UInt32>[MM128UInt32s];
    mstable = new Vector128<UInt32>[MM128UInt32s];

    // Three all-ones words followed by four zero words. Sliding a four-word window towards the
    // start of this pattern exposes one more all-ones word at the low end each step.
    // NOTE(review): the seven-word pattern only covers offsets 0..3, i.e. assumes MM128UInt32s <= 4 - confirm.
    UInt32[] pattern = new UInt32[7] { ~0u, ~0u, ~0u, 0u, 0u, 0u, 0u };
    Vector128<UInt32> allOnes = Vector128.Create(~0u);

    fixed (UInt32* pPattern = pattern)
    {
        int offset = MM128UInt32s - 1;
        for (int n = 0; n < lstable.Length; n++, offset--)
        {
            Vector128<UInt32> window = Avx.LoadVector128(pPattern + offset);
            lstable[n] = window;
            mstable[n] = Avx2.Xor(allOnes, window);
        }
    }
}
/// <summary>
/// Writes the byte-wise XOR of <paramref name="oldScreen"/> and <paramref name="newScreen"/> into
/// <paramref name="difference"/>, 16 bytes at a time with a scalar pass for any remainder.
/// </summary>
/// <param name="oldScreen">First input buffer; must be at least as long as <paramref name="difference"/>.</param>
/// <param name="newScreen">Second input buffer; must be at least as long as <paramref name="difference"/>.</param>
/// <param name="difference">Output buffer; its length determines how many bytes are processed.</param>
/// <exception cref="System.ArgumentNullException">Any buffer is null.</exception>
/// <exception cref="System.ArgumentException">An input buffer is shorter than <paramref name="difference"/>.</exception>
public unsafe void IntrinsicsAVX(byte[] oldScreen, byte[] newScreen, byte[] difference)
{
    if (oldScreen == null) { throw new System.ArgumentNullException(nameof(oldScreen)); }
    if (newScreen == null) { throw new System.ArgumentNullException(nameof(newScreen)); }
    if (difference == null) { throw new System.ArgumentNullException(nameof(difference)); }

    // The loops below read difference.Length bytes from both inputs through raw pointers,
    // so a shorter input would be read past its end.
    if (oldScreen.Length < difference.Length || newScreen.Length < difference.Length)
    {
        throw new System.ArgumentException("Input buffers must be at least as long as the difference buffer.", nameof(difference));
    }

    int length = difference.Length;
    int vectorizedLength = length - (length % 16);

    fixed (byte* pOld = oldScreen)
    fixed (byte* pNew = newScreen)
    fixed (byte* pDiff = difference)
    {
        for (int offset = 0; offset < vectorizedLength; offset += 16)
        {
            Avx.Store(pDiff + offset, Avx.Xor(Avx.LoadVector128(pOld + offset), Avx.LoadVector128(pNew + offset)));
        }

        // Remaining length % 16 bytes, which the vector loop cannot cover.
        for (int offset = vectorizedLength; offset < length; offset++)
        {
            pDiff[offset] = (byte)(pOld[offset] ^ pNew[offset]);
        }
    }
}
/// <summary>
/// Class-local-field scenario: creates a fresh test instance, pins its <c>_fld1</c> and <c>_fld2</c>,
/// loads them into <c>Avx.PermuteVar</c>, writes the result to this instance's output buffer and
/// validates it.
/// </summary>
public void RunClassLclFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario_Load));

    var local = new SimpleBinaryOpTest__PermuteVarSingle();

    fixed (Vector128<Single>* pinnedValues = &local._fld1)
    fixed (Vector128<Int32>* pinnedControl = &local._fld2)
    {
        Single* valuesPtr = (Single*)(pinnedValues);
        Int32* controlPtr = (Int32*)(pinnedControl);

        var permuted = Avx.PermuteVar(Avx.LoadVector128(valuesPtr), Avx.LoadVector128(controlPtr));

        Unsafe.Write(_dataTable.outArrayPtr, permuted);
        ValidateResult(local._fld1, local._fld2, _dataTable.outArrayPtr);
    }
}
/// <summary>
/// Fills two 256-byte buffers with random data, then has <paramref name="cores"/> parallel workers
/// each XOR them into a private 256-byte buffer 1048576 / cores times, 16 bytes per step.
/// Nothing is returned.
/// </summary>
/// <param name="cores">Number of parallel workers (Parallel.For runs indices 1..cores).</param>
public unsafe void IntrinsicsAVX(int cores)
{
    byte[] oldScreen = new byte[256];
    byte[] newScreen = new byte[256];

    Random rng = new Random();
    rng.NextBytes(oldScreen);
    rng.NextBytes(newScreen);

    Parallel.For(1, cores + 1, worker =>
    {
        int passes = 1048576 / cores;

        // Each worker gets its own output buffer, explicitly zeroed.
        byte[] difference = new byte[256];
        int cell = 0;
        while (cell < 256)
        {
            difference[cell] = 0x00;
            cell++;
        }

        int blocks = difference.Length / 16;

        fixed (byte* pOld = oldScreen)
        fixed (byte* pNew = newScreen)
        fixed (byte* pDiff = difference)
        {
            for (int pass = 0; pass < passes; pass++)
            {
                long* oldBase = (long*)pOld;
                long* newBase = (long*)pNew;
                long* diffBase = (long*)pDiff;

                // One 128-bit block is two longs, hence the stride of 2.
                for (int block = 0; block < blocks; block++)
                {
                    int at = block * 2;
                    Avx.Store(diffBase + at, Avx.Xor(Avx.LoadVector128(oldBase + at), Avx.LoadVector128(newBase + at)));
                }
            }
        }
    });
}
/// <summary>
/// Has <paramref name="cores"/> parallel workers each XOR <paramref name="oldScreen"/> with
/// <paramref name="newScreen"/> into <paramref name="difference"/> 1048576 / cores times,
/// 16 bytes per step. All workers write to the same <paramref name="difference"/> buffer.
/// </summary>
/// <param name="oldScreen">First input buffer.</param>
/// <param name="newScreen">Second input buffer.</param>
/// <param name="difference">Output buffer; difference.Length / 16 blocks are processed per pass.</param>
/// <param name="cores">Number of parallel workers (Parallel.For runs indices 1..cores).</param>
public unsafe void IntrinsicsAVX(byte[] oldScreen, byte[] newScreen, byte[] difference, int cores)
{
    int blocks = difference.Length / 16;
    int passes = 1048576 / cores;

    Parallel.For(1, cores + 1, worker =>
    {
        fixed (byte* pOld = oldScreen)
        fixed (byte* pNew = newScreen)
        fixed (byte* pDiff = difference)
        {
            for (int pass = 0; pass < passes; pass++)
            {
                long* oldBase = (long*)pOld;
                long* newBase = (long*)pNew;
                long* diffBase = (long*)pDiff;

                // One 128-bit block is two longs, hence the stride of 2.
                for (int block = 0; block < blocks; block++)
                {
                    int at = block * 2;
                    Avx.Store(diffBase + at, Avx.Xor(Avx.LoadVector128(oldBase + at), Avx.LoadVector128(newBase + at)));
                }
            }
        }
    });
}
/// <summary>
/// Computes stand-level Scribner board foot volume (6 inch top, 32 foot logs) by evaluating the
/// per-tree volume equations four trees at a time and summing volume times expansion factor.
/// </summary>
/// <param name="trees">Trees to evaluate; must be Douglas-fir in English units.</param>
/// <returns>Sum over all processed trees of expansion factor times Scribner volume.</returns>
/// <exception cref="NotSupportedException">The species is not Douglas-fir or the units are not English.</exception>
/// <remarks>
/// NOTE(review): the loop always loads full groups of <c>Constant.Simd128x4.Width</c> trees, so it
/// presumably relies on the tree arrays being padded to a multiple of that width - confirm.
/// </remarks>
public static unsafe float GetScribnerBoardFeetPerAcre(Trees trees)
{
    // for now, assume all trees are of the same species
    if (trees.Species != FiaCode.PseudotsugaMenziesii)
    {
        throw new NotSupportedException();
    }
    if (trees.Units != Units.English)
    {
        throw new NotSupportedException();
    }

    // Douglas-fir
#if DEBUG
    Vector128<float> v6p8 = AvxExtensions.BroadcastScalarToVector128(6.8F);
    Vector128<float> v10k = AvxExtensions.BroadcastScalarToVector128(10.0F * 1000.0F);
#endif

    // constants
    Vector128<float> forestersEnglish = AvxExtensions.BroadcastScalarToVector128(Constant.ForestersEnglish);
    Vector128<float> one = AvxExtensions.BroadcastScalarToVector128(1.0F);
    Vector128<float> six = AvxExtensions.BroadcastScalarToVector128(6.0F);

    Vector128<float> vm3p21809 = AvxExtensions.BroadcastScalarToVector128(-3.21809F); // b4
    Vector128<float> v0p04948 = AvxExtensions.BroadcastScalarToVector128(0.04948F);
    Vector128<float> vm0p15664 = AvxExtensions.BroadcastScalarToVector128(-0.15664F);
    Vector128<float> v2p02132 = AvxExtensions.BroadcastScalarToVector128(2.02132F);
    Vector128<float> v1p63408 = AvxExtensions.BroadcastScalarToVector128(1.63408F);
    Vector128<float> vm0p16184 = AvxExtensions.BroadcastScalarToVector128(-0.16184F);
    Vector128<float> v1p033 = AvxExtensions.BroadcastScalarToVector128(1.033F);
    Vector128<float> v1p382937 = AvxExtensions.BroadcastScalarToVector128(1.382937F);
    Vector128<float> vm0p4015292 = AvxExtensions.BroadcastScalarToVector128(-0.4015292F);
    Vector128<float> v0p087266 = AvxExtensions.BroadcastScalarToVector128(0.087266F);
    Vector128<float> vm0p174533 = AvxExtensions.BroadcastScalarToVector128(-0.174533F);

    // ln(0.62), so that MathV.Exp(ln(0.62) * x) = 0.62^x. The previous constant was log2(0.62)
    // (-0.6896598794), which is only correct with a base-2 exponential; MathV.Exp is used as the
    // natural exponential in the b4 formula below.
    Vector128<float> vm0p4780358 = AvxExtensions.BroadcastScalarToVector128(-0.4780358F); // rc6-rs632
    Vector128<float> v0p993 = AvxExtensions.BroadcastScalarToVector128(0.993F);
    Vector128<float> v0p174439 = AvxExtensions.BroadcastScalarToVector128(0.174439F);
    Vector128<float> v0p117594 = AvxExtensions.BroadcastScalarToVector128(0.117594F);
    Vector128<float> vm8p210585 = AvxExtensions.BroadcastScalarToVector128(-8.210585F);
    Vector128<float> v0p236693 = AvxExtensions.BroadcastScalarToVector128(0.236693F);
    Vector128<float> v0p00001345 = AvxExtensions.BroadcastScalarToVector128(0.00001345F);
    Vector128<float> v0p00001937 = AvxExtensions.BroadcastScalarToVector128(0.00001937F);
    Vector128<float> v1p001491 = AvxExtensions.BroadcastScalarToVector128(1.001491F);
    Vector128<float> vm6p924097 = AvxExtensions.BroadcastScalarToVector128(-6.924097F);
    Vector128<float> v0p912733 = AvxExtensions.BroadcastScalarToVector128(0.912733F);
    Vector128<float> v0p00001351 = AvxExtensions.BroadcastScalarToVector128(0.00001351F);

    fixed (float* dbh = &trees.Dbh[0], expansionFactors = &trees.LiveExpansionFactor[0], height = &trees.Height[0])
    {
        Vector128<float> standBoardFeetPerAcre = Vector128<float>.Zero;
        for (int treeIndex = 0; treeIndex < trees.Count; treeIndex += Constant.Simd128x4.Width)
        {
            Vector128<float> dbhInInches = Avx.LoadVector128(dbh + treeIndex);
            Vector128<float> heightInFeet = Avx.LoadVector128(height + treeIndex);

            Vector128<float> logDbhInInches = MathV.Log10(dbhInInches);
            Vector128<float> logHeightInFeet = MathV.Log10(heightInFeet);
            // FiaCode.PseudotsugaMenziesii => -3.21809F + 0.04948F * logHeightInFeet * logDbhInInches - 0.15664F * logDbhInInches * logDbhInInches +
            //                                  2.02132F * logDbhInInches + 1.63408F * logHeightInFeet - 0.16184F * logHeightInFeet * logHeightInFeet,
            Vector128<float> cvtsl = Avx.Add(vm3p21809, Avx.Multiply(v0p04948, Avx.Multiply(logHeightInFeet, logDbhInInches)));
            cvtsl = Avx.Add(cvtsl, Avx.Multiply(vm0p15664, Avx.Multiply(logDbhInInches, logDbhInInches)));
            cvtsl = Avx.Add(cvtsl, Avx.Multiply(v2p02132, logDbhInInches));
            cvtsl = Avx.Add(cvtsl, Avx.Multiply(v1p63408, logHeightInFeet));
            cvtsl = Avx.Add(cvtsl, Avx.Multiply(vm0p16184, Avx.Multiply(logHeightInFeet, logHeightInFeet)));
            Vector128<float> cubicFeet = MathV.Exp10(cvtsl);

            Vector128<float> dbhSquared = Avx.Multiply(dbhInInches, dbhInInches); // could be consolidated by merging other scaling constants with Forester's constant for basal area
            Vector128<float> basalAreaInSquareFeet = Avx.Multiply(forestersEnglish, dbhSquared);
            // b4 = cubicFeet / (1.033F * (1.0F + 1.382937F * MathV.Exp(-4.015292F * dbhInInches / 10.0F)) * (basalAreaInSquareFeet + 0.087266F) - 0.174533F);
            Vector128<float> b4 = Avx.Divide(cubicFeet, Avx.Add(Avx.Multiply(v1p033, Avx.Multiply(Avx.Add(one, Avx.Multiply(v1p382937, MathV.Exp(Avx.Multiply(vm0p4015292, dbhInInches)))), Avx.Add(basalAreaInSquareFeet, v0p087266))), vm0p174533));
            Vector128<float> cv4 = Avx.Multiply(b4, Avx.Subtract(basalAreaInSquareFeet, v0p087266));

            // conversion to Scribner volumes for 32 foot trees
            // Waddell 2014:32
            // rc6 = 0.993F * (1.0F - MathF.Pow(0.62F, dbhInInches - 6.0F));
            Vector128<float> rc6 = Avx.Multiply(v0p993, Avx.Subtract(one, MathV.Exp(Avx.Multiply(vm0p4780358, Avx.Subtract(dbhInInches, six))))); // ln(0.62) = -0.4780358
            Vector128<float> cv6 = Avx.Multiply(rc6, cv4);
            Vector128<float> logB4 = MathV.Log10(b4);
            // float rs616 = MathF.Pow(10.0F, 0.174439F + 0.117594F * logDbhInInches * logB4 - 8.210585F / (dbhInInches * dbhInInches) + 0.236693F * logB4 - 0.00001345F * b4 * b4 - 0.00001937F * dbhInInches * dbhInInches);
            Vector128<float> rs616l = Avx.Add(v0p174439, Avx.Multiply(v0p117594, Avx.Multiply(logDbhInInches, logB4)));
            rs616l = Avx.Add(rs616l, Avx.Divide(vm8p210585, dbhSquared));
            rs616l = Avx.Add(rs616l, Avx.Multiply(v0p236693, logB4));
            rs616l = Avx.Subtract(rs616l, Avx.Multiply(v0p00001345, Avx.Multiply(b4, b4)));
            rs616l = Avx.Subtract(rs616l, Avx.Multiply(v0p00001937, dbhSquared));
            Vector128<float> rs616 = MathV.Exp10(rs616l);
            Vector128<float> sv616 = Avx.Multiply(rs616, cv6); // Scribner board foot volume to a 6 inch top for 16 foot logs
            // float rs632 = 1.001491F - 6.924097F / tarif + 0.00001351F * dbhInInches * dbhInInches;
            Vector128<float> rs632 = Avx.Add(v1p001491, Avx.Divide(vm6p924097, Avx.Multiply(v0p912733, b4)));
            rs632 = Avx.Add(rs632, Avx.Multiply(v0p00001351, dbhSquared));

            // Trees of 6 inches DBH or less contribute no volume.
            Vector128<float> zeroVolumeMask = Avx.CompareLessThanOrEqual(dbhInInches, six);
            Vector128<float> sv632 = Avx.Multiply(rs632, sv616); // Scribner board foot volume to a 6 inch top for 32 foot logs
            sv632 = Avx.BlendVariable(sv632, Vector128<float>.Zero, zeroVolumeMask);

#if DEBUG
            DebugV.Assert(Avx.CompareGreaterThanOrEqual(Avx.BlendVariable(rc6, Vector128<float>.Zero, zeroVolumeMask), Vector128<float>.Zero));
            DebugV.Assert(Avx.CompareLessThanOrEqual(rc6, one));
            DebugV.Assert(Avx.CompareGreaterThanOrEqual(Avx.BlendVariable(rs616, one, zeroVolumeMask), one));
            DebugV.Assert(Avx.CompareLessThanOrEqual(Avx.BlendVariable(rs616, Vector128<float>.Zero, zeroVolumeMask), v6p8));
            DebugV.Assert(Avx.CompareGreaterThanOrEqual(Avx.BlendVariable(rs632, Vector128<float>.Zero, zeroVolumeMask), Vector128<float>.Zero));
            DebugV.Assert(Avx.CompareLessThanOrEqual(Avx.BlendVariable(rs632, Vector128<float>.Zero, zeroVolumeMask), one));
            DebugV.Assert(Avx.CompareGreaterThanOrEqual(Avx.BlendVariable(sv632, Vector128<float>.Zero, zeroVolumeMask), Vector128<float>.Zero));
            DebugV.Assert(Avx.CompareLessThanOrEqual(Avx.BlendVariable(sv632, Vector128<float>.Zero, zeroVolumeMask), v10k));
#endif

            Vector128<float> expansionFactor = Avx.LoadVector128(expansionFactors + treeIndex);
            standBoardFeetPerAcre = Avx.Add(standBoardFeetPerAcre, Avx.Multiply(expansionFactor, sv632));
        }

        // Two horizontal adds leave the sum of all four lanes in element 0.
        standBoardFeetPerAcre = Avx.HorizontalAdd(standBoardFeetPerAcre, standBoardFeetPerAcre);
        standBoardFeetPerAcre = Avx.HorizontalAdd(standBoardFeetPerAcre, standBoardFeetPerAcre);
        return standBoardFeetPerAcre.ToScalar();
    }
}