/// <summary>
/// Divides two <c>Vector256&lt;Single&gt;</c> operands obtained with <c>Unsafe.Read</c> from the
/// data table's input pointers, writes the quotient to the output pointer and validates it.
/// </summary>
public void RunBasicScenario_UnsafeRead()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));

    // Both reads stay inline as call arguments, exactly as in the scenario's original shape.
    Vector256<Single> quotient = Avx.Divide(
        Unsafe.Read<Vector256<Single>>(_dataTable.inArray1Ptr),
        Unsafe.Read<Vector256<Single>>(_dataTable.inArray2Ptr));

    Unsafe.Write(_dataTable.outArrayPtr, quotient);
    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
}
/// <summary>
/// Divides the two static vectors <c>_clsVar1</c> and <c>_clsVar2</c>, writes the quotient to
/// the data table's output pointer and validates it against the same two operands.
/// </summary>
public void RunClsVarScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));

    var quotient = Avx.Divide(_clsVar1, _clsVar2);

    Unsafe.Write(_dataTable.outArrayPtr, quotient);
    ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
}
/// <summary>
/// Divides two <c>Vector256&lt;Double&gt;</c> operands fetched with
/// <c>Avx.LoadAlignedVector256</c> from the data table's input pointers, writes the quotient to
/// the output pointer and validates it.
/// </summary>
public void RunBasicScenario_LoadAligned()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));

    // The aligned loads stay inline as call arguments, as in the original scenario.
    Vector256<Double> quotient = Avx.Divide(
        Avx.LoadAlignedVector256((Double*)(_dataTable.inArray1Ptr)),
        Avx.LoadAlignedVector256((Double*)(_dataTable.inArray2Ptr)));

    Unsafe.Write(_dataTable.outArrayPtr, quotient);
    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
}
/// <summary>
/// Creates a local <c>TestStruct</c>, loads its <c>_fld1</c> and <c>_fld2</c> fields through
/// their addresses with <c>Avx.LoadVector256</c>, divides them, writes the quotient to the data
/// table's output pointer and validates it against the two fields.
/// </summary>
public void RunStructLclFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario_Load));

    var localStruct = TestStruct.Create();

    Vector256<Double> quotient = Avx.Divide(
        Avx.LoadVector256((Double*)(&localStruct._fld1)),
        Avx.LoadVector256((Double*)(&localStruct._fld2)));

    Unsafe.Write(_dataTable.outArrayPtr, quotient);
    ValidateResult(localStruct._fld1, localStruct._fld2, _dataTable.outArrayPtr);
}
/// <summary>
/// Pins this instance's <c>_fld1</c> and <c>_fld2</c>, loads them with
/// <c>Avx.LoadVector256</c>, divides them and hands the quotient to
/// <paramref name="testClass"/> for storage and validation.
/// </summary>
/// <param name="testClass">
/// Test object whose data table receives the quotient and whose <c>ValidateResult</c> is called.
/// </param>
/// <remarks>Unlike the sibling scenarios, this method does not call <c>BeginScenario</c>.</remarks>
public void RunStructFldScenario_Load(SimpleBinaryOpTest__DivideDouble testClass)
{
    fixed (Vector256<Double>* pinnedFld1 = &_fld1)
    fixed (Vector256<Double>* pinnedFld2 = &_fld2)
    {
        Vector256<Double> quotient = Avx.Divide(
            Avx.LoadVector256((Double*)(pinnedFld1)),
            Avx.LoadVector256((Double*)(pinnedFld2)));

        Unsafe.Write(testClass._dataTable.outArrayPtr, quotient);
        testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
    }
}
/// <summary>
/// Pins the static vectors <c>_clsVar1</c> and <c>_clsVar2</c>, loads them with
/// <c>Avx.LoadVector256</c>, divides them, writes the quotient to the data table's output
/// pointer and validates it.
/// </summary>
public void RunClsVarScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario_Load));

    fixed (Vector256<Single>* pinnedVar1 = &_clsVar1)
    fixed (Vector256<Single>* pinnedVar2 = &_clsVar2)
    {
        Vector256<Single> quotient = Avx.Divide(
            Avx.LoadVector256((Single*)(pinnedVar1)),
            Avx.LoadVector256((Single*)(pinnedVar2)));

        Unsafe.Write(_dataTable.outArrayPtr, quotient);
        ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
    }
}
/// <summary>
/// Pins this instance's <c>_fld1</c> and <c>_fld2</c>, loads them with
/// <c>Avx.LoadVector256</c>, divides them, writes the quotient to the data table's output
/// pointer and validates it.
/// </summary>
public void RunClassFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario_Load));

    fixed (Vector256<Double>* pinnedFld1 = &_fld1)
    fixed (Vector256<Double>* pinnedFld2 = &_fld2)
    {
        Vector256<Double> quotient = Avx.Divide(
            Avx.LoadVector256((Double*)(pinnedFld1)),
            Avx.LoadVector256((Double*)(pinnedFld2)));

        Unsafe.Write(_dataTable.outArrayPtr, quotient);
        ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
    }
}
/// <summary>
/// Creates a fresh <c>SimpleBinaryOpTest__DivideDouble</c>, pins its <c>_fld1</c> and
/// <c>_fld2</c>, loads them with <c>Avx.LoadVector256</c>, divides them, writes the quotient to
/// this instance's data table and validates it against the new object's fields.
/// </summary>
public void RunClassLclFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario_Load));

    var localTest = new SimpleBinaryOpTest__DivideDouble();

    fixed (Vector256<Double>* pinnedFld1 = &localTest._fld1)
    fixed (Vector256<Double>* pinnedFld2 = &localTest._fld2)
    {
        Vector256<Double> quotient = Avx.Divide(
            Avx.LoadVector256((Double*)(pinnedFld1)),
            Avx.LoadVector256((Double*)(pinnedFld2)));

        Unsafe.Write(_dataTable.outArrayPtr, quotient);
        ValidateResult(localTest._fld1, localTest._fld2, _dataTable.outArrayPtr);
    }
}
/// <summary>
/// Slab-tests <paramref name="ray"/> against the box spanned by <c>Minimum</c> and
/// <c>Maximum</c>, processing all axes at once in 256-bit vectors.
/// </summary>
/// <param name="ray">Ray whose <c>Origin</c> and <c>Direction</c> are converted to <c>Vector256&lt;double&gt;</c>.</param>
/// <returns>
/// The entry and exit parameters along the ray, or <c>(NaN, NaN)</c> when the entry parameter is
/// greater than the exit parameter or the exit parameter is negative.
/// </returns>
public (double near, double far) IntersectAVX(Ray ray)
{
    Vector256<double> rayOrigin = (Vector256<double>)ray.Origin;
    Vector256<double> rayDirection = (Vector256<double>)ray.Direction;
    Vector256<double> boxMin = (Vector256<double>)Minimum;
    Vector256<double> boxMax = (Vector256<double>)Maximum;

    // Axes with a zero direction component whose origin already lies inside the slab are opened
    // up to (-inf, +inf) so that the division below cannot inject NaN for them.
    Vector256<double> directionIsZero = Avx.Compare(rayDirection, Vector256<double>.Zero, FloatComparisonMode.OrderedEqualNonSignaling);
    Vector256<double> originAtOrAboveMin = Avx.Compare(rayOrigin, boxMin, FloatComparisonMode.OrderedGreaterThanOrEqualNonSignaling);
    Vector256<double> originAtOrBelowMax = Avx.Compare(rayOrigin, boxMax, FloatComparisonMode.OrderedLessThanOrEqualNonSignaling);
    Vector256<double> openSlabMask = Avx.And(directionIsZero, Avx.And(originAtOrAboveMin, originAtOrBelowMax));

    boxMin = Avx.BlendVariable(boxMin, SIMDHelpers.BroadcastScalar4(double.NegativeInfinity), openSlabMask);
    boxMax = Avx.BlendVariable(boxMax, SIMDHelpers.BroadcastScalar4(double.PositiveInfinity), openSlabMask);

    // BlendVariable selects on each element's most significant bit, so passing the direction as
    // the mask swaps min/max on every axis whose direction has its sign bit set.
    Vector256<double> entryPlane = Avx.BlendVariable(boxMin, boxMax, rayDirection);
    Vector256<double> exitPlane = Avx.BlendVariable(boxMax, boxMin, rayDirection);

    Vector256<double> inverseDirection = Avx.Divide(Vector256.Create(1D), rayDirection);
    Vector256<double> entryPerAxis = Avx.Multiply(Avx.Subtract(entryPlane, rayOrigin), inverseDirection);
    Vector256<double> exitPerAxis = Avx.Multiply(Avx.Subtract(exitPlane, rayOrigin), inverseDirection);

    // Horizontal reduction: largest entry parameter and smallest exit parameter end up in lane 0.
    Vector128<double> entry = Sse2.Max(entryPerAxis.GetLower(), entryPerAxis.GetUpper());
    entry = Sse2.MaxScalar(entry, SIMDHelpers.Swap(entry));

    Vector128<double> exit = Sse2.Min(exitPerAxis.GetLower(), exitPerAxis.GetUpper());
    exit = Sse2.MinScalar(exit, SIMDHelpers.Swap(exit));

    // Both scalar comparisons are always evaluated, as with the original non-short-circuit '|'.
    bool entryBeyondExit = Sse2.CompareScalarOrderedGreaterThan(entry, exit);
    bool exitIsNegative = Sse2.CompareScalarOrderedLessThan(exit, Vector128<double>.Zero);
    if (entryBeyondExit || exitIsNegative)
    {
        return (double.NaN, double.NaN);
    }

    return (entry.ToScalar(), exit.ToScalar());
}
/// <summary>
/// Evaluates the digamma function lane-wise on eight floats.
/// </summary>
/// <remarks>
/// The argument is shifted up by three using psi(x) = psi(x + 1) - 1/x, after which the
/// asymptotic expansion ln(x) - 1/(2x) - 1/(12x^2) + 1/(120x^4) - 1/(252x^6) + 1/(240x^8)
/// - 1/(132x^10) is evaluated in nested (Horner) form.
/// </remarks>
/// <param name="x">Input lanes.</param>
/// <returns>The approximated digamma value for every lane.</returns>
public static Vector256<float> Digamma(Vector256<float> x)
{
    const float s3 = 1.0f / 12.0f;
    const float s4 = 1.0f / 120.0f;
    const float s5 = 1.0f / 252.0f;
    const float s6 = 1.0f / 240.0f;
    const float s7 = 1.0f / 132.0f;
    const float half = 0.5f;
    const float one = 1f;

    Vector256<float> vone = Vector256.Create(one);

    // Note: 'Reciprocal' does not yield the same numerical results as 'Divide(1f, x)',
    // so exact division is used throughout.
    Vector256<float> result = Vector256<float>.Zero;
    for (int i = 0; i < 3; i++)
    {
        result = Avx.Subtract(result, Avx.Divide(vone, x));

        // Was 'Avx2.Add': for Vector256<float> that call binds to the inherited Avx.Add anyway,
        // so naming Avx directly is equivalent and does not suggest an AVX2 requirement.
        x = Avx.Add(x, vone);
    }

    Vector256<float> r = Avx.Divide(vone, x);
    result = Avx.Add(result, Avx.Subtract(Log(x), Avx.Multiply(Vector256.Create(half), r)));

    // From here on r holds 1/x^2.
    r = Avx.Multiply(r, r);

    // r * (s3 - r * (s4 - r * (s5 - r * (s6 - r * s7)))), built from the innermost term outwards.
    Vector256<float> series = Avx.Multiply(r, Vector256.Create(s7));
    series = Avx.Multiply(r, Avx.Subtract(Vector256.Create(s6), series));
    series = Avx.Multiply(r, Avx.Subtract(Vector256.Create(s5), series));
    series = Avx.Multiply(r, Avx.Subtract(Vector256.Create(s4), series));
    series = Avx.Multiply(r, Avx.Subtract(Vector256.Create(s3), series));

    return Avx.Subtract(result, series);
}
/// <summary>
/// Computes, for four trees at once, the growth effective age and the potential height growth
/// over one time step from the site's height-curve constants.
/// </summary>
/// <param name="site">Site constants supplying <c>B1</c>, <c>B2</c>, <c>X1</c>, <c>X2toB2</c> and <c>SiteIndexFromGround</c>.</param>
/// <param name="timeStepInYears">Length of the time step; it is added to <c>site.X1</c> before being broadcast.</param>
/// <param name="treeHeight">Current heights of the four trees.</param>
/// <param name="potentialHeightGrowth">Receives the potential height minus <paramref name="treeHeight"/> for each lane.</param>
/// <returns>
/// The growth effective age per lane. Lanes whose intermediate <c>XX1</c> is less than or equal
/// to zero are set to 500.
/// </returns>
public static Vector128<float> GetBrucePsmeAbgrGrowthEffectiveAge(SiteConstants site, float timeStepInYears, Vector128<float> treeHeight, out Vector128<float> potentialHeightGrowth)
{
    Vector128<float> B1 = AvxExtensions.BroadcastScalarToVector128(site.B1);
    Vector128<float> B2 = AvxExtensions.BroadcastScalarToVector128(site.B2);
    Vector128<float> X2toB2 = AvxExtensions.BroadcastScalarToVector128(site.X2toB2);
    Vector128<float> siteIndexFromGround128 = AvxExtensions.BroadcastScalarToVector128(site.SiteIndexFromGround);
    Vector128<float> X1 = AvxExtensions.BroadcastScalarToVector128(site.X1);

    // XX1 = ln(treeHeight / siteIndexFromGround) / B1 + X2toB2
    Vector128<float> XX1 = Avx.Add(Avx.Divide(MathV.Ln(Avx.Divide(treeHeight, siteIndexFromGround128)), B1), X2toB2);
    Vector128<float> xx1NotPositive = Avx.CompareLessThanOrEqual(XX1, Vector128<float>.Zero);

    // The exponent 1/B2 is computed with a true division. The previous Avx.Reciprocal call maps
    // to the approximate reciprocal instruction, whose reduced precision is then amplified by
    // being used as a power exponent.
    Vector128<float> oneOverB2 = Avx.Divide(AvxExtensions.BroadcastScalarToVector128(1.0F), B2);
    Vector128<float> growthEffectiveAge = Avx.Subtract(MathV.Pow(XX1, oneOverB2), X1);
    growthEffectiveAge = Avx.BlendVariable(growthEffectiveAge, AvxExtensions.BroadcastScalarToVector128(500.0F), xx1NotPositive);

    // potentialHeight = siteIndexFromGround * exp(B1 * ((age + timeStep + X1)^B2 - X2toB2))
    Vector128<float> timeStepInYearsPlusX1 = AvxExtensions.BroadcastScalarToVector128(timeStepInYears + site.X1);
    Vector128<float> potentialHeightPower = Avx.Multiply(B1, Avx.Subtract(MathV.Pow(Avx.Add(growthEffectiveAge, timeStepInYearsPlusX1), B2), X2toB2));
    Vector128<float> potentialHeight = Avx.Multiply(siteIndexFromGround128, MathV.Exp(potentialHeightPower));

    potentialHeightGrowth = Avx.Subtract(potentialHeight, treeHeight);
    return growthEffectiveAge;
}
/// <summary>
/// Repeatedly divides a 256-bit float vector by a constant vector until cancellation is
/// requested, counting the completed batches.
/// </summary>
/// <param name="cancellationToken">Polled once per batch; the loop ends when cancellation is requested.</param>
/// <returns>
/// The number of batches completed (each batch performs <c>LENGTH</c> divisions), or 0 when AVX
/// is not supported.
/// </returns>
public override ulong Run(CancellationToken cancellationToken)
{
    if (!Avx.IsSupported)
    {
        return 0uL;
    }

    float[] divisorValues =
    {
        RANDOM_FLOAT, RANDOM_FLOAT, RANDOM_FLOAT, RANDOM_FLOAT,
        RANDOM_FLOAT, RANDOM_FLOAT, RANDOM_FLOAT, RANDOM_FLOAT
    };
    float[] dividendValues = Enumerable.Repeat(float.MaxValue, 8).ToArray();

    ulong completedBatches = 0uL;

    unsafe
    {
        fixed (float* dividendPtr = dividendValues)
        fixed (float* divisorPtr = divisorValues)
        {
            Vector256<float> divisor = Avx.LoadVector256(divisorPtr);
            Vector256<float> running = Avx.LoadVector256(dividendPtr);

            while (!cancellationToken.IsCancellationRequested)
            {
                for (var j = 0; j < LENGTH; j++)
                {
                    running = Avx.Divide(running, divisor);
                }

                // Write the running value back to memory once per batch.
                Avx.Store(dividendPtr, running);
                completedBatches++;
            }
        }
    }

    return completedBatches;
}
/// <summary>
/// Evaluates the logarithm of the gamma function lane-wise on eight floats.
/// </summary>
/// <remarks>
/// The argument is shifted by one to improve accuracy, using
/// logGamma(x) = logGamma(x + 1) - log(x), at the cost of one extra <c>Log</c> call. The shifted
/// value is evaluated with the continued fraction of Abramowitz &amp; Stegun eq. 6.1.48.
/// </remarks>
/// <param name="x">Input lanes.</param>
/// <returns>The approximated log-gamma value for every lane.</returns>
public static Vector256<float> LogGamma(Vector256<float> x)
{
    const float one = 1f;
    const float half = 0.5f;
    const float Ln2Pi_2 = 0.91893853320467274178032f;

    // Continued-fraction coefficients.
    const float a0 = (float)(1.0 / 12);
    const float a1 = (float)(1.0 / 30);
    const float a2 = (float)(53.0 / 210);
    const float a3 = (float)(195.0 / 371);
    const float a4 = (float)(22999.0 / 22737);
    const float a5 = (float)(29944523.0 / 19733142);
    const float a6 = (float)(109535241009.0 / 48264275462);

    // log(x) of the unshifted argument; subtracted at the very end.
    Vector256<float> logOfInput = Log(x);
    x = Avx.Add(x, Vector256.Create(one));

    // Fold the continued fraction from the innermost term (a6 / x) outwards to a0 / (x + ...).
    Vector256<float> fraction = Avx.Divide(Vector256.Create(a6), x);
    fraction = Avx.Divide(Vector256.Create(a5), Avx.Add(x, fraction));
    fraction = Avx.Divide(Vector256.Create(a4), Avx.Add(x, fraction));
    fraction = Avx.Divide(Vector256.Create(a3), Avx.Add(x, fraction));
    fraction = Avx.Divide(Vector256.Create(a2), Avx.Add(x, fraction));
    fraction = Avx.Divide(Vector256.Create(a1), Avx.Add(x, fraction));
    fraction = Avx.Divide(Vector256.Create(a0), Avx.Add(x, fraction));

    // (fraction - x) + (x - 1/2) * log(x) + ln(2*pi)/2
    Vector256<float> fractionMinusX = Avx.Subtract(fraction, x);
    Vector256<float> weightedLog = Avx.Multiply(Avx.Subtract(x, Vector256.Create(half)), Log(x));
    Vector256<float> shifted = Avx.Add(Avx.Add(fractionMinusX, weightedLog), Vector256.Create(Ln2Pi_2));

    return Avx.Subtract(shifted, logOfInput);
}
/// <summary>
/// Worked example that exercises a range of <c>Vector64</c>/<c>Vector128</c>/<c>Vector256</c>,
/// SSE, AVX and FMA calls. Every result is kept in a local; nothing is returned or stored on
/// the instance. All AVX work is skipped when <c>Avx.IsSupported</c> is false.
/// </summary>
public Intro()
{
    var middleVector = Vector128.Create(1.0f);          // <1, 1, 1, 1>
    middleVector = Vector128.CreateScalar(-1.0f);       // <-1, 0, 0, 0>

    // Raw little-endian bytes of 1.0f and -1.0f: <0, 0, 128, 63, 0, 0, 128, 191>
    var floatBytes = Vector64.AsByte(Vector64.Create(1.0f, -1.0f));

    if (!Avx.IsSupported)
    {
        return;
    }

    var left = Vector256.Create(-2.5f);     // <-2.5, -2.5, -2.5, -2.5, -2.5, -2.5, -2.5, -2.5>
    var right = Vector256.Create(5.0f);     // <5, 5, 5, 5, 5, 5, 5, 5>

    // Even elements subtract, odd elements add: <-7.5, 2.5, -7.5, 2.5, -7.5, 2.5, -7.5, 2.5>
    Vector256<float> result = Avx.AddSubtract(left, right);

    left = Vector256.Create(-1.0f, -2.0f, -3.0f, -4.0f, -50.0f, -60.0f, -70.0f, -80.0f);
    right = Vector256.Create(0.0f, 2.0f, 3.0f, 4.0f, 50.0f, 60.0f, 70.0f, 80.0f);

    result = Avx.UnpackHigh(left, right);               // <-3, 3, -4, 4, -70, 70, -80, 80>
    result = Avx.UnpackLow(left, right);                // <-1, 0, -2, 2, -50, 50, -60, 60>
    result = Avx.DotProduct(left, right, 0b1111_0001);  // <-29, 0, 0, 0, -17400, 0, 0, 0>

    bool carry = Avx.TestC(left, right);                // true
    carry = Avx.TestC(right, left);                     // false

    // right[0] is 0, so the first quotient is -1 / 0 = -Infinity; the other lanes are -1.
    Vector256<float> quotient = Avx.Divide(left, right);
    var plusOne = Vector256.Create(1.0f);
    result = Avx.Compare(right, quotient, FloatComparisonMode.OrderedGreaterThanNonSignaling);
    result = Avx.Compare(right, quotient, FloatComparisonMode.UnorderedNotLessThanNonSignaling);

    left = Vector256.Create(0.0f, 3.0f, -3.0f, 4.0f, -50.0f, 60.0f, -70.0f, 80.0f);
    right = Vector256.Create(0.0f, 2.0f, 3.0f, 2.0f, 50.0f, -60.0f, 70.0f, -80.0f);
    // 0 / 0 in the first lane: <NaN, 1.5, -1, 2, -1, -1, -1, -1>
    Vector256<float> quotientWithNaN = Avx.Divide(left, right);

    left = Vector256.Create(1.1f, 3.3333333f, -3.0f, 4.22f, -50.0f, 60.0f, -70.0f, 80.0f);
    // 1.1 / 0 in the first lane gives +Infinity.
    Vector256<float> quotientWithInfinity = Avx.Divide(left, right);

    left = Vector256.Create(-1.1f, 3.0f, 1.0f / 3.0f, MathF.PI, -50.0f, 60.0f, -70.0f, 80.0f);
    right = Vector256.Create(0.0f, 2.0f, 3.1f, 2.0f, 50.0f, -60.0f, 70.0f, -80.0f);

    // A true lane is all one-bits, which reads back as NaN: <0, NaN, 0, NaN, 0, NaN, 0, NaN>
    Vector256<float> compareResult = Avx.Compare(left, right, FloatComparisonMode.OrderedGreaterThanNonSignaling);

    // Takes 'right' where the mask lane is set, 'left' elsewhere:
    // <-1.1, 2, 1/3, 2, -50, -60, -70, -80>
    Vector256<float> mixed = Avx.BlendVariable(left, right, compareResult);

    //left = Vector256.Create(-1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f);
    //right = Vector256.Create(1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 1.0f, -1.0f, 1.0f);
    right = Vector256.Create(0.0f, 2.0f, 3.0f, 2.0f, 50.0f, -60.0f, 70.0f, -80.0f);
    Vector256<float> other = right;

    bool signsDisjoint = Avx.TestZ(plusOne, compareResult);         // true: plusOne has no sign bit set
    bool signsCovered = Avx.TestC(plusOne, compareResult);          // false
    bool anyLaneSet = !Avx.TestZ(compareResult, compareResult);     // true as soon as one lane is set

    // Only lane 3 compares equal (2 == 2): <0, 0, 0, NaN, 0, 0, 0, 0>
    compareResult = Avx.Compare(quotientWithNaN, right, FloatComparisonMode.OrderedEqualNonSignaling);
    // The unordered variant is also true for the NaN operand in lane 0: <NaN, 0, 0, NaN, 0, 0, 0, 0>
    compareResult = Avx.Compare(quotientWithNaN, right, FloatComparisonMode.UnorderedEqualNonSignaling);
    compareResult = Avx.Compare(quotientWithInfinity, right, FloatComparisonMode.UnorderedNotLessThanOrEqualNonSignaling);
    // Lanes 0, 3, 5 and 7 are greater than 'right'.
    compareResult = Avx.Compare(quotientWithInfinity, right, FloatComparisonMode.OrderedGreaterThanNonSignaling);

    var left128 = Vector128.Create(1.0f, 2.0f, 3.0f, 4.0f);
    var right128 = Vector128.Create(2.0f, 3.0f, 4.0f, 5.0f);
    Vector128<float> greaterThan128 = Sse.CompareGreaterThan(left128, right128);   // <0, 0, 0, 0>

    // One bit per lane, taken from each lane's sign bit.
    int signMask = Avx.MoveMask(compareResult);

    if (Fma.IsSupported)
    {
        Vector256<float> resultFma = Fma.MultiplyAdd(left, right, other);   // left * right + other
        resultFma = Fma.MultiplyAddNegated(left, right, other);             // -(left * right) + other
        resultFma = Fma.MultiplySubtract(left, right, other);               // left * right - other

        // Odd elements add 'other', even elements subtract it. The result is discarded.
        Fma.MultiplyAddSubtract(left, right, other);
    }

    // Mask 1010 sums the products of elements 1 and 3 of each 128-bit half into element 0;
    // the upper half gives 60 * -60 + 80 * -80 = -10000.
    result = Avx.DotProduct(left, right, 0b1010_0001);
    result = Avx.Floor(left);               // <-2, 3, 0, 3, -50, 60, -70, 80>
    result = Avx.Add(left, right);          // element-wise sum
    result = Avx.Ceiling(left);             // <-1, 3, 1, 4, -50, 60, -70, 80>
    result = Avx.Multiply(left, right);     // element-wise product
    // Per 128-bit half: <l0 + l1, l2 + l3, r0 + r1, r2 + r3>
    result = Avx.HorizontalAdd(left, right);
    // Per 128-bit half: <l0 - l1, l2 - l3, r0 - r1, r2 - r3>
    result = Avx.HorizontalSubtract(left, right);

    double[] someDoubles = new double[] { 1.0, 3.0, -2.5, 7.5, 10.8, 0.33333 };
    double[] someOtherDoubles = new double[] { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 };
    double[] someResult = new double[someDoubles.Length];
    float[] someFloats = new float[] { 1, 2, 3, 4, 10, 20, 30, 40, 0 };
    float[] someOtherFloats = new float[] { 1, 1, 1, 1, 1, 1, 1, 1 };

    unsafe
    {
        fixed (double* source = &someDoubles[1])
        fixed (double* destination = &someResult[0])
        {
            // Loads four doubles starting at index 1: <3, -2.5, 7.5, 10.8>
            Vector256<double> loaded = Avx.LoadVector256(source);
            Avx.Store(destination, loaded);
        }

        fixed (float* first = &someFloats[0])
        fixed (float* second = &someOtherFloats[0])
        {
            // Mask 0001_0001 keeps only the product of element 0 of each half: <1, 0, 0, 0, 10, 0, 0, 0>
            Vector256<float> dot = Avx.DotProduct(Avx.LoadVector256(first), Avx.LoadVector256(second), 0b0001_0001);
            //Avx.Store(second, dot);
        }
    }
}
/// <summary>
/// Divides <paramref name="lhs"/> by <paramref name="rhs"/> by forwarding both to <c>Avx.Divide</c>.
/// </summary>
/// <param name="lhs">Dividend.</param>
/// <param name="rhs">Divisor.</param>
/// <returns>The value returned by <c>Avx.Divide</c>.</returns>
public static f32 Div(f32 lhs, f32 rhs)
{
    return Avx.Divide(lhs, rhs);
}
/// <summary>
/// Element-wise division of two vector sequences: pairs the sequences up with <c>Zip</c> and
/// divides each vector of <paramref name="this"/> by the matching vector of
/// <paramref name="other"/>.
/// </summary>
/// <param name="this">Sequence of dividends.</param>
/// <param name="other">Sequence of divisors.</param>
/// <returns>A lazily evaluated sequence of quotients, one per pair produced by <c>Zip</c>.</returns>
public static IEnumerable<Vector256<double>> Div(
    this IEnumerable<Vector256<double>> @this,
    IEnumerable<Vector256<double>> other)
{
    return @this.Zip(other, (dividend, divisor) => Avx.Divide(dividend, divisor));
}
/// <summary>
/// Lane-wise division of two 8 x float vectors via <c>Avx.Divide</c>.
/// </summary>
/// <param name="left">Dividend lanes.</param>
/// <param name="right">Divisor lanes.</param>
/// <returns><paramref name="left"/> divided by <paramref name="right"/>, lane by lane.</returns>
public static Vector256<float> op_Division(Vector256<float> left, Vector256<float> right)
{
    return Avx.Divide(left, right);
}
/// <summary>
/// Divides every lane of an 8 x float vector by the same scalar.
/// </summary>
/// <param name="vector">Dividend lanes.</param>
/// <param name="scalar">Divisor; broadcast to all eight lanes before dividing.</param>
/// <returns>Each lane of <paramref name="vector"/> divided by <paramref name="scalar"/>.</returns>
public static Vector256<float> op_Division(Vector256<float> vector, float scalar)
{
    Vector256<float> broadcastDivisor = Vector256.Create(scalar);
    return Avx.Divide(vector, broadcastDivisor);
}
/// <summary>
/// Lane-wise division of two 4 x double vectors via <c>Avx.Divide</c>.
/// </summary>
/// <param name="left">Dividend lanes.</param>
/// <param name="right">Divisor lanes.</param>
/// <returns><paramref name="left"/> divided by <paramref name="right"/>, lane by lane.</returns>
public static Vector256<double> op_Division(Vector256<double> left, Vector256<double> right)
{
    return Avx.Divide(left, right);
}
/// <summary>
/// Computes the centred moving average of <paramref name="values"/> with a window of
/// <c>2 * halfWindow + 1</c> samples.
/// </summary>
/// <remarks>
/// The first average is computed directly. Every further element is first filled with
/// <c>(values[i - 1 + window] - values[i - 1]) / window</c>, the change between neighbouring
/// averages, and a running prefix sum then turns those deltas into the averages themselves.
/// The delta pass uses AVX sixteen elements at a time when available; the scalar loop handles
/// the remainder, or everything when AVX is not supported.
/// </remarks>
/// <param name="values">Input samples.</param>
/// <param name="halfWindow">Number of samples on each side of the centre; must not be negative.</param>
/// <returns>
/// An array of <c>values.Length - window + 1</c> averages, or <c>null</c> when the input is
/// shorter than the window.
/// </returns>
/// <exception cref="System.ArgumentOutOfRangeException"><paramref name="halfWindow"/> is negative.</exception>
public unsafe override double[] Applay(double[] values, int halfWindow)
{
    if (halfWindow < 0)
    {
        // A negative window used to make the loops below read outside 'values'.
        throw new System.ArgumentOutOfRangeException(nameof(halfWindow));
    }

    // Computed in 64 bits so that a huge halfWindow cannot wrap around to a small window.
    long requestedWindow = 2L * halfWindow + 1;
    if (requestedWindow > values.Length)
    {
        // Covers the original "resultSize == 0" case and also negative sizes, which previously
        // failed inside the array allocation instead of returning null.
        return null;
    }

    var windowSize = (int)requestedWindow;
    var resultSize = values.Length - windowSize + 1;

    var a = new double[resultSize];
    var sum = 0d;

    fixed (double* valueStart = values, aStart = a)
    {
        // Sum of the first window; becomes a[0] further down.
        var valueCurrent = valueStart;
        var valueEndwindowSize = valueCurrent + windowSize;
        while (valueCurrent < valueEndwindowSize)
        {
            sum += *valueCurrent;
            valueCurrent++;
        }

        // Pass 1: a[i] (i >= 1) receives the delta between consecutive averages.
        var aCurrent = aStart + 1;
        var aEnd = aStart + resultSize;
        valueCurrent = valueStart;
        var valueWindowSize = valueStart + windowSize;

        if (Avx.IsSupported)
        {
            var aUnrolledEnd = aStart + (((resultSize - 1) >> 4) << 4);
            var vWindowSize = Vector256.Create((double)windowSize);

            while (aCurrent < aUnrolledEnd)
            {
                // Four 4-wide stores per iteration (16 elements).
                Avx.Store(aCurrent, Avx.Divide(Avx.Subtract(Avx.LoadVector256(valueWindowSize), Avx.LoadVector256(valueCurrent)), vWindowSize));
                Avx.Store(aCurrent + 4, Avx.Divide(Avx.Subtract(Avx.LoadVector256(valueWindowSize + 4), Avx.LoadVector256(valueCurrent + 4)), vWindowSize));
                Avx.Store(aCurrent + 8, Avx.Divide(Avx.Subtract(Avx.LoadVector256(valueWindowSize + 8), Avx.LoadVector256(valueCurrent + 8)), vWindowSize));
                Avx.Store(aCurrent + 12, Avx.Divide(Avx.Subtract(Avx.LoadVector256(valueWindowSize + 12), Avx.LoadVector256(valueCurrent + 12)), vWindowSize));

                valueWindowSize += 16;
                valueCurrent += 16;
                aCurrent += 16;
            }
        }

        // Scalar remainder (or the whole pass when AVX is unavailable).
        while (aCurrent < aEnd)
        {
            *aCurrent = (*valueWindowSize - *valueCurrent) / windowSize;
            aCurrent++;
            valueCurrent++;
            valueWindowSize++;
        }

        // Pass 2: prefix sum, seeded with the first average.
        var aPrev = aStart;
        aCurrent = aStart + 1;
        *aPrev = sum / windowSize;

        var prefixUnrolledEnd = aStart + (((resultSize - 1) >> 2) << 2);
        while (aCurrent < prefixUnrolledEnd)
        {
            // Manually unrolled four times; each step depends on the previous one.
            *aCurrent += *aPrev; aCurrent++; aPrev++;
            *aCurrent += *aPrev; aCurrent++; aPrev++;
            *aCurrent += *aPrev; aCurrent++; aPrev++;
            *aCurrent += *aPrev; aCurrent++; aPrev++;
        }

        while (aCurrent < aEnd)
        {
            *aCurrent += *aPrev;
            aCurrent++;
            aPrev++;
        }
    }

    return a;
}
/// <summary>
/// Computes the centred moving average of <paramref name="values"/> with a window of
/// <c>2 * halfWindow + 1</c> samples. This variant keeps its working addresses in the lanes of
/// <c>Vector256&lt;ulong&gt;</c> values.
/// </summary>
/// <remarks>
/// The first average is computed directly. Every further element is first filled with
/// <c>(values[i - 1 + window] - values[i - 1]) / window</c>, and a running prefix sum then turns
/// those deltas into the averages. The address lanes are advanced with a real integer add. The
/// previous code added them with the floating-point <c>Avx.Add</c> after reinterpreting the
/// bits as doubles, which is only an integer add while both operands happen to be bit patterns
/// of very small (denormal-range) doubles.
/// </remarks>
/// <param name="values">Input samples.</param>
/// <param name="halfWindow">Number of samples on each side of the centre; must not be negative.</param>
/// <returns>
/// An array of <c>values.Length - window + 1</c> averages, or <c>null</c> when the input is
/// shorter than the window.
/// </returns>
/// <exception cref="System.ArgumentOutOfRangeException"><paramref name="halfWindow"/> is negative.</exception>
public unsafe override double[] Applay(double[] values, int halfWindow)
{
    if (halfWindow < 0)
    {
        // A negative window used to make the loops below read outside 'values'.
        throw new System.ArgumentOutOfRangeException(nameof(halfWindow));
    }

    // Computed in 64 bits so that a huge halfWindow cannot wrap around to a small window.
    long requestedWindow = 2L * halfWindow + 1;
    if (requestedWindow > values.Length)
    {
        // Covers the original "resultSize == 0" case and also negative sizes, which previously
        // failed inside the array allocation instead of returning null.
        return null;
    }

    var windowSize = (int)requestedWindow;
    var resultSize = values.Length - windowSize + 1;

    var a = new double[resultSize];
    var sum = 0d;

    fixed (double* valueStart = values, aStart = a)
    {
        // Sum of the first window; becomes a[0] further down.
        var valueCurrent = valueStart;
        var valueEndwindowSize = valueCurrent + windowSize;
        while (valueCurrent < valueEndwindowSize)
        {
            sum += *valueCurrent;
            valueCurrent++;
        }

        // Pass 1: a[i] (i >= 1) receives the delta between consecutive averages.
        var aCurrent = aStart + 1;
        var aEnd = aStart + resultSize;
        valueCurrent = valueStart;
        var valueWindowSize = valueStart + windowSize;

        if (Avx.IsSupported)
        {
            var aUnrolledEnd = aStart + (((resultSize - 1) >> 4) << 4);
            var vWindowSize = Vector256.Create((double)windowSize);

            // Lane k of each vector holds the address of the k-th group of four doubles.
            var vCurrent = CreateAddressLanes(aCurrent, 4);
            var vValueCurrent = CreateAddressLanes(valueCurrent, 4);
            var vValueWindowSize = CreateAddressLanes(valueWindowSize, 4);
            const ulong blockBytes = 16ul * sizeof(double);

            while (aCurrent < aUnrolledEnd)
            {
                StoreScaledDifference(aCurrent, (double*)vValueWindowSize.GetElement(0), (double*)vValueCurrent.GetElement(0), vWindowSize);
                StoreScaledDifference((double*)vCurrent.GetElement(1), (double*)vValueWindowSize.GetElement(1), (double*)vValueCurrent.GetElement(1), vWindowSize);
                StoreScaledDifference((double*)vCurrent.GetElement(2), (double*)vValueWindowSize.GetElement(2), (double*)vValueCurrent.GetElement(2), vWindowSize);
                StoreScaledDifference((double*)vCurrent.GetElement(3), (double*)vValueWindowSize.GetElement(3), (double*)vValueCurrent.GetElement(3), vWindowSize);

                vCurrent = AdvanceAddressLanes(vCurrent, blockBytes);
                vValueCurrent = AdvanceAddressLanes(vValueCurrent, blockBytes);
                vValueWindowSize = AdvanceAddressLanes(vValueWindowSize, blockBytes);
                aCurrent = (double*)vCurrent.GetElement(0);
            }

            valueWindowSize = (double*)vValueWindowSize.GetElement(0);
            valueCurrent = (double*)vValueCurrent.GetElement(0);
        }

        // Scalar remainder (or the whole pass when AVX is unavailable).
        while (aCurrent < aEnd)
        {
            *aCurrent = (*valueWindowSize - *valueCurrent) / windowSize;
            aCurrent++;
            valueCurrent++;
            valueWindowSize++;
        }

        // Pass 2: prefix sum, seeded with the first average.
        var aPrev = aStart;
        aCurrent = aStart + 1;
        *aPrev = sum / windowSize;

        var prefixUnrolledEnd = aStart + (((resultSize - 1) >> 2) << 2);

        // Here lane k holds the address of the k-th consecutive double.
        var vPrefixCurrent = CreateAddressLanes(aCurrent, 1);
        var vPrev = CreateAddressLanes(aPrev, 1);
        const ulong stepBytes = 4ul * sizeof(double);

        while (aCurrent < prefixUnrolledEnd)
        {
            // Each step depends on the previous one, so the order matters.
            *aCurrent += *(double*)vPrev.GetElement(0);
            *(double*)vPrefixCurrent.GetElement(1) += *(double*)vPrev.GetElement(1);
            *(double*)vPrefixCurrent.GetElement(2) += *(double*)vPrev.GetElement(2);
            *(double*)vPrefixCurrent.GetElement(3) += *(double*)vPrev.GetElement(3);

            vPrefixCurrent = AdvanceAddressLanes(vPrefixCurrent, stepBytes);
            vPrev = AdvanceAddressLanes(vPrev, stepBytes);
            aCurrent = (double*)vPrefixCurrent.GetElement(0);
        }

        aPrev = (double*)vPrev.GetElement(0);
        while (aCurrent < aEnd)
        {
            *aCurrent += *aPrev;
            aCurrent++;
            aPrev++;
        }
    }

    return a;
}

// Builds four address lanes: first, first + stride, first + 2 * stride, first + 3 * stride (stride in doubles).
private static unsafe Vector256<ulong> CreateAddressLanes(double* first, int strideInDoubles)
{
    ulong baseAddress = (ulong)first;
    ulong strideBytes = (ulong)strideInDoubles * sizeof(double);
    return Vector256.Create(
        baseAddress,
        baseAddress + strideBytes,
        baseAddress + 2 * strideBytes,
        baseAddress + 3 * strideBytes);
}

// Adds byteCount to every address lane using integer arithmetic (AVX2 when available, scalar otherwise).
private static Vector256<ulong> AdvanceAddressLanes(Vector256<ulong> addresses, ulong byteCount)
{
    if (Avx2.IsSupported)
    {
        return Avx2.Add(addresses, Vector256.Create(byteCount));
    }

    return Vector256.Create(
        addresses.GetElement(0) + byteCount,
        addresses.GetElement(1) + byteCount,
        addresses.GetElement(2) + byteCount,
        addresses.GetElement(3) + byteCount);
}

// Stores (upper[0..3] - lower[0..3]) / divisor to destination[0..3].
private static unsafe void StoreScaledDifference(double* destination, double* upper, double* lower, Vector256<double> divisor)
{
    Avx.Store(destination, Avx.Divide(Avx.Subtract(Avx.LoadVector256(upper), Avx.LoadVector256(lower)), divisor));
}
/// <summary>
/// Divides every lane of a 4 x double vector by the same scalar.
/// </summary>
/// <param name="vector">Dividend lanes.</param>
/// <param name="scalar">Divisor; broadcast to all four lanes before dividing.</param>
/// <returns>Each lane of <paramref name="vector"/> divided by <paramref name="scalar"/>.</returns>
public static Vector256<double> op_Division(Vector256<double> vector, double scalar)
{
    Vector256<double> broadcastDivisor = Vector256.Create(scalar);
    return Avx.Divide(vector, broadcastDivisor);
}
/// <summary>
/// Sums the series x^-1 / 1 + x^-3 / 3 + x^-5 / 5 + ... four terms at a time with AVX.
/// </summary>
/// <param name="x">Series argument.</param>
/// <param name="stepThreshold">Iteration stops once the absolute value of the last term of a batch drops below this value.</param>
/// <param name="maxN">Upper bound for <c>N</c>; no further batch is started once <c>N</c> reaches it.</param>
/// <returns>
/// The accumulated sum, or <c>NaN</c> when AVX is not supported. <c>N</c> is set to the number
/// of terms added and <c>Status</c> to <c>NotSupported</c>, <c>TooManyIterations</c> (when
/// <c>N &gt;= maxN</c> on exit) or <c>Success</c>.
/// </returns>
protected override unsafe double CalculateImpl(double x, double stepThreshold, int maxN)
{
    if (!Avx.IsSupported)
    {
        Status = TaylorSeriesStatus.NotSupported;
        return Double.NaN;
    }

    const int vectorSize = 256 / 8 / sizeof(double);

    // (8, 8, 8, 8): how far each denominator moves per batch.
    Vector256<double> denominatorStep = Vector256.Create(8.0);

    // (x^8, x^8, x^8, x^8), obtained by squaring three times.
    Vector256<double> xToTheEighth = Vector256.Create(x);
    xToTheEighth = Avx.Multiply(xToTheEighth, xToTheEighth);
    xToTheEighth = Avx.Multiply(xToTheEighth, xToTheEighth);
    xToTheEighth = Avx.Multiply(xToTheEighth, xToTheEighth);

    // Numerators (x^-1, x^-3, x^-5, x^-7), each derived from the previous one.
    double power1 = 1 / x;
    double power3 = power1 / (x * x);
    double power5 = power3 / (x * x);
    double power7 = power5 / (x * x);
    Vector256<double> numerators = Vector256.Create(power1, power3, power5, power7);

    // Denominators (1, 3, 5, 7).
    Vector256<double> denominators = Vector256.Create(1.0, 3.0, 5.0, 7.0);

    Vector256<double> partialSums = Vector256<double>.Zero;

    N = 0;
    while (N < maxN)
    {
        Vector256<double> terms = Avx.Divide(numerators, denominators);
        partialSums = Avx.Add(partialSums, terms);

        // Only the last term of the batch is compared against the threshold.
        double lastTerm = terms.GetElement(vectorSize - 1);
        N += vectorSize;
        if (Math.Abs(lastTerm) < stepThreshold)
        {
            break;
        }

        numerators = Avx.Divide(numerators, xToTheEighth);
        denominators = Avx.Add(denominators, denominatorStep);
    }

    Status = N >= maxN ? TaylorSeriesStatus.TooManyIterations : TaylorSeriesStatus.Success;

    // Horizontal sum in lane order 0, 1, 2, 3.
    return partialSums.GetElement(0) + partialSums.GetElement(1) + partialSums.GetElement(2) + partialSums.GetElement(3);
}
}
/// <summary>
/// Sums the Scribner board foot volume (6 inch top, 32 foot logs) over all trees, weighting each
/// tree by its live expansion factor. Four trees are processed per iteration.
/// </summary>
/// <param name="trees">
/// Tree list supplying <c>Dbh</c>, <c>Height</c>, <c>LiveExpansionFactor</c>, <c>Count</c>,
/// <c>Species</c> and <c>Units</c>.
/// </param>
/// <returns>The horizontal sum of expansion factor times volume over all processed lanes.</returns>
/// <exception cref="NotSupportedException">
/// The species is not <c>FiaCode.PseudotsugaMenziesii</c> or the units are not <c>Units.English</c>.
/// </exception>
/// <remarks>
/// Each iteration loads full four-float vectors, so when <c>trees.Count</c> is not a multiple of
/// <c>Constant.Simd128x4.Width</c> the last iteration reads past <c>Count</c>.
/// NOTE(review): presumably the arrays are padded to a multiple of the SIMD width; confirm
/// against <c>Trees</c>.
/// </remarks>
public static unsafe float GetScribnerBoardFeetPerAcre(Trees trees)
{
    // for now, assume all trees are of the same species
    if (trees.Species != FiaCode.PseudotsugaMenziesii)
    {
        throw new NotSupportedException();
    }
    if (trees.Units != Units.English)
    {
        throw new NotSupportedException();
    }

    // Douglas-fir
    #if DEBUG
    Vector128<float> v6p8 = AvxExtensions.BroadcastScalarToVector128(6.8F);
    Vector128<float> v10k = AvxExtensions.BroadcastScalarToVector128(10.0F * 1000.0F);
    #endif

    // constants
    Vector128<float> forestersEnglish = AvxExtensions.BroadcastScalarToVector128(Constant.ForestersEnglish);
    Vector128<float> one = AvxExtensions.BroadcastScalarToVector128(1.0F);
    Vector128<float> six = AvxExtensions.BroadcastScalarToVector128(6.0F);

    // cubic foot volume
    Vector128<float> vm3p21809 = AvxExtensions.BroadcastScalarToVector128(-3.21809F);
    Vector128<float> v0p04948 = AvxExtensions.BroadcastScalarToVector128(0.04948F);
    Vector128<float> vm0p15664 = AvxExtensions.BroadcastScalarToVector128(-0.15664F);
    Vector128<float> v2p02132 = AvxExtensions.BroadcastScalarToVector128(2.02132F);
    Vector128<float> v1p63408 = AvxExtensions.BroadcastScalarToVector128(1.63408F);
    Vector128<float> vm0p16184 = AvxExtensions.BroadcastScalarToVector128(-0.16184F);

    // b4
    Vector128<float> v1p033 = AvxExtensions.BroadcastScalarToVector128(1.033F);
    Vector128<float> v1p382937 = AvxExtensions.BroadcastScalarToVector128(1.382937F);
    Vector128<float> vm0p4015292 = AvxExtensions.BroadcastScalarToVector128(-0.4015292F);
    Vector128<float> v0p087266 = AvxExtensions.BroadcastScalarToVector128(0.087266F);
    Vector128<float> vm0p174533 = AvxExtensions.BroadcastScalarToVector128(-0.174533F);

    // rc6-rs632
    // ln(0.62) = -0.4780358. The previous constant was log2(0.62) = -0.6896598794, which is only
    // correct with a base-2 exponential, but it was passed to the natural-base MathV.Exp.
    Vector128<float> vm0p4780358 = AvxExtensions.BroadcastScalarToVector128(-0.4780358F);
    Vector128<float> v0p993 = AvxExtensions.BroadcastScalarToVector128(0.993F);
    Vector128<float> v0p174439 = AvxExtensions.BroadcastScalarToVector128(0.174439F);
    Vector128<float> v0p117594 = AvxExtensions.BroadcastScalarToVector128(0.117594F);
    Vector128<float> vm8p210585 = AvxExtensions.BroadcastScalarToVector128(-8.210585F);
    Vector128<float> v0p236693 = AvxExtensions.BroadcastScalarToVector128(0.236693F);
    Vector128<float> v0p00001345 = AvxExtensions.BroadcastScalarToVector128(0.00001345F);
    Vector128<float> v0p00001937 = AvxExtensions.BroadcastScalarToVector128(0.00001937F);
    Vector128<float> v1p001491 = AvxExtensions.BroadcastScalarToVector128(1.001491F);
    Vector128<float> vm6p924097 = AvxExtensions.BroadcastScalarToVector128(-6.924097F);
    Vector128<float> v0p912733 = AvxExtensions.BroadcastScalarToVector128(0.912733F);
    Vector128<float> v0p00001351 = AvxExtensions.BroadcastScalarToVector128(0.00001351F);

    fixed (float* dbh = &trees.Dbh[0], expansionFactors = &trees.LiveExpansionFactor[0], height = &trees.Height[0])
    {
        Vector128<float> standBoardFeetPerAcre = Vector128<float>.Zero;
        for (int treeIndex = 0; treeIndex < trees.Count; treeIndex += Constant.Simd128x4.Width)
        {
            Vector128<float> dbhInInches = Avx.LoadVector128(dbh + treeIndex);
            Vector128<float> heightInFeet = Avx.LoadVector128(height + treeIndex);

            Vector128<float> logDbhInInches = MathV.Log10(dbhInInches);
            Vector128<float> logHeightInFeet = MathV.Log10(heightInFeet);

            // FiaCode.PseudotsugaMenziesii => -3.21809F + 0.04948F * logHeightInFeet * logDbhInInches - 0.15664F * logDbhInInches * logDbhInInches +
            //                                 2.02132F * logDbhInInches + 1.63408F * logHeightInFeet - 0.16184F * logHeightInFeet * logHeightInFeet
            Vector128<float> cvtsl = Avx.Add(vm3p21809, Avx.Multiply(v0p04948, Avx.Multiply(logHeightInFeet, logDbhInInches)));
            cvtsl = Avx.Add(cvtsl, Avx.Multiply(vm0p15664, Avx.Multiply(logDbhInInches, logDbhInInches)));
            cvtsl = Avx.Add(cvtsl, Avx.Multiply(v2p02132, logDbhInInches));
            cvtsl = Avx.Add(cvtsl, Avx.Multiply(v1p63408, logHeightInFeet));
            cvtsl = Avx.Add(cvtsl, Avx.Multiply(vm0p16184, Avx.Multiply(logHeightInFeet, logHeightInFeet)));
            Vector128<float> cubicFeet = MathV.Exp10(cvtsl);

            Vector128<float> dbhSquared = Avx.Multiply(dbhInInches, dbhInInches); // could be consolidated by merging other scaling constants with Forester's constant for basal area
            Vector128<float> basalAreaInSquareFeet = Avx.Multiply(forestersEnglish, dbhSquared);

            // b4 = cubicFeet / (1.033F * (1.0F + 1.382937F * MathV.Exp(-4.015292F * dbhInInches / 10.0F)) * (basalAreaInSquareFeet + 0.087266F) - 0.174533F);
            Vector128<float> b4 = Avx.Divide(cubicFeet,
                Avx.Add(
                    Avx.Multiply(v1p033,
                        Avx.Multiply(
                            Avx.Add(one, Avx.Multiply(v1p382937, MathV.Exp(Avx.Multiply(vm0p4015292, dbhInInches)))),
                            Avx.Add(basalAreaInSquareFeet, v0p087266))),
                    vm0p174533));
            Vector128<float> cv4 = Avx.Multiply(b4, Avx.Subtract(basalAreaInSquareFeet, v0p087266));

            // conversion to Scribner volumes for 32 foot trees
            // Waddell 2014:32
            // rc6 = 0.993F * (1.0F - MathF.Pow(0.62F, dbhInInches - 6.0F));
            // evaluated as 0.62^y = exp(y * ln(0.62))
            Vector128<float> rc6 = Avx.Multiply(v0p993, Avx.Subtract(one, MathV.Exp(Avx.Multiply(vm0p4780358, Avx.Subtract(dbhInInches, six)))));
            Vector128<float> cv6 = Avx.Multiply(rc6, cv4);

            Vector128<float> logB4 = MathV.Log10(b4);
            // float rs616 = MathF.Pow(10.0F, 0.174439F + 0.117594F * logDbhInInches * logB4 - 8.210585F / (dbhInInches * dbhInInches) + 0.236693F * logB4 - 0.00001345F * b4 * b4 - 0.00001937F * dbhInInches * dbhInInches);
            Vector128<float> rs616l = Avx.Add(v0p174439, Avx.Multiply(v0p117594, Avx.Multiply(logDbhInInches, logB4)));
            rs616l = Avx.Add(rs616l, Avx.Divide(vm8p210585, dbhSquared));
            rs616l = Avx.Add(rs616l, Avx.Multiply(v0p236693, logB4));
            rs616l = Avx.Subtract(rs616l, Avx.Multiply(v0p00001345, Avx.Multiply(b4, b4)));
            rs616l = Avx.Subtract(rs616l, Avx.Multiply(v0p00001937, dbhSquared));
            Vector128<float> rs616 = MathV.Exp10(rs616l);
            Vector128<float> sv616 = Avx.Multiply(rs616, cv6); // Scribner board foot volume to a 6 inch top for 16 foot logs

            // float rs632 = 1.001491F - 6.924097F / tarif + 0.00001351F * dbhInInches * dbhInInches;
            // with tarif = 0.912733F * b4
            Vector128<float> rs632 = Avx.Add(v1p001491, Avx.Divide(vm6p924097, Avx.Multiply(v0p912733, b4)));
            rs632 = Avx.Add(rs632, Avx.Multiply(v0p00001351, dbhSquared));

            // Trees with a DBH of six inches or less contribute no volume.
            Vector128<float> zeroVolumeMask = Avx.CompareLessThanOrEqual(dbhInInches, six);
            Vector128<float> sv632 = Avx.Multiply(rs632, sv616); // Scribner board foot volume to a 6 inch top for 32 foot logs
            sv632 = Avx.BlendVariable(sv632, Vector128<float>.Zero, zeroVolumeMask);

            #if DEBUG
            DebugV.Assert(Avx.CompareGreaterThanOrEqual(Avx.BlendVariable(rc6, Vector128<float>.Zero, zeroVolumeMask), Vector128<float>.Zero));
            DebugV.Assert(Avx.CompareLessThanOrEqual(rc6, one));
            DebugV.Assert(Avx.CompareGreaterThanOrEqual(Avx.BlendVariable(rs616, one, zeroVolumeMask), one));
            DebugV.Assert(Avx.CompareLessThanOrEqual(Avx.BlendVariable(rs616, Vector128<float>.Zero, zeroVolumeMask), v6p8));
            DebugV.Assert(Avx.CompareGreaterThanOrEqual(Avx.BlendVariable(rs632, Vector128<float>.Zero, zeroVolumeMask), Vector128<float>.Zero));
            DebugV.Assert(Avx.CompareLessThanOrEqual(Avx.BlendVariable(rs632, Vector128<float>.Zero, zeroVolumeMask), one));
            DebugV.Assert(Avx.CompareGreaterThanOrEqual(Avx.BlendVariable(sv632, Vector128<float>.Zero, zeroVolumeMask), Vector128<float>.Zero));
            DebugV.Assert(Avx.CompareLessThanOrEqual(Avx.BlendVariable(sv632, Vector128<float>.Zero, zeroVolumeMask), v10k));
            #endif

            Vector128<float> expansionFactor = Avx.LoadVector128(expansionFactors + treeIndex);
            standBoardFeetPerAcre = Avx.Add(standBoardFeetPerAcre, Avx.Multiply(expansionFactor, sv632));
        }

        // Two horizontal adds leave the sum of all four lanes in every lane.
        standBoardFeetPerAcre = Avx.HorizontalAdd(standBoardFeetPerAcre, standBoardFeetPerAcre);
        standBoardFeetPerAcre = Avx.HorizontalAdd(standBoardFeetPerAcre, standBoardFeetPerAcre);
        return standBoardFeetPerAcre.ToScalar();
    }
}