Esempio n. 1
0
            /// <summary>
            /// Passes <c>_fld1</c>, <c>_fld2</c> and <c>_fld3</c> to <c>Avx.BlendVariable</c>, writes the
            /// result to the output buffer of <paramref name="testClass"/> and has it validate the output.
            /// </summary>
            /// <param name="testClass">Test instance providing <c>_dataTable.outArrayPtr</c> and <c>ValidateResult</c>.</param>
            public void RunStructFldScenario(SimpleTernaryOpTest__BlendVariableDouble testClass)
            {
                var blended = Avx.BlendVariable(_fld1, _fld2, _fld3);
                Unsafe.Write(testClass._dataTable.outArrayPtr, blended);

                testClass.ValidateResult(_fld1, _fld2, _fld3, testClass._dataTable.outArrayPtr);
            }
Esempio n. 2
0
        /// <summary>
        /// Computes per-lane scores as the sum of an exploration term U and a value term Q.
        /// </summary>
        /// <remarks>
        /// U = (P * cpuctSqrtParentN) / (1 + D), where D is (N + NInFlight) when
        /// <paramref name="uctDenominatorPower"/> is 1, its square root when it is 0.5, and
        /// ToPower(N + NInFlight, uctDenominatorPower) otherwise.
        /// Q = (NInFlight * multiplier - W) / (N + NInFlight) in lanes where N + NInFlight is greater than
        /// zero; in the remaining lanes Q = vQWhenNoChildren + NInFlight * multiplier.
        /// </remarks>
        /// <returns>The vector U + Q.</returns>
        private static Vector256<float> ComputeScores(Vector256<float> vW, Vector256<float> vN, Vector256<float> vP,
                                                      Vector256<float> vVirtualLossMultiplier,
                                                      float cpuctSqrtParentN, float uctDenominatorPower,
                                                      Vector256<float> vQWhenNoChildren, Vector256<float> vNInFlight)
        {
            Vector256<float> visits = Avx.Add(vN, vNInFlight);

            // Raise the visit count to the requested power, with fast paths for 1 and 0.5.
            Vector256<float> poweredVisits;
            if (uctDenominatorPower is 1.0f)
            {
                poweredVisits = visits;
            }
            else if (uctDenominatorPower is 0.5f)
            {
                poweredVisits = Avx.Sqrt(visits);
            }
            else
            {
                poweredVisits = ToPower(visits, uctDenominatorPower);
            }

            Vector256<float> virtualLoss = Avx.Multiply(vNInFlight, vVirtualLossMultiplier);

            // Exploration term: (p * cpuct * sqrt(parentN)) / (1 + poweredVisits).
            Vector256<float> exploration = Avx.Divide(
                Avx.Multiply(vP, Vector256.Create(cpuctSqrtParentN)),
                Avx.Add(vOnes, poweredVisits));

            Vector256<float> qVisited   = Avx.Divide(Avx.Subtract(virtualLoss, vW), visits);
            Vector256<float> qUnvisited = Avx.Add(vQWhenNoChildren, virtualLoss);

            // The mask is set in lanes with at least one (possibly in-flight) visit;
            // BlendVariable takes the second operand in those lanes and the first elsewhere.
            Vector256<float> visitedMask = Avx.Compare(visits, vZeros, FloatComparisonMode.OrderedGreaterThanSignaling);
            Vector256<float> value       = Avx.BlendVariable(qUnvisited, qVisited, visitedMask);

            return Avx.Add(exploration, value);
        }
Esempio n. 3
0
        /// <summary>
        /// Runs <c>Avx.BlendVariable</c> on the instance fields <c>_fld1</c>, <c>_fld2</c>, <c>_fld3</c>,
        /// stores the result in the output buffer and validates it.
        /// </summary>
        public void RunFldScenario()
        {
            var blended = Avx.BlendVariable(_fld1, _fld2, _fld3);
            Unsafe.Write(_dataTable.outArrayPtr, blended);

            ValidateResult(_fld1, _fld2, _fld3, _dataTable.outArrayPtr);
        }
Esempio n. 4
0
        /// <summary>
        /// Creates a fresh <c>SimpleTernaryOpTest__BlendVariableSingle</c>, blends its three fields with
        /// <c>Avx.BlendVariable</c>, stores the result in this instance's output buffer and validates it.
        /// </summary>
        public void RunLclFldScenario()
        {
            var local   = new SimpleTernaryOpTest__BlendVariableSingle();
            var blended = Avx.BlendVariable(local._fld1, local._fld2, local._fld3);
            Unsafe.Write(_dataTable.outArrayPtr, blended);

            ValidateResult(local._fld1, local._fld2, local._fld3, _dataTable.outArrayPtr);
        }
Esempio n. 5
0
        /// <summary>
        /// Obtains a value from <c>TestStruct.Create()</c>, blends its three fields with
        /// <c>Avx.BlendVariable</c>, stores the result in the output buffer and validates it.
        /// </summary>
        public void RunStructLclFldScenario()
        {
            var local   = TestStruct.Create();
            var blended = Avx.BlendVariable(local._fld1, local._fld2, local._fld3);
            Unsafe.Write(_dataTable.outArrayPtr, blended);

            ValidateResult(local._fld1, local._fld2, local._fld3, _dataTable.outArrayPtr);
        }
Esempio n. 6
0
        /// <summary>
        /// Announces the scenario, then runs <c>Avx.BlendVariable</c> on the instance fields
        /// <c>_fld1</c>, <c>_fld2</c>, <c>_fld3</c>, stores the result in the output buffer and validates it.
        /// </summary>
        public void RunClassFldScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));

            var blended = Avx.BlendVariable(_fld1, _fld2, _fld3);
            Unsafe.Write(_dataTable.outArrayPtr, blended);

            ValidateResult(_fld1, _fld2, _fld3, _dataTable.outArrayPtr);
        }
Esempio n. 7
0
        /// <summary>
        /// Announces the scenario, creates a fresh <c>SimpleTernaryOpTest__BlendVariableDouble</c>, blends
        /// its three fields with <c>Avx.BlendVariable</c>, stores the result in this instance's output
        /// buffer and validates it.
        /// </summary>
        public void RunClassLclFldScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));

            var local   = new SimpleTernaryOpTest__BlendVariableDouble();
            var blended = Avx.BlendVariable(local._fld1, local._fld2, local._fld3);
            Unsafe.Write(_dataTable.outArrayPtr, blended);

            ValidateResult(local._fld1, local._fld2, local._fld3, _dataTable.outArrayPtr);
        }
Esempio n. 8
0
        /// <summary>
        /// Reads the three operands from the input buffers into locals with <c>Unsafe.Read</c>, blends
        /// them with <c>Avx.BlendVariable</c>, stores the result in the output buffer and validates it.
        /// </summary>
        public void RunLclVarScenario_UnsafeRead()
        {
            var left     = Unsafe.Read<Vector256<Single>>(_dataTable.inArray1Ptr);
            var right    = Unsafe.Read<Vector256<Single>>(_dataTable.inArray2Ptr);
            var selector = Unsafe.Read<Vector256<Single>>(_dataTable.inArray3Ptr);
            var blended  = Avx.BlendVariable(left, right, selector);
            Unsafe.Write(_dataTable.outArrayPtr, blended);

            ValidateResult(left, right, selector, _dataTable.outArrayPtr);
        }
Esempio n. 9
0
        /// <summary>
        /// Loads the three operands from the input buffers into locals with
        /// <c>Avx.LoadAlignedVector256</c>, blends them with <c>Avx.BlendVariable</c>, stores the result
        /// in the output buffer and validates it.
        /// </summary>
        public void RunLclVarScenario_LoadAligned()
        {
            var left     = Avx.LoadAlignedVector256((Single*)(_dataTable.inArray1Ptr));
            var right    = Avx.LoadAlignedVector256((Single*)(_dataTable.inArray2Ptr));
            var selector = Avx.LoadAlignedVector256((Single*)(_dataTable.inArray3Ptr));
            var blended  = Avx.BlendVariable(left, right, selector);
            Unsafe.Write(_dataTable.outArrayPtr, blended);

            ValidateResult(left, right, selector, _dataTable.outArrayPtr);
        }
Esempio n. 10
0
        /// <summary>
        /// Announces the scenario, obtains a value from <c>TestStruct.Create()</c>, blends its three
        /// fields with <c>Avx.BlendVariable</c>, stores the result in the output buffer and validates it.
        /// </summary>
        public void RunStructLclFldScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));

            var local   = TestStruct.Create();
            var blended = Avx.BlendVariable(local._fld1, local._fld2, local._fld3);
            Unsafe.Write(_dataTable.outArrayPtr, blended);

            ValidateResult(local._fld1, local._fld2, local._fld3, _dataTable.outArrayPtr);
        }
Esempio n. 11
0
        /// <summary>
        /// Calls <c>Avx.BlendVariable</c> with operands read inline from the input buffers via
        /// <c>Unsafe.Read</c>, stores the result in the output buffer and validates it against the
        /// raw input buffers.
        /// </summary>
        public void RunBasicScenario_UnsafeRead()
        {
            var blended = Avx.BlendVariable(
                Unsafe.Read<Vector256<Single>>(_dataTable.inArray1Ptr),
                Unsafe.Read<Vector256<Single>>(_dataTable.inArray2Ptr),
                Unsafe.Read<Vector256<Single>>(_dataTable.inArray3Ptr));
            Unsafe.Write(_dataTable.outArrayPtr, blended);

            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr);
        }
Esempio n. 12
0
        /// <summary>
        /// Calls <c>Avx.BlendVariable</c> with operands loaded inline from the input buffers via
        /// <c>Avx.LoadAlignedVector256</c>, stores the result in the output buffer and validates it
        /// against the raw input buffers.
        /// </summary>
        public void RunBasicScenario_LoadAligned()
        {
            var blended = Avx.BlendVariable(
                Avx.LoadAlignedVector256((Single*)(_dataTable.inArray1Ptr)),
                Avx.LoadAlignedVector256((Single*)(_dataTable.inArray2Ptr)),
                Avx.LoadAlignedVector256((Single*)(_dataTable.inArray3Ptr)));
            Unsafe.Write(_dataTable.outArrayPtr, blended);

            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr);
        }
Esempio n. 13
0
        /// <summary>
        /// Runs <c>Avx.BlendVariable</c> on <c>_clsVar1</c>, <c>_clsVar2</c>, <c>_clsVar3</c>, stores the
        /// result in the output buffer and validates it.
        /// </summary>
        public void RunClsVarScenario()
        {
            var blended = Avx.BlendVariable(_clsVar1, _clsVar2, _clsVar3);
            Unsafe.Write(_dataTable.outArrayPtr, blended);

            ValidateResult(_clsVar1, _clsVar2, _clsVar3, _dataTable.outArrayPtr);
        }
Esempio n. 14
0
        /// <summary>
        /// Announces the scenario, loads the three <c>Double</c> operands from the input buffers into
        /// locals with <c>Avx.LoadAlignedVector256</c>, blends them with <c>Avx.BlendVariable</c>,
        /// stores the result in the output buffer and validates it.
        /// </summary>
        public void RunLclVarScenario_LoadAligned()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));

            var left     = Avx.LoadAlignedVector256((Double*)(_dataTable.inArray1Ptr));
            var right    = Avx.LoadAlignedVector256((Double*)(_dataTable.inArray2Ptr));
            var selector = Avx.LoadAlignedVector256((Double*)(_dataTable.inArray3Ptr));
            var blended  = Avx.BlendVariable(left, right, selector);
            Unsafe.Write(_dataTable.outArrayPtr, blended);

            ValidateResult(left, right, selector, _dataTable.outArrayPtr);
        }
Esempio n. 15
0
        /// <summary>
        /// Announces the scenario, loads the three <c>Double</c> operands from the input buffers into
        /// locals with <c>Avx.LoadAlignedVector256</c>, blends them with <c>Avx.BlendVariable</c>,
        /// stores the result in the output buffer and validates it.
        /// </summary>
        public void RunLclVarScenario_LoadAligned()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));

            var first   = Avx.LoadAlignedVector256((Double*)(_dataTable.inArray1Ptr));
            var second  = Avx.LoadAlignedVector256((Double*)(_dataTable.inArray2Ptr));
            var third   = Avx.LoadAlignedVector256((Double*)(_dataTable.inArray3Ptr));
            var blended = Avx.BlendVariable(first, second, third);
            Unsafe.Write(_dataTable.outArrayPtr, blended);

            ValidateResult(first, second, third, _dataTable.outArrayPtr);
        }
Esempio n. 16
0
        /// <summary>
        /// Announces the scenario, reads the three <c>Double</c> operands from the input buffers into
        /// locals with <c>Unsafe.Read</c>, blends them with <c>Avx.BlendVariable</c>, stores the result
        /// in the output buffer and validates it.
        /// </summary>
        public void RunLclVarScenario_UnsafeRead()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));

            var first   = Unsafe.Read<Vector256<Double>>(_dataTable.inArray1Ptr);
            var second  = Unsafe.Read<Vector256<Double>>(_dataTable.inArray2Ptr);
            var third   = Unsafe.Read<Vector256<Double>>(_dataTable.inArray3Ptr);
            var blended = Avx.BlendVariable(first, second, third);
            Unsafe.Write(_dataTable.outArrayPtr, blended);

            ValidateResult(first, second, third, _dataTable.outArrayPtr);
        }
Esempio n. 17
0
        /// <summary>
        /// Announces the scenario, calls <c>Avx.BlendVariable</c> with operands read inline from the
        /// input buffers via <c>Unsafe.Read</c>, stores the result in the output buffer and validates
        /// it against the raw input buffers.
        /// </summary>
        public void RunBasicScenario_UnsafeRead()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));

            var blended = Avx.BlendVariable(
                Unsafe.Read<Vector256<Single>>(_dataTable.inArray1Ptr),
                Unsafe.Read<Vector256<Single>>(_dataTable.inArray2Ptr),
                Unsafe.Read<Vector256<Single>>(_dataTable.inArray3Ptr));
            Unsafe.Write(_dataTable.outArrayPtr, blended);

            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr);
        }
Esempio n. 18
0
        /// <summary>
        /// Announces the scenario, calls <c>Avx.BlendVariable</c> with operands loaded inline from the
        /// input buffers via <c>Avx.LoadVector256</c>, stores the result in the output buffer and
        /// validates it against the raw input buffers.
        /// </summary>
        public void RunBasicScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));

            var blended = Avx.BlendVariable(
                Avx.LoadVector256((Double*)(_dataTable.inArray1Ptr)),
                Avx.LoadVector256((Double*)(_dataTable.inArray2Ptr)),
                Avx.LoadVector256((Double*)(_dataTable.inArray3Ptr)));
            Unsafe.Write(_dataTable.outArrayPtr, blended);

            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr);
        }
Esempio n. 19
0
        /// <summary>
        /// Announces the scenario, obtains a value from <c>TestStruct.Create()</c>, loads each of its
        /// three fields through its address with <c>Avx.LoadVector256</c>, blends them with
        /// <c>Avx.BlendVariable</c>, stores the result in the output buffer and validates it.
        /// </summary>
        public void RunStructLclFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario_Load));

            var local   = TestStruct.Create();
            var blended = Avx.BlendVariable(
                Avx.LoadVector256((Double*)(&local._fld1)),
                Avx.LoadVector256((Double*)(&local._fld2)),
                Avx.LoadVector256((Double*)(&local._fld3)));
            Unsafe.Write(_dataTable.outArrayPtr, blended);

            ValidateResult(local._fld1, local._fld2, local._fld3, _dataTable.outArrayPtr);
        }
Esempio n. 20
0
        /// <summary>
        /// Vectorised natural-logarithm approximation. Absolute error bounded by 1e-4.
        /// </summary>
        /// <remarks>
        /// The input is split into its raw exponent field and a mantissa re-biased into [1, 2).
        /// The result is a degree-5 polynomial in the mantissa (without constant term) plus
        /// <c>ln(2) * exponent</c> plus an additive constant. Lanes where <paramref name="x"/> is
        /// less than zero receive NaN as that constant, so their result is NaN. Zero is not
        /// special-cased. The method does not check <c>Avx2.IsSupported</c> itself.
        /// </remarks>
        /// <param name="x">Values whose logarithm is approximated, one per lane.</param>
        /// <returns>The per-lane approximation of ln(x).</returns>
        public static Vector256<float> Log(Vector256<float> x)
        {
            // Raw (still biased) exponent bits, converted to float.
            Vector256<float> exponent =
                Avx.ConvertToVector256Single(Avx2.ShiftRightArithmetic(x.As<float, int>(), 23));

            // Scalar constants are declared up-front; the original author measured a
            // speed-up of nearly 10% from doing so (BenchmarkDotNet).
            const float Offset            = -89.970756366f;
            const float NegativeInputFill = float.NaN; // behavior of MathF.Log() on negative numbers
            const float C1                = 3.529304993f;
            const float C2                = -2.461222105f;
            const float C3                = 1.130626167f;
            const float C4                = -0.288739945f;
            const float C5                = 3.110401639e-2f;
            const float Ln2               = 0.6931471805f;

            const int MantissaMask = 0x7FFFFF;
            const int OneBits      = 0x3F800000;

            // additive = (x < 0) ? NaN : Offset
            Vector256<float> isNegative = Avx.Compare(x, Vector256<float>.Zero, FloatComparisonMode.OrderedLessThanNonSignaling);
            Vector256<float> additive   = Avx.BlendVariable(Vector256.Create(Offset),
                                                            Vector256.Create(NegativeInputFill),
                                                            isNegative);

            // Keep the mantissa bits and force the exponent of 1.0f, giving a value in [1, 2).
            Vector256<int>   mantissaBits = Avx2.And(x.As<float, int>(), Vector256.Create(MantissaMask));
            Vector256<float> m            = Avx2.Or(mantissaBits, Vector256.Create(OneBits)).As<int, float>();

            // Horner evaluation of m * (C1 + m * (C2 + m * (C3 + m * (C4 + m * C5)))).
            Vector256<float> poly = Avx.Multiply(m, Vector256.Create(C5));
            poly = Avx.Multiply(m, Avx.Add(Vector256.Create(C4), poly));
            poly = Avx.Multiply(m, Avx.Add(Vector256.Create(C3), poly));
            poly = Avx.Multiply(m, Avx.Add(Vector256.Create(C2), poly));
            poly = Avx.Multiply(m, Avx.Add(Vector256.Create(C1), poly));

            // polynomial + (additive + ln(2) * exponent)
            Vector256<float> tail = Avx.Add(additive, Avx.Multiply(Vector256.Create(Ln2), exponent));

            return Avx.Add(poly, tail);
        }
Esempio n. 21
0
            /// <summary>
            /// Pins <c>_fld1</c>, <c>_fld2</c> and <c>_fld3</c>, loads them through their addresses with
            /// <c>Avx.LoadVector256</c>, blends them with <c>Avx.BlendVariable</c>, writes the result to
            /// the output buffer of <paramref name="testClass"/> and has it validate the output.
            /// </summary>
            /// <param name="testClass">Test instance providing <c>_dataTable.outArrayPtr</c> and <c>ValidateResult</c>.</param>
            public void RunStructFldScenario_Load(SimpleTernaryOpTest__BlendVariableDouble testClass)
            {
                fixed (Vector256<Double>* ptr1 = &_fld1)
                fixed (Vector256<Double>* ptr2 = &_fld2)
                fixed (Vector256<Double>* ptr3 = &_fld3)
                {
                    var blended = Avx.BlendVariable(
                        Avx.LoadVector256((Double*)(ptr1)),
                        Avx.LoadVector256((Double*)(ptr2)),
                        Avx.LoadVector256((Double*)(ptr3)));
                    Unsafe.Write(testClass._dataTable.outArrayPtr, blended);

                    testClass.ValidateResult(_fld1, _fld2, _fld3, testClass._dataTable.outArrayPtr);
                }
            }
Esempio n. 22
0
        /// <summary>
        /// Announces the scenario, pins <c>_clsVar1</c>, <c>_clsVar2</c> and <c>_clsVar3</c>, loads them
        /// through their addresses with <c>Avx.LoadVector256</c>, blends them with
        /// <c>Avx.BlendVariable</c>, stores the result in the output buffer and validates it.
        /// </summary>
        public void RunClsVarScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario_Load));

            fixed (Vector256<Double>* ptr1 = &_clsVar1)
            fixed (Vector256<Double>* ptr2 = &_clsVar2)
            fixed (Vector256<Double>* ptr3 = &_clsVar3)
            {
                var blended = Avx.BlendVariable(
                    Avx.LoadVector256((Double*)(ptr1)),
                    Avx.LoadVector256((Double*)(ptr2)),
                    Avx.LoadVector256((Double*)(ptr3)));
                Unsafe.Write(_dataTable.outArrayPtr, blended);

                ValidateResult(_clsVar1, _clsVar2, _clsVar3, _dataTable.outArrayPtr);
            }
        }
Esempio n. 23
0
        /// <summary>
        /// Announces the scenario, pins the instance fields <c>_fld1</c>, <c>_fld2</c> and <c>_fld3</c>,
        /// loads them through their addresses with <c>Avx.LoadVector256</c>, blends them with
        /// <c>Avx.BlendVariable</c>, stores the result in the output buffer and validates it.
        /// </summary>
        public void RunClassFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario_Load));

            fixed (Vector256<Double>* ptr1 = &_fld1)
            fixed (Vector256<Double>* ptr2 = &_fld2)
            fixed (Vector256<Double>* ptr3 = &_fld3)
            {
                var blended = Avx.BlendVariable(
                    Avx.LoadVector256((Double*)(ptr1)),
                    Avx.LoadVector256((Double*)(ptr2)),
                    Avx.LoadVector256((Double*)(ptr3)));
                Unsafe.Write(_dataTable.outArrayPtr, blended);

                ValidateResult(_fld1, _fld2, _fld3, _dataTable.outArrayPtr);
            }
        }
Esempio n. 24
0
        /// <summary>
        /// Announces the scenario, creates a fresh <c>SimpleTernaryOpTest__BlendVariableSingle</c>, pins
        /// its three fields, loads them through their addresses with <c>Avx.LoadVector256</c>, blends
        /// them with <c>Avx.BlendVariable</c>, stores the result in this instance's output buffer and
        /// validates it.
        /// </summary>
        public void RunClassLclFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario_Load));

            var local = new SimpleTernaryOpTest__BlendVariableSingle();

            fixed (Vector256<Single>* ptr1 = &local._fld1)
            fixed (Vector256<Single>* ptr2 = &local._fld2)
            fixed (Vector256<Single>* ptr3 = &local._fld3)
            {
                var blended = Avx.BlendVariable(
                    Avx.LoadVector256((Single*)(ptr1)),
                    Avx.LoadVector256((Single*)(ptr2)),
                    Avx.LoadVector256((Single*)(ptr3)));
                Unsafe.Write(_dataTable.outArrayPtr, blended);

                ValidateResult(local._fld1, local._fld2, local._fld3, _dataTable.outArrayPtr);
            }
        }
Esempio n. 25
0
        /// <summary>
        /// Slab-style intersection of <paramref name="ray"/> against the box described by
        /// <c>Minimum</c> and <c>Maximum</c>, evaluated with AVX/SSE2 intrinsics.
        /// </summary>
        /// <param name="ray">Ray whose <c>Origin</c> and <c>Direction</c> are converted to <c>Vector256&lt;double&gt;</c>.</param>
        /// <returns>
        /// The entry and exit parameters along the ray, or <c>(NaN, NaN)</c> when the computed near value
        /// is greater than the far value or the far value is less than zero.
        /// </returns>
        public (double near, double far) IntersectAVX(Ray ray)
        {
            Vector256<double> rayOrigin = (Vector256<double>)ray.Origin;
            Vector256<double> rayDir    = (Vector256<double>)ray.Direction;

            Vector256<double> zero   = Vector256<double>.Zero;
            Vector256<double> boxMin = (Vector256<double>)Minimum;
            Vector256<double> boxMax = (Vector256<double>)Maximum;

            // Axes with a zero direction component and an origin inside the slab are not checked:
            // their bounds are replaced with infinities so that NaN doesn't propagate.
            Vector256<double> dirIsZero    = Avx.Compare(rayDir, zero, FloatComparisonMode.OrderedEqualNonSignaling);
            Vector256<double> originInSlab = Avx.And(
                Avx.Compare(rayOrigin, boxMin, FloatComparisonMode.OrderedGreaterThanOrEqualNonSignaling),
                Avx.Compare(rayOrigin, boxMax, FloatComparisonMode.OrderedLessThanOrEqualNonSignaling));
            Vector256<double> skipAxis = Avx.And(dirIsZero, originInSlab);

            boxMin = Avx.BlendVariable(boxMin, SIMDHelpers.BroadcastScalar4(double.NegativeInfinity), skipAxis);
            boxMax = Avx.BlendVariable(boxMax, SIMDHelpers.BroadcastScalar4(double.PositiveInfinity), skipAxis);

            // Swap the slab bounds on axes whose direction is negative. BlendVariable selects on the
            // most significant bit of the mask, i.e. the sign of the direction (probably includes -0).
            Vector256<double> entryBound = Avx.BlendVariable(boxMin, boxMax, rayDir);
            Vector256<double> exitBound  = Avx.BlendVariable(boxMax, boxMin, rayDir);

            Vector256<double> invDir    = Avx.Divide(Vector256.Create(1D), rayDir);
            Vector256<double> nearLanes = Avx.Multiply(Avx.Subtract(entryBound, rayOrigin), invDir);
            Vector256<double> farLanes  = Avx.Multiply(Avx.Subtract(exitBound, rayOrigin), invDir);

            // Horizontal reduction: largest near value and smallest far value end up in element 0.
            Vector128<double> nearPair = Sse2.Max(nearLanes.GetLower(), nearLanes.GetUpper());
            nearPair = Sse2.MaxScalar(nearPair, SIMDHelpers.Swap(nearPair));

            Vector128<double> farPair = Sse2.Min(farLanes.GetLower(), farLanes.GetUpper());
            farPair = Sse2.MinScalar(farPair, SIMDHelpers.Swap(farPair));

            // Both comparisons are always evaluated (non-short-circuit '|').
            bool missed = Sse2.CompareScalarOrderedGreaterThan(nearPair, farPair)
                        | Sse2.CompareScalarOrderedLessThan(farPair, Vector128<double>.Zero);

            return missed
                ? (double.NaN, double.NaN)
                : (nearPair.ToScalar(), farPair.ToScalar());
        }
Esempio n. 26
0
        /// <summary>
        /// Computes, for four trees at once, the growth effective age from tree height and the
        /// potential height growth over one time step, using the coefficients in <paramref name="site"/>.
        /// </summary>
        /// <param name="site">Supplies <c>B1</c>, <c>B2</c>, <c>X1</c>, <c>X2toB2</c> and <c>SiteIndexFromGround</c>.</param>
        /// <param name="timeStepInYears">Length of the time step; added to <c>site.X1</c> before use.</param>
        /// <param name="treeHeight">Current heights, one per lane.</param>
        /// <param name="potentialHeightGrowth">Receives potential height minus <paramref name="treeHeight"/>.</param>
        /// <returns>
        /// The growth effective age per lane; lanes whose intermediate term XX1 is less than or equal to
        /// zero are set to 500.
        /// </returns>
        public static Vector128<float> GetBrucePsmeAbgrGrowthEffectiveAge(SiteConstants site, float timeStepInYears, Vector128<float> treeHeight, out Vector128<float> potentialHeightGrowth)
        {
            Vector128<float> b1         = AvxExtensions.BroadcastScalarToVector128(site.B1);
            Vector128<float> b2         = AvxExtensions.BroadcastScalarToVector128(site.B2);
            Vector128<float> x2ToB2     = AvxExtensions.BroadcastScalarToVector128(site.X2toB2);
            Vector128<float> siteIndex  = AvxExtensions.BroadcastScalarToVector128(site.SiteIndexFromGround);
            Vector128<float> x1         = AvxExtensions.BroadcastScalarToVector128(site.X1);

            // XX1 = ln(height / siteIndex) / B1 + X2^B2
            Vector128<float> relativeHeightLog = MathV.Ln(Avx.Divide(treeHeight, siteIndex));
            Vector128<float> xx1               = Avx.Add(Avx.Divide(relativeHeightLog, b1), x2ToB2);
            Vector128<float> xx1NotPositive    = Avx.CompareLessThanOrEqual(xx1, Vector128<float>.Zero);

            // age = XX1^(1/B2) - X1, replaced by 500 in lanes where XX1 <= 0.
            Vector128<float> age = Avx.Subtract(MathV.Pow(xx1, Avx.Reciprocal(b2)), x1);
            age = Avx.BlendVariable(age, AvxExtensions.BroadcastScalarToVector128(500.0F), xx1NotPositive);

            // potentialHeight = siteIndex * exp(B1 * ((age + timeStep + X1)^B2 - X2^B2))
            Vector128<float> stepPlusX1 = AvxExtensions.BroadcastScalarToVector128(timeStepInYears + site.X1);
            Vector128<float> agedPower  = MathV.Pow(Avx.Add(age, stepPlusX1), b2);
            Vector128<float> exponent   = Avx.Multiply(b1, Avx.Subtract(agedPower, x2ToB2));
            Vector128<float> potential  = Avx.Multiply(siteIndex, MathV.Exp(exponent));

            potentialHeightGrowth = Avx.Subtract(potential, treeHeight);
            return age;
        }
Esempio n. 27
0
            /// <summary>
            /// Converts a line of 4-channel float pixels to 4-channel byte pixels. Channels 0..2 are
            /// divided by channel 3 and scaled to the byte range; channel 3 is scaled to the byte range.
            /// Pixels whose channel 3 is below 0.5/255 are written as four zero bytes.
            /// </summary>
            /// <remarks>
            /// NOTE(review): this looks like an un-premultiply-alpha conversion with alpha in channel 3 — confirm.
            /// When HWINTRINSICS is defined, full batches are handled by an AVX2 or SSE4.1 loop and any
            /// remaining pixels fall through to the scalar loop at the end.
            /// </remarks>
            /// <param name="ipstart">Start of the float input line.</param>
            /// <param name="opstart">Start of the byte output line.</param>
            /// <param name="cb">Length of the input line in bytes.</param>
            unsafe void IConversionProcessor.ConvertLine(byte *ipstart, byte *opstart, int cb)
            {
                float *ip = (float *)ipstart, ipe = (float *)(ipstart + cb);
                byte * op = opstart;

#if HWINTRINSICS
                if (Avx2.IsSupported)
                {
                    var vzero  = Vector256 <float> .Zero;
                    var vmin   = Vector256.Create(0.5f / byte.MaxValue);
                    var vscale = Vector256.Create((float)byte.MaxValue);

                    // Permutation indices applied to the packed bytes below.
                    var vmaskp = Avx.LoadVector256((int *)Unsafe.AsPointer(ref MemoryMarshal.GetReference(HWIntrinsics.PermuteMaskDeinterleave8x32)));

                    // Pull the end pointer back by one batch (32 floats) so the loop only runs
                    // while a full batch remains; it is restored after the loop for the scalar tail.
                    ipe -= Vector256 <byte> .Count;
                    while (ip <= ipe)
                    {
                        // Four vectors of 8 floats = 32 floats per iteration.
                        var vf0 = Avx.LoadVector256(ip);
                        var vf1 = Avx.LoadVector256(ip + Vector256 <float> .Count);
                        var vf2 = Avx.LoadVector256(ip + Vector256 <float> .Count * 2);
                        var vf3 = Avx.LoadVector256(ip + Vector256 <float> .Count * 3);
                        ip += Vector256 <byte> .Count;

                        // presumably broadcasts each pixel's alpha across its four lanes — depends on ShuffleMaskAlpha; TODO confirm
                        var vfa0 = Avx.Shuffle(vf0, vf0, HWIntrinsics.ShuffleMaskAlpha);
                        var vfa1 = Avx.Shuffle(vf1, vf1, HWIntrinsics.ShuffleMaskAlpha);
                        var vfa2 = Avx.Shuffle(vf2, vf2, HWIntrinsics.ShuffleMaskAlpha);
                        var vfa3 = Avx.Shuffle(vf3, vf3, HWIntrinsics.ShuffleMaskAlpha);

                        // Clamp the divisor to at least 0.5/255.
                        vfa0 = Avx.Max(vfa0, vmin);
                        vfa1 = Avx.Max(vfa1, vmin);
                        vfa2 = Avx.Max(vfa2, vmin);
                        vfa3 = Avx.Max(vfa3, vmin);

                        // Divide via reciprocal-multiply. NOTE(review): Avx.Reciprocal is an approximation,
                        // whereas the scalar tail uses an exact division — results may differ slightly.
                        vf0 = Avx.Multiply(vf0, Avx.Reciprocal(vfa0));
                        vf1 = Avx.Multiply(vf1, Avx.Reciprocal(vfa1));
                        vf2 = Avx.Multiply(vf2, Avx.Reciprocal(vfa2));
                        vf3 = Avx.Multiply(vf3, Avx.Reciprocal(vfa3));

                        // presumably puts the (clamped) alpha back into the alpha lane — depends on BlendMaskAlpha; TODO confirm
                        vf0 = Avx.Blend(vf0, vfa0, HWIntrinsics.BlendMaskAlpha);
                        vf1 = Avx.Blend(vf1, vfa1, HWIntrinsics.BlendMaskAlpha);
                        vf2 = Avx.Blend(vf2, vfa2, HWIntrinsics.BlendMaskAlpha);
                        vf3 = Avx.Blend(vf3, vfa3, HWIntrinsics.BlendMaskAlpha);

                        // Zero every lane whose clamped divisor equals the minimum.
                        // NOTE(review): this triggers at alpha <= vmin, while the scalar tail tests alpha < fmin.
                        vf0 = Avx.BlendVariable(vf0, vzero, HWIntrinsics.AvxCompareEqual(vfa0, vmin));
                        vf1 = Avx.BlendVariable(vf1, vzero, HWIntrinsics.AvxCompareEqual(vfa1, vmin));
                        vf2 = Avx.BlendVariable(vf2, vzero, HWIntrinsics.AvxCompareEqual(vfa2, vmin));
                        vf3 = Avx.BlendVariable(vf3, vzero, HWIntrinsics.AvxCompareEqual(vfa3, vmin));

                        // Scale to the byte range.
                        vf0 = Avx.Multiply(vf0, vscale);
                        vf1 = Avx.Multiply(vf1, vscale);
                        vf2 = Avx.Multiply(vf2, vscale);
                        vf3 = Avx.Multiply(vf3, vscale);

                        // Float -> int32; rounding is done by the conversion instruction rather than by adding 0.5.
                        var vi0 = Avx.ConvertToVector256Int32(vf0);
                        var vi1 = Avx.ConvertToVector256Int32(vf1);
                        var vi2 = Avx.ConvertToVector256Int32(vf2);
                        var vi3 = Avx.ConvertToVector256Int32(vf3);

                        // int32 -> int16 with signed saturation.
                        var vs0 = Avx2.PackSignedSaturate(vi0, vi1);
                        var vs1 = Avx2.PackSignedSaturate(vi2, vi3);

                        // int16 -> byte with unsigned saturation, then reorder the 32-bit groups;
                        // presumably this undoes the per-128-bit-lane interleaving of the packs — TODO confirm against the mask.
                        var vb0 = Avx2.PackUnsignedSaturate(vs0, vs1);
                        vb0 = Avx2.PermuteVar8x32(vb0.AsInt32(), vmaskp).AsByte();

                        Avx.Store(op, vb0);
                        op += Vector256 <byte> .Count;
                    }
                    ipe += Vector256 <byte> .Count;
                }
                else if (Sse41.IsSupported)
                {
                    // Same pipeline as the AVX2 branch with 128-bit vectors: 16 floats in, 16 bytes out per iteration.
                    var vzero  = Vector128 <float> .Zero;
                    var vmin   = Vector128.Create(0.5f / byte.MaxValue);
                    var vscale = Vector128.Create((float)byte.MaxValue);

                    // Bias the end pointer by one batch for the loop; restored afterwards.
                    ipe -= Vector128 <byte> .Count;
                    while (ip <= ipe)
                    {
                        var vf0 = Sse.LoadVector128(ip);
                        var vf1 = Sse.LoadVector128(ip + Vector128 <float> .Count);
                        var vf2 = Sse.LoadVector128(ip + Vector128 <float> .Count * 2);
                        var vf3 = Sse.LoadVector128(ip + Vector128 <float> .Count * 3);
                        ip += Vector128 <byte> .Count;

                        // presumably broadcasts the pixel's alpha across all four lanes — TODO confirm
                        var vfa0 = Sse.Shuffle(vf0, vf0, HWIntrinsics.ShuffleMaskAlpha);
                        var vfa1 = Sse.Shuffle(vf1, vf1, HWIntrinsics.ShuffleMaskAlpha);
                        var vfa2 = Sse.Shuffle(vf2, vf2, HWIntrinsics.ShuffleMaskAlpha);
                        var vfa3 = Sse.Shuffle(vf3, vf3, HWIntrinsics.ShuffleMaskAlpha);

                        // Clamp the divisor to at least 0.5/255.
                        vfa0 = Sse.Max(vfa0, vmin);
                        vfa1 = Sse.Max(vfa1, vmin);
                        vfa2 = Sse.Max(vfa2, vmin);
                        vfa3 = Sse.Max(vfa3, vmin);

                        // Divide via (approximate) reciprocal-multiply.
                        vf0 = Sse.Multiply(vf0, Sse.Reciprocal(vfa0));
                        vf1 = Sse.Multiply(vf1, Sse.Reciprocal(vfa1));
                        vf2 = Sse.Multiply(vf2, Sse.Reciprocal(vfa2));
                        vf3 = Sse.Multiply(vf3, Sse.Reciprocal(vfa3));

                        // presumably restores the (clamped) alpha into the alpha lane — TODO confirm
                        vf0 = Sse41.Blend(vf0, vfa0, HWIntrinsics.BlendMaskAlpha);
                        vf1 = Sse41.Blend(vf1, vfa1, HWIntrinsics.BlendMaskAlpha);
                        vf2 = Sse41.Blend(vf2, vfa2, HWIntrinsics.BlendMaskAlpha);
                        vf3 = Sse41.Blend(vf3, vfa3, HWIntrinsics.BlendMaskAlpha);

                        // Zero every lane whose clamped divisor equals the minimum.
                        vf0 = Sse41.BlendVariable(vf0, vzero, Sse.CompareEqual(vfa0, vmin));
                        vf1 = Sse41.BlendVariable(vf1, vzero, Sse.CompareEqual(vfa1, vmin));
                        vf2 = Sse41.BlendVariable(vf2, vzero, Sse.CompareEqual(vfa2, vmin));
                        vf3 = Sse41.BlendVariable(vf3, vzero, Sse.CompareEqual(vfa3, vmin));

                        // Scale to the byte range.
                        vf0 = Sse.Multiply(vf0, vscale);
                        vf1 = Sse.Multiply(vf1, vscale);
                        vf2 = Sse.Multiply(vf2, vscale);
                        vf3 = Sse.Multiply(vf3, vscale);

                        // Float -> int32.
                        var vi0 = Sse2.ConvertToVector128Int32(vf0);
                        var vi1 = Sse2.ConvertToVector128Int32(vf1);
                        var vi2 = Sse2.ConvertToVector128Int32(vf2);
                        var vi3 = Sse2.ConvertToVector128Int32(vf3);

                        // int32 -> int16 (signed saturation), then int16 -> byte (unsigned saturation).
                        var vs0 = Sse2.PackSignedSaturate(vi0, vi1);
                        var vs1 = Sse2.PackSignedSaturate(vi2, vi3);

                        var vb0 = Sse2.PackUnsignedSaturate(vs0, vs1);

                        Sse2.Store(op, vb0);
                        op += Vector128 <byte> .Count;
                    }
                    ipe += Vector128 <byte> .Count;
                }
#endif

                // Scalar path: handles everything when no SIMD branch ran, otherwise the leftover pixels.
                // NOTE(review): the constants are read back from Vector4 instances; the reason is not visible here.
                float fmax = new Vector4(byte.MaxValue).X, fround = new Vector4(0.5f).X, fmin = fround / fmax;

                while (ip < ipe)
                {
                    float f3 = ip[3];
                    if (f3 < fmin)
                    {
                        // Channel 3 too small: write all four output bytes as zero in one store.
                        *(uint *)op = 0;
                    }
                    else
                    {
                        // Scale factor combining the division by channel 3 and the scale to 255.
                        float f3i = fmax / f3;
                        byte  o0  = ClampToByte((int)(ip[0] * f3i + fround));
                        byte  o1  = ClampToByte((int)(ip[1] * f3i + fround));
                        byte  o2  = ClampToByte((int)(ip[2] * f3i + fround));
                        byte  o3  = ClampToByte((int)(f3 * fmax + fround));
                        op[0] = o0;
                        op[1] = o1;
                        op[2] = o2;
                        op[3] = o3;
                    }

                    // Advance one pixel: four floats in, four bytes out.
                    ip += 4;
                    op += 4;
                }
            }
        /// <summary>
        /// Sizes the sample grid from <c>TOTALBYTES</c>, fills the x/y coordinate axes, and then
        /// runs the escape-time iteration eight points at a time. Per-lane iteration counts go to
        /// <c>results2</c>; the last computed squared magnitudes go to <c>testValue2</c>.
        /// </summary>
        public unsafe void Vector256Mandel()
        {
            // How many floats fit in the configured byte budget.
            int floatBudget = TOTALBYTES / sizeof(float);

            // Both resolutions are rounded down to a multiple of 8 (one Vector256<float> holds 8 floats).
            resolutionX  = (int)MathF.Floor(MathF.Sqrt(floatBudget * ratioy_x));
            resolutionX -= resolutionX % 8;
            resolutionY  = (int)MathF.Floor(resolutionX * ratioy_x);
            resolutionY -= resolutionY % 8;

            STEP_X = (RIGHT_X - LEFT_X) / resolutionX;
            // Same step on both axes; the earlier "ratioy_x * STEP_X" was reported as a bug (reddit comment).
            STEP_Y = STEP_X;

            numberOfPoints = resolutionX * resolutionY;
            results2       = new float[numberOfPoints];
            xPoints        = new float[resolutionX];
            yPoints        = new float[resolutionY];

            // Fill the axes: x grows from LEFT_X, y shrinks from TOP_Y.
            var xAxis = xPoints.Span;
            for (int col = 0; col < resolutionX; col++)
            {
                xAxis[col] = LEFT_X + col * STEP_X;
            }

            var yAxis = yPoints.Span;
            for (int row = 0; row < resolutionY; row++)
            {
                yAxis[row] = TOP_Y - row * STEP_Y;
            }

            // y is read one scalar at a time (and broadcast); x, the results and the test buffer are viewed as vectors.
            ReadOnlySpan<float> ySpan = yPoints.Span;
            ReadOnlySpan<Vector256<float>> xSpan = MemoryMarshal.Cast<float, Vector256<float>>(xPoints.Span);
            Span<Vector256<float>> res = MemoryMarshal.Cast<float, Vector256<float>>(results2.Span);
            Span<Vector256<float>> testSpan = MemoryMarshal.Cast<float, Vector256<float>>(testValue2.Span);

            const int iterationCap = 256;
            Vector256<float> zero = Vector256<float>.Zero;
            Vector256<float> ones = Vector256.Create(1.0f);
            Vector256<float> escapeLimit = Vector256.Create(4.0f);
            int outIndex = 0;

            for (int row = 0; row < ySpan.Length; row++)
            {
                Vector256<float> cIm = Vector256.Create(ySpan[row]);

                for (int col = 0; col < xSpan.Length; col++)
                {
                    Vector256<float> cRe = xSpan[col];

                    // Running x^2, y^2 and (x + y)^2 of the current iterate, all starting at zero.
                    Vector256<float> reSq = zero;
                    Vector256<float> imSq = zero;
                    Vector256<float> sumSq = zero;

                    Vector256<float> counts = zero; // per-lane iteration counter
                    Vector256<float> alive = ones;  // 1.0 in lanes that have not escaped, 0.0 otherwise
                    Vector256<float> magSq;
                    int pass = 0;
                    bool keepGoing;

                    do
                    {
                        // x' = x^2 - y^2 + cx ;  y' = ((x + y)^2 - y^2 - x^2) + cy
                        Vector256<float> re = Avx.Add(Avx.Subtract(reSq, imSq), cRe);
                        Vector256<float> im = Avx.Add(Avx.Subtract(Avx.Subtract(sumSq, imSq), reSq), cIm);

                        reSq = Avx.Multiply(re, re);
                        imSq = Avx.Multiply(im, im);
                        Vector256<float> reim = Avx.Add(re, im);
                        sumSq = Avx.Multiply(reim, reim);

                        // Lanes whose squared magnitude is still <= 4.0.
                        magSq = Avx.Add(reSq, imSq);
                        Vector256<float> within = Avx.Compare(magSq, escapeLimit, FloatComparisonMode.OrderedLessThanOrEqualNonSignaling);

                        // Takes the second operand where the mask is set, the first otherwise:
                        // an escaped lane drops to 0.0 and stays there.
                        alive = Avx.BlendVariable(zero, alive, within);

                        // Continue while any lane is still inside and the cap has not been reached.
                        keepGoing = Avx.MoveMask(within) > 0 && pass < iterationCap;
                        if (keepGoing)
                        {
                            counts = Avx.Add(counts, alive);
                            pass++;
                        }
                    }
                    while (keepGoing);

                    testSpan[outIndex] = magSq;
                    res[outIndex]      = counts;
                    outIndex++;
                }
            }
        }
Esempio n. 29
0
        /// <summary>
        /// Runs the escape-time iteration over the grid given by <c>xPoints</c> and <c>yPoints</c>,
        /// eight points at a time, and writes the per-lane iteration counts to <c>results</c>.
        /// </summary>
        /// <remarks>
        /// Before iterating, each vector is tested against two closed-form regions (main cardioid and
        /// period-2 bulb). When every lane lies inside the cardioid, or every lane lies inside the bulb,
        /// the iteration loop is skipped and all lanes receive the in-set marker value 255. Previously
        /// the all-in-cardioid case left the counts at zero, while cardioid lanes that shared a vector
        /// with an outside lane ran the loop to the cap.
        /// </remarks>
        public unsafe void Vector256Mandel()
        {
            const int maxInter = 256;

            ReadOnlySpan<float> ySpan = yPoints.Span;
            ReadOnlySpan<Vector256<float>> xSpan = MemoryMarshal.Cast<float, Vector256<float>>(xPoints.Span);
            Span<Vector256<float>> res = MemoryMarshal.Cast<float, Vector256<float>>(results.Span);
            int resVectorNumber = 0;

            Vector256<float> zeroVec = Vector256<float>.Zero;
            Vector256<float> oneVec = Vector256.Create(1.0f);
            Vector256<float> fourVec = Vector256.Create(4.0f);
            Vector256<float> one4Vec = Vector256.Create(0.25f);
            Vector256<float> one16Vec = Vector256.Create(1.0f / 16.0f);

            // Value stored for vectors that are skipped because they are known to be inside the set.
            // Note: lanes that run the loop all the way to the cap accumulate maxInter (256), not 255.
            Vector256<float> insideSetVec = Vector256.Create(255.0f);

            for (int countY = 0; countY < ySpan.Length; countY++)
            {
                Vector256<float> currYVec = Vector256.Create(ySpan[countY]);
                Vector256<float> ySquared = Avx.Multiply(currYVec, currYVec);

                for (int countX = 0; countX < xSpan.Length; countX++)
                {
                    Vector256<float> currXVec = xSpan[countX];
                    Vector256<float> xSquVec = zeroVec;
                    Vector256<float> ySquVec = zeroVec;
                    Vector256<float> zSquVec = zeroVec;
                    Vector256<float> interVec = zeroVec;
                    int inter = 0;

                    // Cardioid test: with q = (x - 1/4)^2 + y^2, a lane is outside when
                    // q * (q + (x - 1/4)) > y^2 / 4.
                    Vector256<float> xShifted = Avx.Subtract(currXVec, one4Vec);
                    Vector256<float> qVec = Avx.Add(Avx.Multiply(xShifted, xShifted), ySquared);
                    Vector256<float> cardioidLhs = Avx.Multiply(qVec, Avx.Add(qVec, xShifted));
                    Vector256<float> test = Avx.Compare(cardioidLhs, Avx.Multiply(one4Vec, ySquared), FloatComparisonMode.OrderedGreaterThanNonSignaling);
                    bool goOn = Avx.MoveMask(test) > 0;

                    if (goOn)
                    {
                        // Period-2 bulb test: a lane is outside when (x + 1)^2 + y^2 > 1/16.
                        Vector256<float> xPlusOne = Avx.Add(currXVec, oneVec);
                        Vector256<float> bulbLhs = Avx.Add(Avx.Multiply(xPlusOne, xPlusOne), ySquared);
                        test = Avx.Compare(bulbLhs, one16Vec, FloatComparisonMode.OrderedGreaterThanNonSignaling);
                        goOn = Avx.MoveMask(test) > 0;
                    }

                    if (!goOn)
                    {
                        // Whichever shortcut fired, every lane is inside the set: mark them all.
                        interVec = insideSetVec;
                    }

                    while (goOn)
                    {
                        // x' = x^2 - y^2 + cx ;  y' = ((x + y)^2 - y^2 - x^2) + cy
                        Vector256<float> xVec = Avx.Add(Avx.Subtract(xSquVec, ySquVec), currXVec);
                        Vector256<float> yVec = Avx.Add(Avx.Subtract(Avx.Subtract(zSquVec, ySquVec), xSquVec), currYVec);
                        xSquVec = Avx.Multiply(xVec, xVec);
                        ySquVec = Avx.Multiply(yVec, yVec);
                        Vector256<float> xPlusY = Avx.Add(xVec, yVec);
                        zSquVec = Avx.Multiply(xPlusY, xPlusY);

                        // Lanes whose squared magnitude is still <= 4.0 contribute 1.0 to the count.
                        test = Avx.Compare(Avx.Add(xSquVec, ySquVec), fourVec, FloatComparisonMode.OrderedLessThanOrEqualNonSignaling);
                        Vector256<float> sumVector = Avx.BlendVariable(zeroVec, oneVec, test);

                        // Keep going while any lane is still inside and the cap has not been reached.
                        goOn = (Avx.MoveMask(test) > 0) && (inter < maxInter);
                        if (goOn)
                        {
                            interVec = Avx.Add(interVec, sumVector);
                            inter++;
                        }
                    }

                    res[resVectorNumber] = interVec;
                    resVectorNumber++;
                }
            }
        }
Esempio n. 30
0
        // Select

        /// <summary>
        /// Lane-wise select: reinterprets <paramref name="m"/> as a float mask and blends
        /// <paramref name="b"/> with <paramref name="a"/> through <c>Avx.BlendVariable</c>.
        /// </summary>
        /// <param name="m">Selection mask, reinterpreted via <c>AsSingle()</c>.</param>
        /// <param name="a">Value taken in lanes where the mask is set.</param>
        /// <param name="b">Value taken in the remaining lanes.</param>
        /// <returns>The blended vector.</returns>
        public static f32 Select_f32(m32 m, f32 a, f32 b)
        {
            // BlendVariable picks its second operand where the mask is set, so 'a' goes second.
            var mask = m.AsSingle();
            return Avx.BlendVariable(b, a, mask);
        }