예제 #1
0
        /// <summary>
        /// Runs <c>Avx.HorizontalAdd</c> on 256-bit float and double vectors and checks
        /// that every output element holds the expected pairwise sum.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        /// <returns>
        /// <c>Pass</c> when all checks succeed (or AVX is not available), otherwise <c>Fail</c>.
        /// </returns>
        static unsafe int Main(string[] args)
        {
            int outcome = Pass;

            // Without AVX there is nothing to exercise; report the initial result.
            if (!Avx.IsSupported)
            {
                return outcome;
            }

            using (TestTable<float> floats = new TestTable<float>(
                       new float[8] { 22, -1, -50, 0, 22, -1, -50, 0 },
                       new float[8] { 22, -1, -50, 0, 22, -1, -50, 0 },
                       new float[8]))
            using (TestTable<double> doubles = new TestTable<double>(
                       new double[4] { 1, -5, 100, 0 },
                       new double[4] { 22, -1, -50, 0 },
                       new double[4]))
            {
                // ---- single precision -------------------------------------------------
                Vector256<float> floatSums = Avx.HorizontalAdd(
                    Unsafe.Read<Vector256<float>>(floats.inArray1Ptr),
                    Unsafe.Read<Vector256<float>>(floats.inArray2Ptr));
                Unsafe.Write(floats.outArrayPtr, floatSums);

                // Per 128-bit lane the output is (a0+a1, a2+a3, b0+b1, b2+b3).
                bool floatsOk = floats.CheckResult((a, b, sums) =>
                    (sums[0] == a[0] + a[1]) && (sums[2] == b[0] + b[1]) &&
                    (sums[1] == a[2] + a[3]) && (sums[3] == b[2] + b[3]) &&
                    (sums[4] == a[4] + a[5]) && (sums[6] == b[4] + b[5]) &&
                    (sums[5] == a[6] + a[7]) && (sums[7] == b[6] + b[7]));

                if (!floatsOk)
                {
                    Console.WriteLine("Avx HorizontalAdd failed on float:");
                    foreach (var value in floats.outArray)
                    {
                        Console.Write(value + ", ");
                    }
                    Console.WriteLine();
                    outcome = Fail;
                }

                // ---- double precision -------------------------------------------------
                Vector256<double> doubleSums = Avx.HorizontalAdd(
                    Unsafe.Read<Vector256<double>>(doubles.inArray1Ptr),
                    Unsafe.Read<Vector256<double>>(doubles.inArray2Ptr));
                Unsafe.Write(doubles.outArrayPtr, doubleSums);

                // Per 128-bit lane the output is (a0+a1, b0+b1).
                bool doublesOk = doubles.CheckResult((a, b, sums) =>
                    (sums[0] == a[0] + a[1]) && (sums[1] == b[0] + b[1]) &&
                    (sums[2] == a[2] + a[3]) && (sums[3] == b[2] + b[3]));

                if (!doublesOk)
                {
                    Console.WriteLine("Avx HorizontalAdd failed on double:");
                    foreach (var value in doubles.outArray)
                    {
                        Console.Write(value + ", ");
                    }
                    Console.WriteLine();
                    outcome = Fail;
                }
            }

            return outcome;
        }
예제 #2
0
        /// <summary>
        /// Multiplies two sequences of complex numbers element by element.
        /// </summary>
        /// <param name="left">Left-hand operands.</param>
        /// <param name="right">Right-hand operands.</param>
        /// <returns>
        /// A newly allocated span of length <c>Math.Min(left.Length, right.Length)</c> whose
        /// i-th element is <c>left[i] * right[i]</c>.
        /// </returns>
        /// <remarks>
        /// When AVX is available, two complex values are processed per 256-bit vector; any
        /// remaining element (and every element on hardware without AVX) is multiplied with
        /// the scalar <see cref="Complex"/> operator.
        /// </remarks>
        public static Span <Complex> Multiply(ReadOnlySpan <Complex> left, ReadOnlySpan <Complex> right)
        {
            var result = new Complex[Math.Min(left.Length, right.Length)].AsSpan();

            // Index of the first element that still has to be handled by the scalar loop.
            int scalarStart = 0;

            // The intrinsics throw PlatformNotSupportedException without AVX, so guard them.
            if (Avx.IsSupported)
            {
                var vectorRes   = MemoryMarshal.Cast<Complex, Vector256<double>>(result);
                var vectorLeft  = MemoryMarshal.Cast<Complex, Vector256<double>>(left);
                var vectorRight = MemoryMarshal.Cast<Complex, Vector256<double>>(right);

                // Negates the (imag * imag) products so the horizontal add yields re*re - im*im.
                Vector256<double> realSigns = Vector256.Create(1.0, -1.0, 1.0, -1.0);

                for (int i = 0; i < vectorRes.Length; i++)
                {
                    var l = vectorLeft[i];   // (a, b, c, d)  = a+bi, c+di
                    var r = vectorRight[i];  // (e, f, g, h)  = e+fi, g+hi

                    // (ae, -bf, cg, -dh)
                    var realTerms = Avx.Multiply(Avx.Multiply(l, r), realSigns);

                    // Swap re/im inside each complex of r, giving (af, be, ch, dg).
                    var imagTerms = Avx.Multiply(l, Avx.Permute(r, 0b0101));

                    // (ae - bf, af + be, cg - dh, ch + dg)
                    vectorRes[i] = Avx.HorizontalAdd(realTerms, imagTerms);
                }

                // Each vector covers two complex values.
                scalarStart = 2 * vectorRes.Length;
            }

            for (int i = scalarStart; i < result.Length; i++)
            {
                result[i] = left[i] * right[i];
            }

            return result;
        }
예제 #3
0
        /// <summary>
        /// Horizontally adds adjacent pairs of elements from <paramref name="left"/> and
        /// <paramref name="right"/>.
        /// </summary>
        /// <param name="left">First source vector.</param>
        /// <param name="right">Second source vector.</param>
        /// <returns>
        /// The result of <c>Avx.HorizontalAdd</c> when AVX is supported; otherwise the result
        /// of the software fallback.
        /// </returns>
        public static Vector256 <double> HorizontalAdd(Vector256 <double> left, Vector256 <double> right)
            => Avx.IsSupported
                ? Avx.HorizontalAdd(left, right)
                : HorizontalAdd_Software(left, right);
예제 #4
0
        /// <summary>
        /// Ray/triangle intersection test using AVX for the 4-wide vector math and SSE2/SSE3
        /// for the 2-wide follow-up work. Returns <c>default</c> early when the computed
        /// u/v/distance values fall outside the accepted range.
        /// </summary>
        /// <remarks>
        /// Only the first part of the method is visible in this excerpt; the body continues
        /// past the early-out below.
        /// </remarks>
        private Hit[] RayTraceAVXFaster(Ray ray)
        {
            Vector256 <double> dir      = (Vector256 <double>)ray.Direction;
            Vector256 <double> vert0    = (Vector256 <double>)Vert0.Position;
            Vector256 <double> edge0to1 = (Vector256 <double>)Edge0to1;
            Vector256 <double> edge0to2 = (Vector256 <double>)Edge0to2;

            // Ray origin relative to the triangle's first vertex, plus the two cross products
            // that every dot product below is taken against.
            Vector256 <double> offset = Avx.Subtract((Vector256 <double>)ray.Origin, vert0);
            Vector256 <double> side1  = SIMDHelpers.Cross(offset, edge0to1);
            Vector256 <double> side2  = SIMDHelpers.Cross(dir, edge0to2);

            // Prepare all dot products
            Vector256 <double> uvTemp    = Avx.Multiply(offset, side2);         // u
            Vector256 <double> temp      = Avx.Multiply(dir, side1);            // v
            Vector256 <double> edge2Temp = Avx.Multiply(edge0to2, side1);
            Vector256 <double> distTemp  = Avx.Multiply(edge0to1, side2);

            // First reduction step: add adjacent pairs. uvTemp packs the u and v partial sums
            // into one vector; the other two are added with themselves.
            uvTemp    = Avx.HorizontalAdd(uvTemp, temp);
            edge2Temp = Avx.HorizontalAdd(edge2Temp, edge2Temp);
            distTemp  = Avx.HorizontalAdd(distTemp, distTemp);

            // Complete all dot products for SSE ops
            // (SIMDHelpers.Add2 is not visible here; presumably it adds the two 128-bit halves.)
            Vector128 <double> uvs   = SIMDHelpers.Add2(uvTemp);
            Vector128 <double> dist  = SIMDHelpers.Add2(edge2Temp);
            Vector128 <double> temp1 = SIMDHelpers.Add2(distTemp);
            Vector128 <double> temp2;

            // vec2 constants we'll be using later
            Vector128 <double> ones2   = SIMDHelpers.BroadcastScalar2(1D);
            Vector128 <double> zeroes2 = new Vector128 <double>();

            // Reciprocal of distance along edge0to1
            temp1 = Sse2.Divide(ones2, temp1);
            // Comparing a value with itself is "ordered" only when it is not NaN, so temp2 is
            // an all-ones mask for the non-NaN lanes.
            temp2 = Sse2.CompareOrdered(temp1, temp1);
            // Remove NaNs from the result, replaced with 0
            Vector128 <double> distZeroed = Sse2.And(temp1, temp2);

            // Scale the u/v and distance dot products by the (NaN-cleared) reciprocal.
            uvs  = Sse2.Multiply(uvs, distZeroed);
            dist = Sse2.Multiply(dist, distZeroed);

            // compare uvs < 0 and > 1, dist < 0, jump out if any of those conditions are met
            temp1 = Sse2.CompareLessThan(uvs, zeroes2);
            // With Mirror set, u and v are each compared against 1; otherwise their sum (u + v) is.
            temp2 = Mirror ? uvs : Sse3.HorizontalAdd(uvs, uvs);
            temp2 = Sse2.CompareGreaterThan(temp2, ones2);
            temp1 = Sse2.Or(temp1, temp2);
            temp2 = Sse2.CompareLessThan(dist, zeroes2);
            temp1 = Sse2.Or(temp1, temp2);

            // TestZ returns true only when no bit of the combined mask is set, i.e. when none
            // of the rejection conditions fired.
            if (!Avx.TestZ(temp1, temp1))
            {
                return(default);
예제 #5
0
        /// <summary>
        /// Returns the sum of all elements of <paramref name="vector"/>.
        /// </summary>
        /// <param name="vector">The vector whose elements are added together.</param>
        /// <returns>The sum of every element in <paramref name="vector"/>.</returns>
        public static double ReduceSum(this Vector <double> vector)
        {
#if NETCOREAPP3_0
            // Vector<double> is only 256 bits wide when the runtime selected its AVX2 path.
            // On AVX-only hardware it is 128 bits, and reinterpreting it as Vector256<double>
            // would read 16 bytes past the value, so the width must be checked as well.
            if (Avx.IsSupported && Vector <double> .Count == Vector256 <double> .Count)
            {
                Vector256 <double> a = Unsafe.As <Vector <double>, Vector256 <double> >(ref vector);

                // (a0 + a1, a0 + a1, a2 + a3, a2 + a3)
                Vector256 <double> pairSums = Avx.HorizontalAdd(a, a);

                // Add the two 128-bit halves: element 0 becomes a0 + a1 + a2 + a3.
                Vector128 <double> total = Sse2.Add(pairSums.GetLower(), pairSums.GetUpper());

                return(total.ToScalar());
            }
#endif
            // Portable fallback: the dot product with an all-ones vector is the element sum.
            return(Vector.Dot(Vector <double> .One, vector));
        }
예제 #6
0
        /// <summary>
        /// Computes the 4-component dot product of <paramref name="left"/> and
        /// <paramref name="right"/>.
        /// </summary>
        /// <param name="left">First vector.</param>
        /// <param name="right">Second vector.</param>
        /// <returns>
        /// On the AVX path, a vector with the dot product in every element; otherwise whatever
        /// the software fallback returns.
        /// </returns>
        public static Vector256 <double> DotProduct4D(Vector256 <double> left, Vector256 <double> right)
        {
            if (!Avx.IsSupported)
            {
                return DotProduct4D_Software(left, right);
            }

            // Element-wise products: (x, y, z, w).
            Vector256<double> products = Avx.Multiply(left, right);

            // Adjacent pairs within each 128-bit lane: (x + y, x + y, z + w, z + w).
            Vector256<double> pairSums = Avx.HorizontalAdd(products, products);

            // Exchange the two 128-bit halves: (z + w, z + w, x + y, x + y).
            Vector256<double> swappedHalves = Avx.Permute2x128(pairSums, pairSums, 0b_0000_0001);

            // Every element now holds x + y + z + w.
            return Avx.Add(pairSums, swappedHalves);
        }
예제 #7
0
        /// <summary>
        /// Masked dot product of <paramref name="left"/> and <paramref name="right"/> that
        /// combines the two 128-bit halves with <c>Avx.Permute2x128</c>.
        /// </summary>
        /// <param name="left">First vector.</param>
        /// <param name="right">Second vector.</param>
        /// <returns>The summed products, masked once more with <c>MaskWDouble</c>.</returns>
        public Vector256 <double> Permute(Vector256 <double> left, Vector256 <double> right)
        {
            // Element-wise products, then clear whatever MaskWDouble masks out
            // (the W element, going by the mask's name).
            Vector256<double> masked = Avx.And(Avx.Multiply(left, right), MaskWDouble);

            // Sum adjacent pairs inside each 128-bit lane.
            Vector256<double> pairSums = Avx.HorizontalAdd(masked, masked);

            // Exchange the two 128-bit halves and add, so every element holds the full sum.
            Vector256<double> swappedHalves = Avx.Permute2x128(pairSums, pairSums, 0b_0000_0001);
            Vector256<double> total = Avx.Add(pairSums, swappedHalves);

            // Apply the mask again to the final result.
            return Avx.And(total, MaskWDouble);
        }
예제 #8
0
        /// <summary>
        /// Masked dot-product variant that uses two horizontal adds with a shuffle in between
        /// instead of a 128-bit half swap.
        /// </summary>
        /// <param name="left">First vector.</param>
        /// <param name="right">Second vector.</param>
        /// <returns>The reduced products, masked once more with <c>MaskWDouble</c>.</returns>
        public Vector256 <double> DoubleHadd(Vector256 <double> left, Vector256 <double> right)
        {
            // Element-wise products, then clear whatever MaskWDouble masks out
            // (the W element, going by the mask's name).
            Vector256<double> masked = Avx.And(Avx.Multiply(left, right), MaskWDouble);

            // First horizontal add: adjacent pairs inside each 128-bit lane.
            Vector256<double> pairSums = Avx.HorizontalAdd(masked, masked);

            // NOTE(review): Avx.Shuffle on doubles selects elements within each 128-bit lane,
            // so unlike the Permute2x128 variant this does not obviously combine the two
            // halves - confirm that the final values are what is intended.
            Vector256<double> shuffled = Avx.Shuffle(pairSums, pairSums, ShuffleValues._3_1_2_0);

            // Second horizontal add over the shuffled values.
            Vector256<double> total = Avx.HorizontalAdd(shuffled, shuffled);

            // Apply the mask again to the final result.
            return Avx.And(total, MaskWDouble);
        }
예제 #9
0
        /// <summary>
        /// Sums every element of <paramref name="array"/>, using 256-bit AVX adds for the
        /// bulk of the data.
        /// </summary>
        /// <param name="array">Values to add. May be empty.</param>
        /// <returns>The sum of all elements; <c>0</c> for an empty array.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="array"/> is null.</exception>
        /// <remarks>
        /// Full groups of eight floats are accumulated in one vector register; the remaining
        /// (length % 8) elements are added with scalar code so that nothing is read past the
        /// end of the array. Without AVX the whole array is summed with scalar code.
        /// </remarks>
        public static unsafe float Sum_AVX(float[] array)
        {
            if (array == null)
            {
                throw new ArgumentNullException(nameof(array));
            }

            int length = array.Length;
            int i      = 0;
            float total = 0f;

            if (Avx.IsSupported && length >= Vector256<float>.Count)
            {
                Vector256<float> sum = Vector256<float>.Zero;

                fixed (float* ptr = array)
                {
                    // Stop before the last partial group; a 256-bit load there would overrun.
                    int lastFullGroup = length - Vector256<float>.Count;
                    for (; i <= lastFullGroup; i += Vector256<float>.Count)
                    {
                        Vector256<float> current = Avx.LoadVector256(ptr + i);
                        sum = Avx.Add(current, sum);
                    }
                }

                // Horizontal sum: after two in-lane horizontal adds each 128-bit half holds
                // the total of its own four elements in every slot.
                Vector256<float> ha  = Avx.HorizontalAdd(sum, sum);
                Vector256<float> ha2 = Avx.HorizontalAdd(ha, ha);

                // Add the upper half to the lower half; element 0 is the vector total.
                Vector128<float> upper = Avx.ExtractVector128(ha2, 1);
                total = Sse.Add(ha2.GetLower(), upper).ToScalar();
            }

            // Scalar tail (or the whole array when AVX is unavailable).
            for (; i < length; i++)
            {
                total += array[i];
            }

            return total;
        }
예제 #10
0
        /// <summary>
        /// Computes the dot product of <paramref name="left"/> and <paramref name="right"/>
        /// after masking the products with <c>DoubleConstants.MaskW</c>.
        /// </summary>
        /// <param name="left">First vector.</param>
        /// <param name="right">Second vector.</param>
        /// <returns>
        /// On the AVX path, a vector with the summed products in every element; otherwise
        /// whatever the software fallback returns.
        /// </returns>
        public static Vector256 <double> DotProduct3D(Vector256 <double> left, Vector256 <double> right)
        {
            // No AVX: defer to the software implementation.
            if (!Avx.IsSupported)
            {
                return DotProduct3D_Software(left, right);
            }

            // Element-wise products with the masked-out element cleared
            // (the W element, going by the mask's name), i.e. (x, y, z, 0).
            Vector256<double> masked = Avx.And(Avx.Multiply(left, right), DoubleConstants.MaskW);

            // Adjacent pairs within each 128-bit lane: (x + y, x + y, z + 0, z + 0).
            Vector256<double> pairSums = Avx.HorizontalAdd(masked, masked);

            // Exchange the two 128-bit halves and add: every element becomes x + y + z.
            Vector256<double> swappedHalves = Avx.Permute2x128(pairSums, pairSums, 0b_0000_0001);
            return Avx.Add(pairSums, swappedHalves);
        }
        // This function implements Algorithm 1 in https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf.
        // Compute the output value of the field-aware factorization, as the sum of the linear part and the latent part.
        // The linear part is the inner product of linearWeights and featureValues.
        // The latent part is the sum of all intra-field interactions in one field f, for all fields possible.
        //
        // Parameters:
        //   fieldIndices   - field index of each of the `count` active entries.
        //   featureIndices - feature index of each of the `count` active entries.
        //   featureValues  - value of each of the `count` active entries.
        //   linearWeights  - linear weights, indexed by feature index.
        //   latentWeights  - latent vectors, laid out as [feature][field][latentDim].
        //   latentSum      - scratch/output buffer of fieldCount * fieldCount * latentDim floats;
        //                    zeroed here and then filled with the per-field-pair sums q.
        //   response       - receives linearResponse + latentResponse.
        //   fieldCount     - number of fields (m).
        //   latentDim      - length of each latent vector (d).
        //   count          - number of active entries (c).
        //
        // All vector loops advance eight floats at a time and stop while a full group of
        // eight remains, so elements beyond the last multiple of 8 in latentDim are not
        // processed. NOTE(review): this presumably relies on latentDim being a multiple of 8
        // - confirm against the callers.
        public static unsafe void CalculateIntermediateVariables(int *fieldIndices, int *featureIndices, float *featureValues,
                                                                 float *linearWeights, float *latentWeights, float *latentSum, float *response, int fieldCount, int latentDim, int count)
        {
            // AVX is required; there is no fallback path in this method.
            Contracts.Assert(Avx.IsSupported);

            // The number of all possible fields.
            int    m              = fieldCount;
            int    d              = latentDim;
            int    c              = count;
            int *  pf             = fieldIndices;
            int *  pi             = featureIndices;
            float *px             = featureValues;
            float *pw             = linearWeights;
            float *pv             = latentWeights;
            float *pq             = latentSum;
            float  linearResponse = 0;
            float  latentResponse = 0;

            // Clear the whole m x m x d accumulator before summing into it.
            Unsafe.InitBlock(pq, 0, (uint)(m * m * d * sizeof(float)));

            // y accumulates the inter-field terms, tmp the intra-field terms.
            Vector256 <float> y   = Vector256 <float> .Zero;
            Vector256 <float> tmp = Vector256 <float> .Zero;

            for (int i = 0; i < c; i++)
            {
                int f = pf[i];
                int j = pi[i];
                linearResponse += pw[j] * px[i];

                // Broadcast the feature value to all eight lanes, and its square.
                Vector256 <float> x  = Avx.BroadcastScalarToVector256(px + i);
                Vector256 <float> xx = Avx.Multiply(x, x);

                // tmp -= <v_j,f, v_j,f> * x * x
                int vBias = j * m * d + f * d;

                // j-th feature's latent vector in the f-th field hidden space.
                float *vjf = pv + vBias;

                for (int k = 0; k + 8 <= d; k += 8)
                {
                    Vector256 <float> vjfBuffer = Avx.LoadVector256(vjf + k);
                    tmp = MultiplyAddNegated(Avx.Multiply(vjfBuffer, vjfBuffer), xx, tmp);
                }

                for (int fprime = 0; fprime < m; fprime++)
                {
                    vBias = j * m * d + fprime * d;
                    int    qBias    = f * m * d + fprime * d;
                    float *vjfprime = pv + vBias;
                    float *qffprime = pq + qBias;

                    // q_f,f' += v_j,f' * x
                    for (int k = 0; k + 8 <= d; k += 8)
                    {
                        Vector256 <float> vjfprimeBuffer = Avx.LoadVector256(vjfprime + k);
                        Vector256 <float> q = Avx.LoadVector256(qffprime + k);
                        q = MultiplyAdd(vjfprimeBuffer, x, q);
                        Avx.Store(qffprime + k, q);
                    }
                }
            }

            for (int f = 0; f < m; f++)
            {
                // tmp += <q_f,f, q_f,f>
                float *qff = pq + f * m * d + f * d;
                for (int k = 0; k + 8 <= d; k += 8)
                {
                    Vector256 <float> qffBuffer = Avx.LoadVector256(qff + k);

                    // Intra-field interactions.
                    tmp = MultiplyAdd(qffBuffer, qffBuffer, tmp);
                }

                // y += <q_f,f', q_f',f>, f != f'
                // This loop handles inter-field interactions because f != f'.
                for (int fprime = f + 1; fprime < m; fprime++)
                {
                    float *qffprime = pq + f * m * d + fprime * d;
                    float *qfprimef = pq + fprime * m * d + f * d;
                    for (int k = 0; k + 8 <= d; k += 8)
                    {
                        // Inter-field interaction.
                        Vector256 <float> qffprimeBuffer = Avx.LoadVector256(qffprime + k);
                        Vector256 <float> qfprimefBuffer = Avx.LoadVector256(qfprimef + k);
                        y = MultiplyAdd(qffprimeBuffer, qfprimefBuffer, y);
                    }
                }
            }

            // Fold the intra-field accumulator into y, scaled by _point5 (declared elsewhere;
            // presumably a vector of 0.5 - confirm).
            y   = MultiplyAdd(_point5, tmp, y);
            // Horizontal sum of the eight lanes: add the swapped 128-bit halves, then two
            // in-lane horizontal adds leave the total in every element.
            tmp = Avx.Add(y, Avx.Permute2x128(y, y, 1));
            tmp = Avx.HorizontalAdd(tmp, tmp);
            y   = Avx.HorizontalAdd(tmp, tmp);
            Sse.StoreScalar(&latentResponse, y.GetLower()); // The lowest slot is the response value.
            *response = linearResponse + latentResponse;
        }
예제 #12
0
 /// <summary>Adds together all eight elements of <paramref name="v"/>.</summary>
 /// <param name="v">The vector to reduce.</param>
 /// <returns>The sum of the eight float elements.</returns>
 private static float SumVector256(Vector256 <float> v)
 {
     // Pairwise sums inside each 128-bit half: (0+1, 2+3, ...) and (4+5, 6+7, ...).
     Vector256<float> pairs = Avx.HorizontalAdd(v, v);

     // Second pass: slot 0 of each half now holds that half's total (0+1+2+3 and 4+5+6+7).
     Vector256<float> quads = Avx.HorizontalAdd(pairs, pairs);

     float upperTotal = quads.GetUpper().ToScalar();
     float lowerTotal = quads.GetLower().ToScalar();
     return upperTotal + lowerTotal;
 }
예제 #13
0
파일: FiaVolume.cs 프로젝트: OSU-MARS/SEEM
        /// <summary>
        /// Computes the stand's Scribner board foot volume per acre (6 inch top, 32 foot logs)
        /// by evaluating the volume equations four trees at a time with 128-bit SIMD and
        /// accumulating each tree's volume multiplied by its live expansion factor.
        /// </summary>
        /// <param name="trees">Trees to process; must be Douglas-fir in English units.</param>
        /// <returns>Sum over all processed trees of expansion factor times Scribner volume.</returns>
        /// <exception cref="NotSupportedException">
        /// The species is not <c>FiaCode.PseudotsugaMenziesii</c> or the units are not <c>Units.English</c>.
        /// </exception>
        public static unsafe float GetScribnerBoardFeetPerAcre(Trees trees)
        {
            // for now, assume all trees are of the same species
            if (trees.Species != FiaCode.PseudotsugaMenziesii)
            {
                throw new NotSupportedException();
            }
            if (trees.Units != Units.English)
            {
                throw new NotSupportedException();
            }

            // Douglas-fir
            // Upper bounds used only by the debug assertions in the loop below.
            #if DEBUG
            Vector128 <float> v6p8 = AvxExtensions.BroadcastScalarToVector128(6.8F);
            Vector128 <float> v10k = AvxExtensions.BroadcastScalarToVector128(10.0F * 1000.0F);
            #endif

            // constants
            Vector128 <float> forestersEnglish = AvxExtensions.BroadcastScalarToVector128(Constant.ForestersEnglish);
            Vector128 <float> one = AvxExtensions.BroadcastScalarToVector128(1.0F);
            Vector128 <float> six = AvxExtensions.BroadcastScalarToVector128(6.0F);

            // Coefficients of the cubic volume (cvtsl) and b4 equations; names encode the
            // value (v = vector, m = minus, p = decimal point).
            Vector128 <float> vm3p21809   = AvxExtensions.BroadcastScalarToVector128(-3.21809F); // b4
            Vector128 <float> v0p04948    = AvxExtensions.BroadcastScalarToVector128(0.04948F);
            Vector128 <float> vm0p15664   = AvxExtensions.BroadcastScalarToVector128(-0.15664F);
            Vector128 <float> v2p02132    = AvxExtensions.BroadcastScalarToVector128(2.02132F);
            Vector128 <float> v1p63408    = AvxExtensions.BroadcastScalarToVector128(1.63408F);
            Vector128 <float> vm0p16184   = AvxExtensions.BroadcastScalarToVector128(-0.16184F);
            Vector128 <float> v1p033      = AvxExtensions.BroadcastScalarToVector128(1.033F);
            Vector128 <float> v1p382937   = AvxExtensions.BroadcastScalarToVector128(1.382937F);
            Vector128 <float> vm0p4015292 = AvxExtensions.BroadcastScalarToVector128(-0.4015292F);
            Vector128 <float> v0p087266   = AvxExtensions.BroadcastScalarToVector128(0.087266F);
            Vector128 <float> vm0p174533  = AvxExtensions.BroadcastScalarToVector128(-0.174533F);

            // Coefficients of the rc6, rs616 and rs632 conversion ratios.
            Vector128 <float> vm0p6896598794 = AvxExtensions.BroadcastScalarToVector128(-0.6896598794F); // rc6-rs632
            Vector128 <float> v0p993         = AvxExtensions.BroadcastScalarToVector128(0.993F);
            Vector128 <float> v0p174439      = AvxExtensions.BroadcastScalarToVector128(0.174439F);
            Vector128 <float> v0p117594      = AvxExtensions.BroadcastScalarToVector128(0.117594F);
            Vector128 <float> vm8p210585     = AvxExtensions.BroadcastScalarToVector128(-8.210585F);
            Vector128 <float> v0p236693      = AvxExtensions.BroadcastScalarToVector128(0.236693F);
            Vector128 <float> v0p00001345    = AvxExtensions.BroadcastScalarToVector128(0.00001345F);
            Vector128 <float> v0p00001937    = AvxExtensions.BroadcastScalarToVector128(0.00001937F);
            Vector128 <float> v1p001491      = AvxExtensions.BroadcastScalarToVector128(1.001491F);
            Vector128 <float> vm6p924097     = AvxExtensions.BroadcastScalarToVector128(-6.924097F);
            Vector128 <float> v0p912733      = AvxExtensions.BroadcastScalarToVector128(0.912733F);
            Vector128 <float> v0p00001351    = AvxExtensions.BroadcastScalarToVector128(0.00001351F);

            fixed(float *dbh = &trees.Dbh[0], expansionFactors = &trees.LiveExpansionFactor[0], height = &trees.Height[0])
            {
                Vector128 <float> standBoardFeetPerAcre = Vector128 <float> .Zero;

                // Each iteration loads a full vector starting at treeIndex, so the last
                // iteration can read elements at or beyond trees.Count.
                // NOTE(review): presumably the arrays are padded to a multiple of the SIMD
                // width with values that contribute zero volume - confirm.
                for (int treeIndex = 0; treeIndex < trees.Count; treeIndex += Constant.Simd128x4.Width)
                {
                    Vector128 <float> dbhInInches  = Avx.LoadVector128(dbh + treeIndex);
                    Vector128 <float> heightInFeet = Avx.LoadVector128(height + treeIndex);

                    Vector128 <float> logDbhInInches  = MathV.Log10(dbhInInches);
                    Vector128 <float> logHeightInFeet = MathV.Log10(heightInFeet);
                    // FiaCode.PseudotsugaMenziesii => -3.21809F + 0.04948F * logHeightInFeet * logDbhInInches - 0.15664F * logDbhInInches * logDbhInInches +
                    //                                  2.02132F * logDbhInInches + 1.63408F * logHeightInFeet - 0.16184F * logHeightInFeet * logHeightInFeet,
                    Vector128 <float> cvtsl = Avx.Add(vm3p21809, Avx.Multiply(v0p04948, Avx.Multiply(logHeightInFeet, logDbhInInches)));
                    cvtsl = Avx.Add(cvtsl, Avx.Multiply(vm0p15664, Avx.Multiply(logDbhInInches, logDbhInInches)));
                    cvtsl = Avx.Add(cvtsl, Avx.Multiply(v2p02132, logDbhInInches));
                    cvtsl = Avx.Add(cvtsl, Avx.Multiply(v1p63408, logHeightInFeet));
                    cvtsl = Avx.Add(cvtsl, Avx.Multiply(vm0p16184, Avx.Multiply(logHeightInFeet, logHeightInFeet)));
                    Vector128 <float> cubicFeet = MathV.Exp10(cvtsl);

                    Vector128 <float> dbhSquared            = Avx.Multiply(dbhInInches, dbhInInches); // could be consolidated by merging other scaling constants with Forester's constant for basal area
                    Vector128 <float> basalAreaInSquareFeet = Avx.Multiply(forestersEnglish, dbhSquared);
                    // b4 = cubicFeet / (1.033F * (1.0F + 1.382937F * MathV.Exp(-4.015292F * dbhInInches / 10.0F)) * (basalAreaInSquareFeet + 0.087266F) - 0.174533F);
                    // (the -4.015292 / 10 factor is pre-folded into the vm0p4015292 constant)
                    Vector128 <float> b4 = Avx.Divide(cubicFeet, Avx.Add(Avx.Multiply(v1p033,
                                                                                      Avx.Multiply(Avx.Add(one, Avx.Multiply(v1p382937,
                                                                                                                             MathV.Exp(Avx.Multiply(vm0p4015292,
                                                                                                                                                    dbhInInches)))),
                                                                                                   Avx.Add(basalAreaInSquareFeet, v0p087266))),
                                                                         vm0p174533));
                    Vector128 <float> cv4 = Avx.Multiply(b4, Avx.Subtract(basalAreaInSquareFeet, v0p087266));

                    // conversion to Scribner volumes for 32 foot trees
                    // Waddell 2014:32
                    // rc6 = 0.993F * (1.0F - MathF.Pow(0.62F, dbhInInches - 6.0F));
                    // NOTE(review): the constant is log2(0.62), which reproduces Pow(0.62, x) only
                    // if MathV.Exp is a base-2 exponential; MathV.Exp is also used for the b4 term
                    // above - confirm which base MathV.Exp implements.
                    Vector128 <float> rc6   = Avx.Multiply(v0p993, Avx.Subtract(one, MathV.Exp(Avx.Multiply(vm0p6896598794, Avx.Subtract(dbhInInches, six))))); // log2(0.62) = -0.6896598794
                    Vector128 <float> cv6   = Avx.Multiply(rc6, cv4);
                    Vector128 <float> logB4 = MathV.Log10(b4);
                    // float rs616 = MathF.Pow(10.0F, 0.174439F + 0.117594F * logDbhInInches * logB4 - 8.210585F / (dbhInInches * dbhInInches) + 0.236693F * logB4 - 0.00001345F * b4 * b4 - 0.00001937F * dbhInInches * dbhInInches);
                    Vector128 <float> rs616l = Avx.Add(v0p174439, Avx.Multiply(v0p117594, Avx.Multiply(logDbhInInches, logB4)));
                    rs616l = Avx.Add(rs616l, Avx.Divide(vm8p210585, dbhSquared));
                    rs616l = Avx.Add(rs616l, Avx.Multiply(v0p236693, logB4));
                    rs616l = Avx.Subtract(rs616l, Avx.Multiply(v0p00001345, Avx.Multiply(b4, b4)));
                    rs616l = Avx.Subtract(rs616l, Avx.Multiply(v0p00001937, dbhSquared));
                    Vector128 <float> rs616 = MathV.Exp10(rs616l);
                    Vector128 <float> sv616 = Avx.Multiply(rs616, cv6); // Scribner board foot volume to a 6 inch top for 16 foot logs
                    // float rs632 = 1.001491F - 6.924097F / tarif + 0.00001351F * dbhInInches * dbhInInches;
                    // (tarif is computed inline here as 0.912733F * b4)
                    Vector128 <float> rs632 = Avx.Add(v1p001491, Avx.Divide(vm6p924097, Avx.Multiply(v0p912733, b4)));
                    rs632 = Avx.Add(rs632, Avx.Multiply(v0p00001351, dbhSquared));
                    // Trees with DBH of six inches or less contribute zero volume.
                    Vector128 <float> zeroVolumeMask = Avx.CompareLessThanOrEqual(dbhInInches, six);
                    Vector128 <float> sv632          = Avx.Multiply(rs632, sv616); // Scribner board foot volume to a 6 inch top for 32 foot logs
                    sv632 = Avx.BlendVariable(sv632, Vector128 <float> .Zero, zeroVolumeMask);

                    // Debug-only range checks on the intermediate ratios and the final volume;
                    // lanes excluded by zeroVolumeMask are replaced with an in-range value first.
                    #if DEBUG
                    DebugV.Assert(Avx.CompareGreaterThanOrEqual(Avx.BlendVariable(rc6, Vector128 <float> .Zero, zeroVolumeMask), Vector128 <float> .Zero));
                    DebugV.Assert(Avx.CompareLessThanOrEqual(rc6, one));
                    DebugV.Assert(Avx.CompareGreaterThanOrEqual(Avx.BlendVariable(rs616, one, zeroVolumeMask), one));
                    DebugV.Assert(Avx.CompareLessThanOrEqual(Avx.BlendVariable(rs616, Vector128 <float> .Zero, zeroVolumeMask), v6p8));
                    DebugV.Assert(Avx.CompareGreaterThanOrEqual(Avx.BlendVariable(rs632, Vector128 <float> .Zero, zeroVolumeMask), Vector128 <float> .Zero));
                    DebugV.Assert(Avx.CompareLessThanOrEqual(Avx.BlendVariable(rs632, Vector128 <float> .Zero, zeroVolumeMask), one));
                    DebugV.Assert(Avx.CompareGreaterThanOrEqual(Avx.BlendVariable(sv632, Vector128 <float> .Zero, zeroVolumeMask), Vector128 <float> .Zero));
                    DebugV.Assert(Avx.CompareLessThanOrEqual(Avx.BlendVariable(sv632, Vector128 <float> .Zero, zeroVolumeMask), v10k));
                    #endif

                    // Weight each tree's volume by its expansion factor and accumulate per lane.
                    Vector128 <float> expansionFactor = Avx.LoadVector128(expansionFactors + treeIndex);
                    standBoardFeetPerAcre = Avx.Add(standBoardFeetPerAcre, Avx.Multiply(expansionFactor, sv632));
                }

                // Two horizontal adds reduce the four lane totals to a single sum in element 0.
                standBoardFeetPerAcre = Avx.HorizontalAdd(standBoardFeetPerAcre, standBoardFeetPerAcre);
                standBoardFeetPerAcre = Avx.HorizontalAdd(standBoardFeetPerAcre, standBoardFeetPerAcre);
                return(standBoardFeetPerAcre.ToScalar());
            }
        }
예제 #14
0
        /// <summary>
        /// Walk-through of common <c>Vector128</c>/<c>Vector256</c>, AVX, SSE and FMA
        /// operations. Every result is computed only so it can be inspected (for example in
        /// a debugger); nothing is stored or returned. The trailing comments give the value
        /// each call produces for the operands in use at that point.
        /// </summary>
        public Intro()
        {
            var middleVector = Vector128.Create(1.0f);                      // middleVector = <1, 1, 1, 1>

            middleVector = Vector128.CreateScalar(-1.0f);                   // middleVector = <-1, 0, 0, 0>
            var floatBytes = Vector64.AsByte(Vector64.Create(1.0f, -1.0f)); // floatBytes = <0, 0, 128, 63, 0, 0, 128, 191>

            if (Avx.IsSupported)
            {
                var left  = Vector256.Create(-2.5f);                     // <-2.5, -2.5, -2.5, -2.5, -2.5, -2.5, -2.5, -2.5>
                var right = Vector256.Create(5.0f);                      // <5, 5, 5, 5, 5, 5, 5, 5>
                Vector256<float> result = Avx.AddSubtract(left, right);  // even lanes subtract, odd lanes add: <-7.5, 2.5, -7.5, 2.5, -7.5, 2.5, -7.5, 2.5>

                left   = Vector256.Create(-1.0f, -2.0f, -3.0f, -4.0f, -50.0f, -60.0f, -70.0f, -80.0f);
                right  = Vector256.Create(0.0f, 2.0f, 3.0f, 4.0f, 50.0f, 60.0f, 70.0f, 80.0f);
                result = Avx.UnpackHigh(left, right);              // result = <-3, 3, -4, 4, -70, 70, -80, 80>
                result = Avx.UnpackLow(left, right);               // result = <-1, 0, -2, 2, -50, 50, -60, 60>
                result = Avx.DotProduct(left, right, 0b1111_0001); // per 128-bit half: result = <-29, 0, 0, 0, -17400, 0, 0, 0>
                bool testResult = Avx.TestC(left, right);          // testResult = true  (every sign bit set in right is also set in left)
                testResult = Avx.TestC(right, left);               // testResult = false
                Vector256<float> result1 = Avx.Divide(left, right); // <-Infinity, -1, -1, -1, -1, -1, -1, -1>
                var plusOne = Vector256.Create(1.0f);
                // A "true" lane has all bits set, which reads back as NaN when viewed as a float.
                result = Avx.Compare(right, result1, FloatComparisonMode.OrderedGreaterThanNonSignaling);   // true in every lane
                result = Avx.Compare(right, result1, FloatComparisonMode.UnorderedNotLessThanNonSignaling); // true in every lane

                left   = Vector256.Create(0.0f, 3.0f, -3.0f, 4.0f, -50.0f, 60.0f, -70.0f, 80.0f);
                right  = Vector256.Create(0.0f, 2.0f, 3.0f, 2.0f, 50.0f, -60.0f, 70.0f, -80.0f);
                Vector256<float> nanInFirstPosition = Avx.Divide(left, right); // 0 / 0 = NaN in lane 0: <NaN, 1.5, -1, 2, -1, -1, -1, -1>
                left = Vector256.Create(1.1f, 3.3333333f, -3.0f, 4.22f, -50.0f, 60.0f, -70.0f, 80.0f);
                Vector256<float> InfInFirstPosition = Avx.Divide(left, right); // 1.1 / 0 = +Infinity in lane 0: <Infinity, ~1.67, -1, 2.11, -1, -1, -1, -1>

                left  = Vector256.Create(-1.1f, 3.0f, 1.0f / 3.0f, MathF.PI, -50.0f, 60.0f, -70.0f, 80.0f);
                right = Vector256.Create(0.0f, 2.0f, 3.1f, 2.0f, 50.0f, -60.0f, 70.0f, -80.0f);
                Vector256<float> compareResult = Avx.Compare(left, right, FloatComparisonMode.OrderedGreaterThanNonSignaling); // compareResult = <0, NaN, 0, NaN, 0, NaN, 0, NaN>
                Vector256<float> mixed         = Avx.BlendVariable(left, right, compareResult);                                // takes right where the mask is set: mixed = <-1.1, 2, 0.333..., 2, -50, -60, -70, -80>

                Vector256<float> other = right = Vector256.Create(0.0f, 2.0f, 3.0f, 2.0f, 50.0f, -60.0f, 70.0f, -80.0f);
                bool bRes    = Avx.TestZ(plusOne, compareResult);        // true:  no lane has the sign bit set in both operands
                bool bRes2   = Avx.TestC(plusOne, compareResult);        // false: compareResult has sign bits that plusOne lacks
                bool anyTrue = !Avx.TestZ(compareResult, compareResult); // true when at least one lane of the mask is set
                compareResult = Avx.Compare(nanInFirstPosition, right, FloatComparisonMode.OrderedEqualNonSignaling);                // only lane 3 (2 == 2): <0, 0, 0, NaN, 0, 0, 0, 0>
                compareResult = Avx.Compare(nanInFirstPosition, right, FloatComparisonMode.UnorderedEqualNonSignaling);              // lane 0 (NaN is unordered) and lane 3
                compareResult = Avx.Compare(InfInFirstPosition, right, FloatComparisonMode.UnorderedNotLessThanOrEqualNonSignaling); // lanes 0, 3, 5 and 7
                compareResult = Avx.Compare(InfInFirstPosition, right, FloatComparisonMode.OrderedGreaterThanNonSignaling);          // lanes 0, 3, 5 and 7
                var left128  = Vector128.Create(1.0f, 2.0f, 3.0f, 4.0f);
                var right128 = Vector128.Create(2.0f, 3.0f, 4.0f, 5.0f);
                Vector128<float> compResult128 = Sse.CompareGreaterThan(left128, right128); // compResult128 = <0, 0, 0, 0>

                int res = Avx.MoveMask(compareResult); // one bit per lane sign bit: 0b1010_1001 = 169
                if (Fma.IsSupported)
                {
                    Vector256<float> resultFma = Fma.MultiplyAdd(left, right, other); // = left * right + other for each element
                    resultFma = Fma.MultiplyAddNegated(left, right, other);           // = -(left * right) + other for each element
                    resultFma = Fma.MultiplySubtract(left, right, other);             // = left * right - other for each element
                    resultFma = Fma.MultiplyAddSubtract(left, right, other);          // even elements (0, 2, ...) subtract other, odd elements add other
                }

                // From here on: left  = <-1.1, 3, 0.333..., pi, -50, 60, -70, 80>
                //               right = <0, 2, 3, 2, 50, -60, 70, -80>
                result = Avx.DotProduct(left, right, 0b1010_0001); // lanes 1 and 3 of each half: result = <~12.28, 0, 0, 0, -10000, 0, 0, 0>
                result = Avx.Floor(left);                          // result = <-2, 3, 0, 3, -50, 60, -70, 80>
                result = Avx.Add(left, right);                     // result = <-1.1, 5, ~3.33, ~5.14, 0, 0, 0, 0>
                result = Avx.Ceiling(left);                        // result = <-1, 3, 1, 4, -50, 60, -70, 80>
                result = Avx.Multiply(left, right);                // result = <-0, 6, 1, ~6.28, -2500, -3600, -4900, -6400>
                result = Avx.HorizontalAdd(left, right);           // result = <1.9, ~3.47, 2, 5, 10, 10, -10, -10>
                result = Avx.HorizontalSubtract(left, right);      // result = <-4.1, ~-2.81, -2, 1, -110, -150, 110, 150>

                double[] someDoubles      = new double[] { 1.0, 3.0, -2.5, 7.5, 10.8, 0.33333 };
                double[] someOtherDoubles = new double[] { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 };
                double[] someResult       = new double[someDoubles.Length];
                float[]  someFloats       = new float[] { 1, 2, 3, 4, 10, 20, 30, 40, 0 };
                float[]  someOtherFloats  = new float[] { 1, 1, 1, 1, 1, 1, 1, 1 };
                unsafe
                {
                    // Load four doubles starting at index 1 and store them at the start of someResult.
                    fixed (double* ptr = &someDoubles[1])
                    {
                        fixed (double* ptr2 = &someResult[0])
                        {
                            Vector256<double> res2 = Avx.LoadVector256(ptr); // res2 = <3, -2.5, 7.5, 10.8>

                            Avx.Store(ptr2, res2);
                        }
                    }

                    fixed (float* ptr = &someFloats[0])
                    {
                        fixed (float* ptr2 = &someOtherFloats[0])
                        {
                            // Only lane 0 of each 128-bit half is multiplied: res2 = <1, 0, 0, 0, 10, 0, 0, 0>
                            Vector256<float> res2 = Avx.DotProduct(Avx.LoadVector256(ptr), Avx.LoadVector256(ptr2), 0b0001_0001);
                        }
                    }
                }
            }
        }