Exemplo n.º 1
0
        /// <summary>
        /// Returns the result of <c>Avx.MoveMask</c> for <paramref name="vector"/>, narrowed to a byte.
        /// </summary>
        /// <param name="vector">The vector of four doubles to summarize.</param>
        /// <returns>
        /// The hardware mask when AVX is supported; otherwise whatever <c>SoftwareFallback</c> produces.
        /// </returns>
        // NOTE(review): this excerpt ends before the method's closing brace, and SoftwareFallback is
        // defined outside the visible source - presumably it mirrors the hardware result; confirm.
        public static byte MoveMask(Vector256 <double> vector)
        {
            // Prefer the hardware instruction whenever the CPU exposes AVX.
            if (Avx.IsSupported)
            {
                return((byte)Avx.MoveMask(vector));
            }

            // No AVX available: defer to the software path.
            return(SoftwareFallback(vector));
Exemplo n.º 2
0
    /// <summary>
    /// Self-test for <c>Avx.CompareNotGreaterThanOrEqual</c> on doubles.
    /// </summary>
    /// <returns>
    /// <c>true</c> when AVX is unavailable or the sign-bit mask of the comparison equals the expected
    /// value; <c>false</c> (after printing a diagnostic) otherwise.
    /// </returns>
    static bool TestAvxCompareNotGreaterThanOrEqualDouble()
    {
        // Without AVX there is nothing to exercise, which counts as success.
        if (!Avx.IsSupported)
        {
            return true;
        }

        // Lanes 0..2 are "not >=" (two of them because a NaN is involved); lane 3 (2.0 vs 1.0) is not.
        const int expectedResult = 0b0111;

        Vector256<double> value1 = Vector256.Create(double.NaN, 1.0, 0.0, 2.0);
        Vector256<double> value2 = Vector256.Create(0.0, 2.0, double.NaN, 1.0);

        int actualResult = Avx.MoveMask(Avx.CompareNotGreaterThanOrEqual(value1, value2));
        if (actualResult == expectedResult)
        {
            return true;
        }

        Console.WriteLine($"{nameof(Avx)}.{nameof(Avx.CompareNotGreaterThanOrEqual)}({value1}, {value2}) returned {Convert.ToString(actualResult, 2)}; expected {Convert.ToString(expectedResult, 2)}");
        return false;
    }
Exemplo n.º 3
0
    /// <summary>
    /// Self-test for <c>Avx.CompareNotGreaterThan</c> on single-precision floats.
    /// </summary>
    /// <returns>
    /// <c>true</c> when AVX is unavailable or the sign-bit mask of the comparison equals the expected
    /// value; <c>false</c> (after printing a diagnostic) otherwise.
    /// </returns>
    static bool TestAvxCompareNotGreaterThanSingle()
    {
        // Without AVX there is nothing to exercise, which counts as success.
        if (!Avx.IsSupported)
        {
            return true;
        }

        // Only lanes 2 and 5 hold a strictly greater left operand, so only those bits are clear.
        const int expectedResult = 0b1101_1011;

        Vector256<float> value1 = Vector256.Create(float.NaN, 1.0f, 2.0f, 3.0f, 0.0f, 2.0f, 1.0f, 3.0f);
        Vector256<float> value2 = Vector256.Create(0.0f, 2.0f, 1.0f, 3.0f, float.NaN, 1.0f, 2.0f, 3.0f);

        int actualResult = Avx.MoveMask(Avx.CompareNotGreaterThan(value1, value2));
        if (actualResult == expectedResult)
        {
            return true;
        }

        Console.WriteLine($"{nameof(Avx)}.{nameof(Avx.CompareNotGreaterThan)}({value1}, {value2}) returned {Convert.ToString(actualResult, 2)}; expected {Convert.ToString(expectedResult, 2)}");
        return false;
    }
Exemplo n.º 4
0
        /// <summary>
        /// Checks <c>Avx.MoveMask</c> against known sign patterns for a float vector and a double vector.
        /// </summary>
        /// <param name="args">Unused command-line arguments.</param>
        /// <returns><c>Pass</c> when AVX is unavailable or both checks succeed; <c>Fail</c> otherwise.</returns>
        static unsafe int Main(string[] args)
        {
            // Nothing to verify on hardware without AVX.
            if (!Avx.IsSupported)
            {
                return Pass;
            }

            int testResult = Pass;

            // Negative values sit in lanes 1 and 5, so exactly those mask bits are expected.
            using (TestTable<float> floatTable = new TestTable<float>(new float[] { 1, -5, 100, 0, 1, -5, 100, 0 }))
            {
                Vector256<float> input = Unsafe.Read<Vector256<float>>(floatTable.inArray1Ptr);
                int res = Avx.MoveMask(input);

                if (res != 0b0010_0010)
                {
                    Console.WriteLine("Avx MoveMask failed on float:");
                    Console.WriteLine(res);
                    testResult = Fail;
                }
            }

            // Negative values sit in lanes 1 and 3.
            using (TestTable<double> doubleTable = new TestTable<double>(new double[] { 1, -5, 1, -5 }))
            {
                Vector256<double> input = Unsafe.Read<Vector256<double>>(doubleTable.inArray1Ptr);
                int res = Avx.MoveMask(input);

                if (res != 0b1010)
                {
                    Console.WriteLine("Avx MoveMask failed on double:");
                    Console.WriteLine(res);
                    testResult = Fail;
                }
            }

            return testResult;
        }
Exemplo n.º 5
0
        /// <summary>
        /// Approximates 2^<paramref name="power"/> per lane by splitting each power into its nearest
        /// integer and the remaining fraction, then multiplying a bit-constructed integer power by a
        /// degree-4 polynomial in the fraction.
        /// </summary>
        /// <param name="power">The four exponents. Debug builds assert that no non-NaN lane exceeds <c>MathV.FloatMaximumPower</c>.</param>
        /// <returns>The per-lane result; lanes whose power is below <c>-MathV.FloatMaximumPower</c> are forced to zero.</returns>
        public unsafe static Vector128 <float> Exp2(Vector128 <float> power)
        {
            Debug.Assert(Avx.MoveMask(Avx.And(Avx.CompareGreaterThan(power, AvxExtensions.BroadcastScalarToVector128(MathV.FloatMaximumPower)), Avx.CompareOrdered(power, power))) == 0);

            // One bit per lane that falls below the representable range; used for the final blend.
            Vector128<float> lowerBound = AvxExtensions.BroadcastScalarToVector128(-MathV.FloatMaximumPower);
            byte zeroMask = (byte)Avx.MoveMask(Avx.CompareLessThan(power, lowerBound));

            // Integer part: add the MathV offset and shift into place, then reinterpret the bits as float.
            // NOTE(review): presumably the offset is the exponent bias and the shift the mantissa width - confirm in MathV.
            Vector128<float> wholePart = Avx.RoundToNearestInteger(power);
            var offsetInteger = Avx.Add(Avx.ConvertToVector128Int32(wholePart), MathV.FloatMantissaZero128);
            Vector128<float> integerExponent = Avx.ShiftLeftLogical(offsetInteger, MathV.FloatMantissaBits).AsSingle();

            // Polynomial coefficients.
            Vector128<float> c1 = AvxExtensions.BroadcastScalarToVector128(MathV.Exp2Beta1);
            Vector128<float> c2 = AvxExtensions.BroadcastScalarToVector128(MathV.Exp2Beta2);
            Vector128<float> c3 = AvxExtensions.BroadcastScalarToVector128(MathV.Exp2Beta3);
            Vector128<float> c4 = AvxExtensions.BroadcastScalarToVector128(MathV.Exp2Beta4);

            // Fractional part and its powers.
            Vector128<float> fraction = Avx.Subtract(power, wholePart);
            Vector128<float> polynomial = AvxExtensions.BroadcastScalarToVector128(MathV.One);
            Vector128<float> fraction2 = Avx.Multiply(fraction, fraction);
            Vector128<float> fraction3 = Avx.Multiply(fraction2, fraction);
            Vector128<float> fraction4 = Avx.Multiply(fraction3, fraction);

            // One + c1*f + c2*f^2 + c3*f^3 + c4*f^4, accumulated term by term in ascending order.
            polynomial = Avx.Add(polynomial, Avx.Multiply(c1, fraction));
            polynomial = Avx.Add(polynomial, Avx.Multiply(c2, fraction2));
            polynomial = Avx.Add(polynomial, Avx.Multiply(c3, fraction3));
            polynomial = Avx.Add(polynomial, Avx.Multiply(c4, fraction4));

            Vector128<float> exponent = Avx.Multiply(integerExponent, polynomial);

            // Lanes flagged as too small are replaced by zero; skip the blend when no lane is flagged.
            return zeroMask == 0
                ? exponent
                : Avx.Blend(exponent, Vector128<float>.Zero, zeroMask);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Fills selected scanlines of <paramref name="iterations"/> with per-pixel iteration counts of the
        /// z = z^2 + c recurrence, processing eight horizontally adjacent pixels per step in single precision.
        /// </summary>
        /// <param name="iterations">Output grid indexed as [row, column]; its dimensions define height and width.</param>
        /// <param name="startScanline">First row to process.</param>
        /// <param name="increment">Row step between processed scanlines.</param>
        /// <param name="offsetX">Forwarded to <c>Impl.GetPointCoordinate</c>.</param>
        /// <param name="offsetY">Forwarded to <c>Impl.GetPointCoordinate</c>.</param>
        /// <param name="zoom">Forwarded to <c>Impl.GetPointCoordinate</c>.</param>
        /// <param name="maxIterations">Iteration cap; stored counts are reduced modulo this value.</param>
        /// <param name="cancel">Polled before every row and every 8-pixel group; set it to stop early.</param>
        public static unsafe void ComputeSingle(
            uint[,] iterations,
            int startScanline, int increment,
            double offsetX, double offsetY,
            double zoom,
            uint maxIterations,
            ref bool cancel)
        {
            const int stride = 8;

            int height = iterations.GetLength(0);
            int width  = iterations.GetLength(1);

            Vector256<float> iterationCap = Vector256.Create((float)maxIterations);
            Vector256<float> escapeLimit  = Vector256.Create(4.0f);
            Vector256<float> one          = Vector256.Create(1.0f);
            Vector256<float> two          = Vector256.Create(2.0f);

            // Scratch buffer used to spill the per-lane counters once a group is finished.
            float* laneCounts = stackalloc float[stride];

            for (int row = startScanline; row < height && !cancel; row += increment)
            {
                for (int column = 0; column < width && !cancel; column += stride)
                {
                    var c0 = Impl.GetPointCoordinate(column + 0, row, width, height, offsetX, offsetY, zoom);
                    var c1 = Impl.GetPointCoordinate(column + 1, row, width, height, offsetX, offsetY, zoom);
                    var c2 = Impl.GetPointCoordinate(column + 2, row, width, height, offsetX, offsetY, zoom);
                    var c3 = Impl.GetPointCoordinate(column + 3, row, width, height, offsetX, offsetY, zoom);
                    var c4 = Impl.GetPointCoordinate(column + 4, row, width, height, offsetX, offsetY, zoom);
                    var c5 = Impl.GetPointCoordinate(column + 5, row, width, height, offsetX, offsetY, zoom);
                    var c6 = Impl.GetPointCoordinate(column + 6, row, width, height, offsetX, offsetY, zoom);
                    var c7 = Impl.GetPointCoordinate(column + 7, row, width, height, offsetX, offsetY, zoom);

                    Vector256<float> cReal = Vector256.Create((float)c0.X, (float)c1.X, (float)c2.X, (float)c3.X, (float)c4.X, (float)c5.X, (float)c6.X, (float)c7.X);
                    Vector256<float> cImag = Vector256.Create((float)c0.Y, (float)c1.Y, (float)c2.Y, (float)c3.Y, (float)c4.Y, (float)c5.Y, (float)c6.Y, (float)c7.Y);
                    Vector256<float> zReal = cReal;
                    Vector256<float> zImag = cImag;
                    Vector256<float> counts = Vector256.Create(0f);

                    while (true)
                    {
                        Vector256<float> realSquared = Avx.Multiply(zReal, zReal);
                        Vector256<float> imagSquared = Avx.Multiply(zImag, zImag);
                        Vector256<float> magnitudeSquared = Avx.Add(realSquared, imagSquared);

                        // A lane stays active while it is within the escape radius and within the cap.
                        Vector256<float> active = Avx.And(
                            Avx.Compare(magnitudeSquared, escapeLimit, FloatComparisonMode.OrderedLessThanOrEqualNonSignaling),
                            Avx.Compare(counts, iterationCap, FloatComparisonMode.OrderedLessThanOrEqualNonSignaling));

                        if (Avx.MoveMask(active) != 0)
                        {
                            // At least one lane is still iterating: advance z and bump only the active counters.
                            zImag  = Fma.MultiplyAdd(two, Avx.Multiply(zReal, zImag), cImag);
                            zReal  = Avx.Add(Avx.Subtract(realSquared, imagSquared), cReal);
                            counts = Avx.Add(counts, Avx.And(one, active));
                            continue;
                        }

                        // Every lane is done: write back the counters, skipping lanes past the row end.
                        Avx.Store(laneCounts, counts);
                        for (int lane = 0; lane < stride; lane++)
                        {
                            if (column + lane < width)
                            {
                                iterations[row, column + lane] = (uint)laneCounts[lane] % maxIterations;
                            }
                        }
                        break;
                    }
                }
            }
        }
        /// <summary>
        /// Finds the index of the first element of <c>this.values</c> that is greater than or equal to
        /// <c>this.limitToFind</c>, using 256-bit AVX comparisons when the CPU supports them.
        /// </summary>
        /// <returns>The zero-based index of the first matching element, or -1 when none matches.</returns>
        /// <remarks>
        /// The array is processed in three phases: a scalar prefix up to the first 32-byte aligned
        /// address (never running past the end of the array), aligned 8-float vector blocks, and a
        /// scalar tail. NaN elements never match in any phase.
        /// </remarks>
        public int IndexOfFirstElementGreaterOrEqualToLimit_Avx()
        {
            var   values = this.values;
            float limit  = this.limitToFind;

            if (Avx.IsSupported)
            {
                unsafe
                {
                    fixed (float* valuesPtr = values)
                    {
                        const int VectorSizeInBytes = 32;
                        int length = values.Length;
                        int lanes  = Vector256<float>.Count;

                        // Number of floats before the first 32-byte boundary, computed on the full
                        // address width so that no pointer bits are dropped by a narrowing cast.
                        int misalignedBytes = (int)((ulong)valuesPtr & (VectorSizeInBytes - 1));
                        int alignmentOffset = ((VectorSizeInBytes - misalignedBytes) & (VectorSizeInBytes - 1)) / sizeof(float);

                        // Short arrays may end before the boundary; clamp so the prefix loop stays in bounds.
                        int scalarPrefix = alignmentOffset < length ? alignmentOffset : length;

                        // Phase 1: scalar prefix.
                        for (int i = 0; i < scalarPrefix; i++)
                        {
                            if (valuesPtr[i] >= limit)
                            {
                                return i;
                            }
                        }

                        // Phase 2: aligned vector blocks.
                        int vectorizableEnd = scalarPrefix + (length - scalarPrefix) / lanes * lanes;
                        Vector256<float> limitVector = Vector256.Create(limit);

                        for (int i = scalarPrefix; i < vectorizableEnd; i += lanes)
                        {
                            Vector256<float> block   = Avx.LoadAlignedVector256(valuesPtr + i);
                            Vector256<float> matches = Avx.Compare(block, limitVector, FloatComparisonMode.OrderedGreaterThanOrEqualNonSignaling);

                            // Bit k of the mask is set when lane k satisfied the comparison.
                            int matchMask = Avx.MoveMask(matches);
                            if (matchMask != 0)
                            {
                                // Lowest set bit = first matching lane. BitOperations is portable, so no
                                // BMI1 requirement is introduced on top of the AVX check above.
                                return i + System.Numerics.BitOperations.TrailingZeroCount(matchMask);
                            }
                        }

                        // Phase 3: scalar tail.
                        for (int i = vectorizableEnd; i < length; i++)
                        {
                            if (values[i] >= limit)
                            {
                                return i;
                            }
                        }

                        return -1;
                    }
                }
            }
            else
            {
                // Plain scalar scan for CPUs without AVX.
                for (int i = 0; i < values.Length; i++)
                {
                    if (values[i] >= limit)
                    {
                        return i;
                    }
                }
                return -1;
            }
        }
Exemplo n.º 8
0
        /// <summary>
        /// Computes Mandelbrot iteration counts for every (x, y) pair from <c>xPoints</c> and
        /// <c>yPoints</c>, eight x-values at a time, and writes them to <c>results</c> row by row.
        /// </summary>
        /// <remarks>
        /// Two shortcuts avoid iterating groups that are known to lie inside the set: a group whose
        /// eight points are all inside the main cardioid, or all inside the period-2 bulb, is stored
        /// directly as 255. Previously only the bulb case stored 255; a group fully inside the
        /// cardioid skipped the iteration but was left at 0.
        /// </remarks>
        public unsafe void Vector256Mandel()
        {
            const int maxInter = 256;

            ReadOnlySpan<float> ySpan = yPoints.Span;
            ReadOnlySpan<Vector256<float>> xSpan = MemoryMarshal.Cast<float, Vector256<float>>(xPoints.Span);
            Span<Vector256<float>> res = MemoryMarshal.Cast<float, Vector256<float>>(results.Span);
            int resVectorNumber = 0;

            Vector256<float> zeroVec  = Vector256<float>.Zero;
            Vector256<float> oneVec   = Vector256.Create(1.0f);
            Vector256<float> fourVec  = Vector256.Create(4.0f);
            Vector256<float> one4Vec  = Vector256.Create(0.25f);
            Vector256<float> one16Vec = Vector256.Create(1.0f / 16.0f);
            Vector256<float> insideSetVec = Vector256.Create(255.0f);

            for (int countY = 0; countY < ySpan.Length; countY++)
            {
                Vector256<float> currYVec = Vector256.Create(ySpan[countY]);
                Vector256<float> ySquared = Avx.Multiply(currYVec, currYVec);

                for (int countX = 0; countX < xSpan.Length; countX++)
                {
                    Vector256<float> currXVec = xSpan[countX];
                    Vector256<float> xSquVec  = zeroVec;
                    Vector256<float> ySquVec  = zeroVec;
                    Vector256<float> zSquVec  = zeroVec;
                    Vector256<float> interVec = zeroVec;

                    // Cardioid test: a lane is outside when q * (q + (x - 1/4)) > y^2 / 4,
                    // with q = (x - 1/4)^2 + y^2.
                    Vector256<float> xShifted = Avx.Subtract(currXVec, one4Vec);
                    Vector256<float> qVec     = Avx.Add(Avx.Multiply(xShifted, xShifted), ySquared);
                    Vector256<float> cardioid = Avx.Multiply(qVec, Avx.Add(qVec, xShifted));
                    Vector256<float> test     = Avx.Compare(cardioid, Avx.Multiply(one4Vec, ySquared), FloatComparisonMode.OrderedGreaterThanNonSignaling);
                    bool goOn = Avx.MoveMask(test) > 0;

                    if (goOn)
                    {
                        // Period-2 bulb test: a lane is outside when (x + 1)^2 + y^2 > 1/16.
                        Vector256<float> xPlusOne = Avx.Add(currXVec, oneVec);
                        Vector256<float> bulb     = Avx.Add(Avx.Multiply(xPlusOne, xPlusOne), ySquared);
                        test = Avx.Compare(bulb, one16Vec, FloatComparisonMode.OrderedGreaterThanNonSignaling);
                        goOn = Avx.MoveMask(test) > 0;
                    }

                    if (!goOn)
                    {
                        // All eight lanes are inside the cardioid or inside the bulb: store the maximum.
                        interVec = insideSetVec;
                    }

                    int inter = 0;
                    while (goOn)
                    {
                        // z = z^2 + c, using (x + y)^2 - x^2 - y^2 for the 2xy term.
                        Vector256<float> xVec = Avx.Add(Avx.Subtract(xSquVec, ySquVec), currXVec);
                        Vector256<float> yVec = Avx.Add(Avx.Subtract(Avx.Subtract(zSquVec, ySquVec), xSquVec), currYVec);
                        xSquVec = Avx.Multiply(xVec, xVec);
                        ySquVec = Avx.Multiply(yVec, yVec);
                        Vector256<float> sum = Avx.Add(xVec, yVec);
                        zSquVec = Avx.Multiply(sum, sum);

                        // Lanes with |z|^2 <= 4 are still alive.
                        test = Avx.Compare(Avx.Add(xSquVec, ySquVec), fourVec, FloatComparisonMode.OrderedLessThanOrEqualNonSignaling);

                        // Continue while any lane is alive and the iteration cap is not reached.
                        goOn = (Avx.MoveMask(test) > 0) & (inter < maxInter);
                        if (goOn)
                        {
                            // Add 1 to the counters of the alive lanes only.
                            interVec = Avx.Add(interVec, Avx.BlendVariable(zeroVec, oneVec, test));
                            inter++;
                        }
                    }

                    res[resVectorNumber] = interVec;
                    resVectorNumber++;
                }
            }
        }
Exemplo n.º 9
0
 /// <summary>
 /// Asserts, through the test framework's <c>Assert.IsTrue</c>, that the sign-bit mask of
 /// <paramref name="comparison"/> equals <c>Constant.Simd128x4.MaskAllTrue</c>.
 /// </summary>
 /// <param name="comparison">A four-lane comparison result.</param>
 public static void IsTrue(Vector128 <float> comparison)
 {
     int laneMask = Avx.MoveMask(comparison);
     bool everyLaneSet = laneMask == Constant.Simd128x4.MaskAllTrue;

     Assert.IsTrue(everyLaneSet);
 }
        /// <summary>
        /// Searches the column-major cost matrix for the first entry that is less than or equal to zero
        /// and lies in an uncovered row and an uncovered column. Columns are scanned in order, rows
        /// within a column in ascending order.
        /// </summary>
        /// <param name="costs">Cost matrix; read through <c>RowCount</c>, <c>ColumnCount</c> and <c>ColumnMajorBackingStore</c>.</param>
        /// <param name="rowsCovered">Per-row flags; covered rows are skipped.</param>
        /// <param name="colsCovered">Per-column flags; covered columns are skipped.</param>
        /// <param name="zeroLocation">The location found, or <c>(-1, -1)</c> when nothing qualifies.</param>
        /// <returns><c>true</c> when a qualifying entry was found; otherwise <c>false</c>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="rowsCovered"/> or <paramref name="colsCovered"/> is null.</exception>
        private static unsafe bool TryFindZero(Storage <float> costs, [NotNull] bool[] rowsCovered, [NotNull] bool[] colsCovered, out Location zeroLocation)
        {
            if (rowsCovered == null)
            {
                throw new ArgumentNullException(nameof(rowsCovered));
            }

            if (colsCovered == null)
            {
                throw new ArgumentNullException(nameof(colsCovered));
            }

            if (Avx2.IsSupported && costs.RowCount >= Vector256 <float> .Count)
            {
                var lanes           = Vector256 <float> .Count;
                var rowCount        = costs.RowCount;
                var columnCount     = costs.ColumnCount;
                var storage         = costs.ColumnMajorBackingStore;
                var maxVectorOffset = rowCount - rowCount % lanes;
                var zeroVector      = Vector256 <float> .Zero;

                // One bitmask per 8-row batch: bit k is set when row (batch * 8 + k) is NOT covered.
                var uncoveredMasks = new int[maxVectorOffset / lanes];
                for (var batch = 0; batch < uncoveredMasks.Length; batch++)
                {
                    var mask = 0;
                    for (var lane = 0; lane < lanes; lane++)
                    {
                        if (!rowsCovered[batch * lanes + lane])
                        {
                            mask |= 1 << lane;
                        }
                    }
                    uncoveredMasks[batch] = mask;
                }

                fixed (float* storagePtr = storage)
                {
                    for (var column = 0; column < columnCount; column++)
                    {
                        if (colsCovered[column])
                        {
                            continue;
                        }

                        var basePtr = storagePtr + rowCount * column;

                        // Vectorized part: eight rows of this column per step.
                        for (int row = 0, batch = 0; row < maxVectorOffset; row += lanes, batch++)
                        {
                            var rowVector  = Avx.LoadVector256(basePtr + row);
                            var nonPositive = Avx.Compare(rowVector, zeroVector, FloatComparisonMode.OrderedLessThanOrEqualNonSignaling);

                            // Keep only lanes that are <= 0 AND belong to an uncovered row.
                            var candidates = Avx.MoveMask(nonPositive) & uncoveredMasks[batch];
                            if (candidates == 0)
                            {
                                continue;
                            }

                            // Lowest set bit = first qualifying row of the batch. BitOperations is used
                            // instead of Bmi1 because BMI1 support is not implied by the AVX2 check above.
                            var zeroRow = row + System.Numerics.BitOperations.TrailingZeroCount(candidates);
                            zeroLocation = new Location(zeroRow, column);
                            return true;
                        }

                        // Scalar remainder: rows that do not fill a whole vector.
                        for (var i = maxVectorOffset; i < rowCount; i++)
                        {
                            if (!rowsCovered[i] && storage[column * rowCount + i] <= 0)
                            {
                                zeroLocation = new Location(i, column);
                                return true;
                            }
                        }
                    }
                }
            }
            else
            {
                // Scalar path for CPUs without AVX2 or matrices with fewer rows than one vector.
                for (var column = 0; column < costs.ColumnCount; column++)
                {
                    if (colsCovered[column])
                    {
                        continue;
                    }

                    for (var row = 0; row < costs.RowCount; row++)
                    {
                        if (!rowsCovered[row] && costs.ColumnMajorBackingStore[column * costs.RowCount + row] <= 0)
                        {
                            zeroLocation = new Location(row, column);
                            return true;
                        }
                    }
                }
            }

            zeroLocation = new Location(-1, -1);
            return false;
        }
Exemplo n.º 11
0
        /// <summary>
        /// Sizes the point grid from <c>TOTALBYTES</c>, fills <c>xPoints</c> and <c>yPoints</c>, and then
        /// computes Mandelbrot iteration counts eight x-values at a time into <c>results2</c>. The final
        /// |z|^2 of every group is additionally written to <c>testValue2</c>.
        /// </summary>
        public unsafe void Vector256Mandel()
        {
            int floatL3Size = TOTALBYTES / sizeof(float);

            // Both resolutions are rounded down to a multiple of 8 so rows split into whole vectors.
            resolutionX = (int)MathF.Floor(MathF.Sqrt(floatL3Size * ratioy_x));
            int excessX = resolutionX % 8;
            if (excessX != 0)
            {
                resolutionX -= excessX;
            }

            resolutionY = (int)MathF.Floor(resolutionX * ratioy_x);
            int excessY = resolutionY % 8;
            if (excessY != 0)
            {
                resolutionY -= excessY;
            }

            STEP_X         = (RIGHT_X - LEFT_X) / resolutionX;
            STEP_Y         = STEP_X; // same step on both axes (was ratioy_x * STEP_X; changed after a reported bug)
            numberOfPoints = resolutionX * resolutionY;
            results2       = new float[numberOfPoints];

            xPoints = new float[resolutionX];
            yPoints = new float[resolutionY];
            for (int column = 0; column < resolutionX; column++)
            {
                xPoints.Span[column] = LEFT_X + column * STEP_X;
            }
            for (int row = 0; row < resolutionY; row++)
            {
                yPoints.Span[row] = TOP_Y - row * STEP_Y;
            }

            const int maxInter = 256;
            ReadOnlySpan<float> ySpan = yPoints.Span;
            ReadOnlySpan<Vector256<float>> xSpan = MemoryMarshal.Cast<float, Vector256<float>>(xPoints.Span);
            Span<Vector256<float>> res      = MemoryMarshal.Cast<float, Vector256<float>>(results2.Span);
            Span<Vector256<float>> testSpan = MemoryMarshal.Cast<float, Vector256<float>>(testValue2.Span);
            int resVectorNumber = 0;

            Vector256<float> oneVec  = Vector256.Create(1.0f);
            Vector256<float> fourVec = Vector256.Create(4.0f);

            for (int countY = 0; countY < ySpan.Length; countY++)
            {
                Vector256<float> currYVec = Vector256.Create(ySpan[countY]);

                for (int countX = 0; countX < xSpan.Length; countX++)
                {
                    Vector256<float> currXVec = xSpan[countX];
                    Vector256<float> xSquVec  = Vector256<float>.Zero;
                    Vector256<float> ySquVec  = Vector256<float>.Zero;
                    Vector256<float> zSquVec  = Vector256<float>.Zero;
                    Vector256<float> interVec = Vector256<float>.Zero;

                    // Per-lane increment: 1 while the lane is alive, permanently 0 once it has escaped.
                    Vector256<float> sumVector = oneVec;
                    int inter = 0;

                    while (true)
                    {
                        // z = z^2 + c, using (x + y)^2 - x^2 - y^2 for the 2xy term.
                        Vector256<float> xVec = Avx.Add(Avx.Subtract(xSquVec, ySquVec), currXVec);
                        Vector256<float> yVec = Avx.Add(Avx.Subtract(Avx.Subtract(zSquVec, ySquVec), xSquVec), currYVec);
                        xSquVec = Avx.Multiply(xVec, xVec);
                        ySquVec = Avx.Multiply(yVec, yVec);
                        Vector256<float> xPlusY = Avx.Add(xVec, yVec);
                        zSquVec = Avx.Multiply(xPlusY, xPlusY);

                        // Lanes with |z|^2 <= 4 are still alive.
                        Vector256<float> test = Avx.Compare(Avx.Add(xSquVec, ySquVec), fourVec, FloatComparisonMode.OrderedLessThanOrEqualNonSignaling);
                        sumVector = Avx.BlendVariable(Vector256<float>.Zero, sumVector, test);

                        // Stop once no lane is alive or the iteration cap has been reached.
                        bool keepGoing = (Avx.MoveMask(test) > 0) & (inter < maxInter);
                        if (!keepGoing)
                        {
                            break;
                        }

                        interVec = Avx.Add(interVec, sumVector);
                        inter++;
                    }

                    testSpan[resVectorNumber] = Avx.Add(xSquVec, ySquVec);
                    res[resVectorNumber]      = interVec;
                    resVectorNumber++;
                }
            }
        }
Exemplo n.º 12
0
 /// <summary>
 /// Reports whether every vector in the sequence produces <c>TrueMask</c> when its lanes are
 /// condensed with <c>Avx.MoveMask</c> (the SIMD flavour of "all true").
 /// </summary>
 /// <remarks>See https://habr.com/en/post/467689/</remarks>
 public static bool All(this IEnumerable <Vector256 <double> > @this)
 {
     return @this.All(vector => Avx.MoveMask(vector) == TrueMask);
 }
Exemplo n.º 13
0
 /// <summary>
 /// Reports whether at least one lane pair compares as ordered-and-not-equal.
 /// </summary>
 /// <param name="vector1">Left operand.</param>
 /// <param name="vector2">Right operand.</param>
 /// <returns>
 /// <c>true</c> when any lane differs; lanes involving NaN do not count as different because the
 /// comparison mode is the ordered one.
 /// </returns>
 private static bool NotEqual(Vector256 <double> vector1, Vector256 <double> vector2)
 {
     Vector256<double> differingLanes = Avx.Compare(vector1, vector2, FloatComparisonMode.OrderedNotEqualNonSignaling);
     int differenceMask = Avx.MoveMask(differingLanes);

     return differenceMask != 0;
 }
Exemplo n.º 14
0
 /// <summary>
 /// Debug-only check that the sign-bit mask of <paramref name="condition"/> equals
 /// <c>Constant.Simd128x4.MaskAllTrue</c>.
 /// </summary>
 /// <param name="condition">A four-lane comparison result.</param>
 // The comparison is kept inside the Debug.Assert call so that it is not evaluated at all
 // when the call is compiled out.
 public static void Assert(Vector128 <float> condition)
     => Debug.Assert(Avx.MoveMask(condition) == Constant.Simd128x4.MaskAllTrue);
Exemplo n.º 15
0
        /// <summary>
        /// Walk-through of assorted Vector64/128/256, AVX and FMA intrinsics. Every result is stored in
        /// a local only; the trailing comments record the value each call produces for the operands in
        /// effect at that line. Nothing is returned or kept.
        /// </summary>
        public Intro()
        {
            var middleVector = Vector128.Create(1.0f);                      // middleVector = <1,1,1,1>

            middleVector = Vector128.CreateScalar(-1.0f);                   // middleVector = <-1,0,0,0>
            var floatBytes = Vector64.AsByte(Vector64.Create(1.0f, -1.0f)); // floatBytes = <0, 0, 128, 63, 0, 0, 128, 191>

            if (Avx.IsSupported)
            {
                var left  = Vector256.Create(-2.5f);                     // <-2.5, -2.5, -2.5, -2.5, -2.5, -2.5, -2.5, -2.5>
                var right = Vector256.Create(5.0f);                      // <5, 5, 5, 5, 5, 5, 5, 5>
                Vector256 <float> result = Avx.AddSubtract(left, right); // result = <-7.5, 2.5, -7.5, 2.5, -7.5, 2.5, -7.5, 2.5> (even lanes subtract, odd lanes add)
                left   = Vector256.Create(-1.0f, -2.0f, -3.0f, -4.0f, -50.0f, -60.0f, -70.0f, -80.0f);
                right  = Vector256.Create(0.0f, 2.0f, 3.0f, 4.0f, 50.0f, 60.0f, 70.0f, 80.0f);
                result = Avx.UnpackHigh(left, right);              // result = <-3, 3, -4, 4, -70, 70, -80, 80>
                result = Avx.UnpackLow(left, right);               // result = <-1, 0, -2, 2, -50, 50, -60, 60>
                result = Avx.DotProduct(left, right, 0b1111_0001); // result = <-29, 0, 0, 0, -17400, 0, 0, 0> (one dot product per 128-bit half)
                bool testResult = Avx.TestC(left, right);          // testResult = true
                testResult = Avx.TestC(right, left);               // testResult = false
                Vector256 <float> result1 = Avx.Divide(left, right);
                var plusOne = Vector256.Create(1.0f);
                result = Avx.Compare(right, result1, FloatComparisonMode.OrderedGreaterThanNonSignaling);
                result = Avx.Compare(right, result1, FloatComparisonMode.UnorderedNotLessThanNonSignaling);
                left   = Vector256.Create(0.0f, 3.0f, -3.0f, 4.0f, -50.0f, 60.0f, -70.0f, 80.0f);
                right  = Vector256.Create(0.0f, 2.0f, 3.0f, 2.0f, 50.0f, -60.0f, 70.0f, -80.0f);
                Vector256 <float> nanInFirstPosition = Avx.Divide(left, right); // lane 0 is 0/0 = NaN
                left = Vector256.Create(1.1f, 3.3333333f, -3.0f, 4.22f, -50.0f, 60.0f, -70.0f, 80.0f);
                Vector256 <float> InfInFirstPosition = Avx.Divide(left, right); // lane 0 is 1.1/0 = +Infinity

                left  = Vector256.Create(-1.1f, 3.0f, 1.0f / 3.0f, MathF.PI, -50.0f, 60.0f, -70.0f, 80.0f);
                right = Vector256.Create(0.0f, 2.0f, 3.1f, 2.0f, 50.0f, -60.0f, 70.0f, -80.0f);
                Vector256 <float> compareResult = Avx.Compare(left, right, FloatComparisonMode.OrderedGreaterThanNonSignaling); // compareResult = <0, NaN, 0, NaN, 0, NaN, 0, NaN> (an all-ones lane reads as NaN)
                Vector256 <float> mixed         = Avx.BlendVariable(left, right, compareResult);                                //  mixed = <-1.1, 2, ~0.333, 2, -50, -60, -70, -80> (takes right where the mask lane is set)

                //left = Vector256.Create(-1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f);
                //right = Vector256.Create(1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 1.0f, -1.0f, 1.0f);
                Vector256 <float> other = right = Vector256.Create(0.0f, 2.0f, 3.0f, 2.0f, 50.0f, -60.0f, 70.0f, -80.0f);
                // For float vectors TestZ/TestC inspect only the sign bit of each lane.
                bool bRes    = Avx.TestZ(plusOne, compareResult);        // bRes = true: plusOne has no sign bit set, so the AND is empty
                bool bRes2   = Avx.TestC(plusOne, compareResult);        // bRes2 = false: compareResult has sign bits that plusOne lacks
                bool allTrue = !Avx.TestZ(compareResult, compareResult); // true as soon as ANY lane is set, despite the variable name
                compareResult = Avx.Compare(nanInFirstPosition, right, FloatComparisonMode.OrderedEqualNonSignaling); // compareResult = <0, 0, 0, NaN, 0, 0, 0, 0> (only lane 3: 4/2 == 2; the NaN lane is unordered)
                compareResult = Avx.Compare(nanInFirstPosition, right, FloatComparisonMode.UnorderedEqualNonSignaling);
                compareResult = Avx.Compare(InfInFirstPosition, right, FloatComparisonMode.UnorderedNotLessThanOrEqualNonSignaling);
                compareResult = Avx.Compare(InfInFirstPosition, right, FloatComparisonMode.OrderedGreaterThanNonSignaling);
                var left128  = Vector128.Create(1.0f, 2.0f, 3.0f, 4.0f);
                var right128 = Vector128.Create(2.0f, 3.0f, 4.0f, 5.0f);
                Vector128 <float> compResult128 = Sse.CompareGreaterThan(left128, right128); // compResult128 = <0, 0, 0, 0>

                int res = Avx.MoveMask(compareResult); // res = 0b1010_1001 (lanes 0, 3, 5 and 7 of the last comparison are set)
                if (Fma.IsSupported)
                {
                    Vector256 <float> resultFma = Fma.MultiplyAdd(left, right, other); // = left * right + other for each element
                    resultFma = Fma.MultiplyAddNegated(left, right, other);            // = -(left * right) + other for each element
                    resultFma = Fma.MultiplySubtract(left, right, other);              // = left * right - other for each element
                    Fma.MultiplyAddSubtract(left, right, other);                       // even elements (0, 2, ...) subtract other, odd elements add it; the result is discarded here
                }
                // From here on: left = <-1.1, 3, 1/3, PI, -50, 60, -70, 80>, right = <0, 2, 3, 2, 50, -60, 70, -80>.
                result = Avx.DotProduct(left, right, 0b1010_0001);                     // result = <~12.28, 0, 0, 0, -10000, 0, 0, 0> (only lanes 1 and 3 of each half are multiplied)
                result = Avx.Floor(left);                                              // result = <-2, 3, 0, 3, -50, 60, -70, 80>
                result = Avx.Add(left, right);                                         // result = <-1.1, 5, ~3.33, ~5.14, 0, 0, 0, 0>
                result = Avx.Ceiling(left);                                            // result = <-1, 3, 1, 4, -50, 60, -70, 80>
                result = Avx.Multiply(left, right);                                    // result = <-0, 6, 1, ~6.28, -2500, -3600, -4900, -6400>
                result = Avx.HorizontalAdd(left, right);                               // result = <1.9, ~3.47, 2, 5, 10, 10, -10, -10>
                result = Avx.HorizontalSubtract(left, right);                          // result = <-4.1, ~-2.81, -2, 1, -110, -150, 110, 150>
                double[] someDoubles      = new double[] { 1.0, 3.0, -2.5, 7.5, 10.8, 0.33333 };
                double[] someOtherDoubles = new double[] { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 };
                double[] someResult       = new double[someDoubles.Length];
                float[]  someFloats       = new float[] { 1, 2, 3, 4, 10, 20, 30, 40, 0 };
                float[]  someOtherFloats  = new float[] { 1, 1, 1, 1, 1, 1, 1, 1 };
                unsafe
                {
                    // Load four doubles starting at index 1 and store them at the start of someResult.
                    fixed(double *ptr = &someDoubles[1])
                    {
                        fixed(double *ptr2 = &someResult[0])
                        {
                            Vector256 <double> res2 = Avx.LoadVector256(ptr); // res2 = <3, -2.5, 7.5, 10.8>

                            Avx.Store(ptr2, res2);
                        }
                    }

                    fixed(float *ptr = &someFloats[0])
                    {
                        fixed(float *ptr2 = &someOtherFloats[0])
                        {
                            // Mask 0b0001_0001 multiplies only lane 0 of each half and stores it in lane 0.
                            Vector256 <float> res2 = Avx.DotProduct(Avx.LoadVector256(ptr), Avx.LoadVector256(ptr2), 0b0001_0001); // res2 = <1, 0, 0, 0, 10, 0, 0, 0>
                            //Avx.Store(ptr2, res2);
                        }
                    }
                }
            }
        }