Beispiel #1
0
        /// <summary>
        /// Passes the <c>_fld1</c>, <c>_fld2</c> and <c>_fld3</c> members to
        /// <c>Fma.MultiplySubtract</c>, stores the outcome in the data table's output
        /// buffer, and hands the inputs plus that buffer to <c>ValidateResult</c>.
        /// </summary>
        public void RunClassFldScenario()
        {
            // Operands are read directly from the members, not from locals.
            var fmsResult = Fma.MultiplySubtract(_fld1, _fld2, _fld3);
            Unsafe.Write(_dataTable.outArrayPtr, fmsResult);

            ValidateResult(_fld1, _fld2, _fld3, _dataTable.outArrayPtr);
        }
            /// <summary>
            /// Runs <c>Fma.MultiplySubtract</c> on the enclosing type's own <c>_fld1</c>,
            /// <c>_fld2</c> and <c>_fld3</c> members, writes the result into the output
            /// buffer owned by <paramref name="testClass"/>, and validates through it.
            /// </summary>
            /// <param name="testClass">Test instance that supplies the data table and <c>ValidateResult</c>.</param>
            public void RunStructFldScenario(SimpleTernaryOpTest__MultiplySubtractDouble testClass)
            {
                var fmsResult = Fma.MultiplySubtract(_fld1, _fld2, _fld3);

                // Output buffer and validation both belong to the caller-provided test instance.
                var outputPtr = testClass._dataTable.outArrayPtr;
                Unsafe.Write(outputPtr, fmsResult);
                testClass.ValidateResult(_fld1, _fld2, _fld3, testClass._dataTable.outArrayPtr);
            }
Beispiel #3
0
        /// <summary>
        /// Creates a <c>TestStruct</c> in a local, runs <c>Fma.MultiplySubtract</c> on its
        /// three fields, writes the result to the output buffer, and validates it.
        /// </summary>
        public void RunStructLclFldScenario()
        {
            var localStruct = TestStruct.Create();

            // Operands come from the fields of the struct held in a local.
            var fmsResult = Fma.MultiplySubtract(localStruct._fld1, localStruct._fld2, localStruct._fld3);
            Unsafe.Write(_dataTable.outArrayPtr, fmsResult);

            ValidateResult(localStruct._fld1, localStruct._fld2, localStruct._fld3, _dataTable.outArrayPtr);
        }
Beispiel #4
0
        /// <summary>
        /// Instantiates a fresh <c>SimpleTernaryOpTest__MultiplySubtractSingle</c>, runs
        /// <c>Fma.MultiplySubtract</c> on that instance's three fields, writes the result to
        /// this instance's output buffer, and validates it.
        /// </summary>
        public void RunClassLclFldScenario()
        {
            var localInstance = new SimpleTernaryOpTest__MultiplySubtractSingle();

            // Operands come from the new instance; the output buffer is this instance's.
            var fmsResult = Fma.MultiplySubtract(localInstance._fld1, localInstance._fld2, localInstance._fld3);
            Unsafe.Write(_dataTable.outArrayPtr, fmsResult);

            ValidateResult(localInstance._fld1, localInstance._fld2, localInstance._fld3, _dataTable.outArrayPtr);
        }
        /// <summary>
        /// Announces the scenario to the test framework, then passes the <c>_fld1</c>,
        /// <c>_fld2</c> and <c>_fld3</c> members to <c>Fma.MultiplySubtract</c>, stores the
        /// outcome in the output buffer, and validates it.
        /// </summary>
        public void RunClassFldScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));

            // Operands are read directly from the members, not from locals.
            var fmsResult = Fma.MultiplySubtract(_fld1, _fld2, _fld3);
            Unsafe.Write(_dataTable.outArrayPtr, fmsResult);

            ValidateResult(_fld1, _fld2, _fld3, _dataTable.outArrayPtr);
        }
        /// <summary>
        /// Announces the scenario, creates a <c>TestStruct</c> in a local, runs
        /// <c>Fma.MultiplySubtract</c> on its three fields, writes the result to the output
        /// buffer, and validates it.
        /// </summary>
        public void RunStructLclFldScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));

            var localStruct = TestStruct.Create();

            // Operands come from the fields of the struct held in a local.
            var fmsResult = Fma.MultiplySubtract(localStruct._fld1, localStruct._fld2, localStruct._fld3);
            Unsafe.Write(_dataTable.outArrayPtr, fmsResult);

            ValidateResult(localStruct._fld1, localStruct._fld2, localStruct._fld3, _dataTable.outArrayPtr);
        }
        /// <summary>
        /// Announces the scenario, instantiates a fresh
        /// <c>SimpleTernaryOpTest__MultiplySubtractDouble</c>, runs <c>Fma.MultiplySubtract</c>
        /// on that instance's three fields, writes the result to this instance's output
        /// buffer, and validates it.
        /// </summary>
        public void RunClassLclFldScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));

            var localInstance = new SimpleTernaryOpTest__MultiplySubtractDouble();

            // Operands come from the new instance; the output buffer is this instance's.
            var fmsResult = Fma.MultiplySubtract(localInstance._fld1, localInstance._fld2, localInstance._fld3);
            Unsafe.Write(_dataTable.outArrayPtr, fmsResult);

            ValidateResult(localInstance._fld1, localInstance._fld2, localInstance._fld3, _dataTable.outArrayPtr);
        }
Beispiel #8
0
        /// <summary>
        /// Loads the three input arrays into locals with <c>Avx.LoadAlignedVector256</c>,
        /// runs <c>Fma.MultiplySubtract</c> on those locals, writes the result to the output
        /// buffer, and validates it against the same locals.
        /// </summary>
        public void RunLclVarScenario_LoadAligned()
        {
            // Each operand is materialised in a local before the FMA call.
            var multiplicand = Avx.LoadAlignedVector256((Single*)_dataTable.inArray1Ptr);
            var multiplier   = Avx.LoadAlignedVector256((Single*)_dataTable.inArray2Ptr);
            var subtrahend   = Avx.LoadAlignedVector256((Single*)_dataTable.inArray3Ptr);

            var fmsResult = Fma.MultiplySubtract(multiplicand, multiplier, subtrahend);
            Unsafe.Write(_dataTable.outArrayPtr, fmsResult);

            ValidateResult(multiplicand, multiplier, subtrahend, _dataTable.outArrayPtr);
        }
Beispiel #9
0
        /// <summary>
        /// Reads the three input arrays into locals with <c>Unsafe.Read</c>, runs
        /// <c>Fma.MultiplySubtract</c> on those locals, writes the result to the output
        /// buffer, and validates it against the same locals.
        /// </summary>
        public void RunLclVarScenario_UnsafeRead()
        {
            // Each operand is materialised in a local before the FMA call.
            var multiplicand = Unsafe.Read<Vector256<Single>>(_dataTable.inArray1Ptr);
            var multiplier   = Unsafe.Read<Vector256<Single>>(_dataTable.inArray2Ptr);
            var subtrahend   = Unsafe.Read<Vector256<Single>>(_dataTable.inArray3Ptr);

            var fmsResult = Fma.MultiplySubtract(multiplicand, multiplier, subtrahend);
            Unsafe.Write(_dataTable.outArrayPtr, fmsResult);

            ValidateResult(multiplicand, multiplier, subtrahend, _dataTable.outArrayPtr);
        }
        /// <summary>
        /// Calls <c>Fma.MultiplySubtract</c> with three <c>Sse2.LoadVector128</c> loads passed
        /// directly as arguments, writes the result to the output buffer, and validates it
        /// against the raw input pointers.
        /// </summary>
        public void RunBasicScenario_Load()
        {
            // The loads stay inline as call arguments rather than being hoisted into locals.
            var fmsResult = Fma.MultiplySubtract(
                Sse2.LoadVector128((Double*)_dataTable.inArray1Ptr),
                Sse2.LoadVector128((Double*)_dataTable.inArray2Ptr),
                Sse2.LoadVector128((Double*)_dataTable.inArray3Ptr));
            Unsafe.Write(_dataTable.outArrayPtr, fmsResult);

            ValidateResult(
                _dataTable.inArray1Ptr,
                _dataTable.inArray2Ptr,
                _dataTable.inArray3Ptr,
                _dataTable.outArrayPtr);
        }
Beispiel #11
0
        /// <summary>
        /// Calls <c>Fma.MultiplySubtract</c> with three <c>Unsafe.Read</c> expressions passed
        /// directly as arguments, writes the result to the output buffer, and validates it
        /// against the raw input pointers.
        /// </summary>
        public void RunBasicScenario_UnsafeRead()
        {
            // The reads stay inline as call arguments rather than being hoisted into locals.
            var fmsResult = Fma.MultiplySubtract(
                Unsafe.Read<Vector256<Single>>(_dataTable.inArray1Ptr),
                Unsafe.Read<Vector256<Single>>(_dataTable.inArray2Ptr),
                Unsafe.Read<Vector256<Single>>(_dataTable.inArray3Ptr));
            Unsafe.Write(_dataTable.outArrayPtr, fmsResult);

            ValidateResult(
                _dataTable.inArray1Ptr,
                _dataTable.inArray2Ptr,
                _dataTable.inArray3Ptr,
                _dataTable.outArrayPtr);
        }
Beispiel #12
0
        /// <summary>
        /// Calls <c>Fma.MultiplySubtract</c> with three <c>Avx.LoadAlignedVector256</c> loads
        /// passed directly as arguments, writes the result to the output buffer, and
        /// validates it against the raw input pointers.
        /// </summary>
        public void RunBasicScenario_LoadAligned()
        {
            // The loads stay inline as call arguments rather than being hoisted into locals.
            var fmsResult = Fma.MultiplySubtract(
                Avx.LoadAlignedVector256((Single*)_dataTable.inArray1Ptr),
                Avx.LoadAlignedVector256((Single*)_dataTable.inArray2Ptr),
                Avx.LoadAlignedVector256((Single*)_dataTable.inArray3Ptr));
            Unsafe.Write(_dataTable.outArrayPtr, fmsResult);

            ValidateResult(
                _dataTable.inArray1Ptr,
                _dataTable.inArray2Ptr,
                _dataTable.inArray3Ptr,
                _dataTable.outArrayPtr);
        }
Beispiel #13
0
        /// <summary>
        /// Passes <c>_clsVar1</c>, <c>_clsVar2</c> and <c>_clsVar3</c> to
        /// <c>Fma.MultiplySubtract</c>, writes the result to the output buffer, and
        /// validates it against the same three members.
        /// </summary>
        public void RunClsVarScenario()
        {
            // Operands are referenced directly; no locals are introduced for them.
            var fmsResult = Fma.MultiplySubtract(_clsVar1, _clsVar2, _clsVar3);
            Unsafe.Write(_dataTable.outArrayPtr, fmsResult);

            ValidateResult(_clsVar1, _clsVar2, _clsVar3, _dataTable.outArrayPtr);
        }
        /// <summary>
        /// Announces the scenario, reads the three input arrays into locals with
        /// <c>Unsafe.Read</c>, runs <c>Fma.MultiplySubtract</c> on those locals, writes the
        /// result to the output buffer, and validates it against the same locals.
        /// </summary>
        public void RunLclVarScenario_UnsafeRead()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));

            // Each operand is materialised in a local before the FMA call.
            var multiplicand = Unsafe.Read<Vector256<Double>>(_dataTable.inArray1Ptr);
            var multiplier   = Unsafe.Read<Vector256<Double>>(_dataTable.inArray2Ptr);
            var subtrahend   = Unsafe.Read<Vector256<Double>>(_dataTable.inArray3Ptr);

            var fmsResult = Fma.MultiplySubtract(multiplicand, multiplier, subtrahend);
            Unsafe.Write(_dataTable.outArrayPtr, fmsResult);

            ValidateResult(multiplicand, multiplier, subtrahend, _dataTable.outArrayPtr);
        }
        /// <summary>
        /// Announces the scenario, loads the three input arrays into locals with
        /// <c>Avx.LoadAlignedVector256</c>, runs <c>Fma.MultiplySubtract</c> on those locals,
        /// writes the result to the output buffer, and validates it against the same locals.
        /// </summary>
        public void RunLclVarScenario_LoadAligned()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));

            // Each operand is materialised in a local before the FMA call.
            var multiplicand = Avx.LoadAlignedVector256((Double*)_dataTable.inArray1Ptr);
            var multiplier   = Avx.LoadAlignedVector256((Double*)_dataTable.inArray2Ptr);
            var subtrahend   = Avx.LoadAlignedVector256((Double*)_dataTable.inArray3Ptr);

            var fmsResult = Fma.MultiplySubtract(multiplicand, multiplier, subtrahend);
            Unsafe.Write(_dataTable.outArrayPtr, fmsResult);

            ValidateResult(multiplicand, multiplier, subtrahend, _dataTable.outArrayPtr);
        }
Beispiel #16
0
        /// <summary>
        /// Announces the scenario, reads the three input arrays into <c>Vector128</c> locals
        /// with <c>Unsafe.Read</c>, runs <c>Fma.MultiplySubtract</c> on those locals, writes
        /// the result to the output buffer, and validates it against the same locals.
        /// </summary>
        public void RunLclVarScenario_UnsafeRead()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));

            // Each operand is materialised in a local before the FMA call.
            var multiplicand = Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr);
            var multiplier   = Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr);
            var subtrahend   = Unsafe.Read<Vector128<Single>>(_dataTable.inArray3Ptr);

            var fmsResult = Fma.MultiplySubtract(multiplicand, multiplier, subtrahend);
            Unsafe.Write(_dataTable.outArrayPtr, fmsResult);

            ValidateResult(multiplicand, multiplier, subtrahend, _dataTable.outArrayPtr);
        }
        /// <summary>
        /// Announces the scenario, loads the three input arrays into locals with
        /// <c>Avx.LoadAlignedVector256</c>, feeds those locals to
        /// <c>Fma.MultiplySubtract</c>, writes the result to the output buffer, and validates
        /// it against the same locals.
        /// </summary>
        public void RunLclVarScenario_LoadAligned()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));

            // Each operand is materialised in a local before the FMA call.
            var firstOperand  = Avx.LoadAlignedVector256((Double*)_dataTable.inArray1Ptr);
            var secondOperand = Avx.LoadAlignedVector256((Double*)_dataTable.inArray2Ptr);
            var thirdOperand  = Avx.LoadAlignedVector256((Double*)_dataTable.inArray3Ptr);

            var fmsResult = Fma.MultiplySubtract(firstOperand, secondOperand, thirdOperand);
            Unsafe.Write(_dataTable.outArrayPtr, fmsResult);

            ValidateResult(firstOperand, secondOperand, thirdOperand, _dataTable.outArrayPtr);
        }
Beispiel #18
0
        /// <summary>
        /// Announces the scenario, calls <c>Fma.MultiplySubtract</c> with three
        /// <c>Unsafe.Read</c> expressions passed directly as arguments, writes the result to
        /// the output buffer, and validates it against the raw input pointers.
        /// </summary>
        public void RunBasicScenario_UnsafeRead()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));

            // The reads stay inline as call arguments rather than being hoisted into locals.
            var fmsResult = Fma.MultiplySubtract(
                Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr),
                Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr),
                Unsafe.Read<Vector128<Single>>(_dataTable.inArray3Ptr));
            Unsafe.Write(_dataTable.outArrayPtr, fmsResult);

            ValidateResult(
                _dataTable.inArray1Ptr,
                _dataTable.inArray2Ptr,
                _dataTable.inArray3Ptr,
                _dataTable.outArrayPtr);
        }
Beispiel #19
0
        /// <summary>
        /// Announces the scenario, calls <c>Fma.MultiplySubtract</c> with three
        /// <c>Sse.LoadAlignedVector128</c> loads passed directly as arguments, writes the
        /// result to the output buffer, and validates it against the raw input pointers.
        /// </summary>
        public void RunBasicScenario_LoadAligned()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));

            // The loads stay inline as call arguments rather than being hoisted into locals.
            var fmsResult = Fma.MultiplySubtract(
                Sse.LoadAlignedVector128((Single*)_dataTable.inArray1Ptr),
                Sse.LoadAlignedVector128((Single*)_dataTable.inArray2Ptr),
                Sse.LoadAlignedVector128((Single*)_dataTable.inArray3Ptr));
            Unsafe.Write(_dataTable.outArrayPtr, fmsResult);

            ValidateResult(
                _dataTable.inArray1Ptr,
                _dataTable.inArray2Ptr,
                _dataTable.inArray3Ptr,
                _dataTable.outArrayPtr);
        }
        /// <summary>
        /// Announces the scenario, calls <c>Fma.MultiplySubtract</c> with three
        /// <c>Avx.LoadVector256</c> loads passed directly as arguments, writes the result to
        /// the output buffer, and validates it against the raw input pointers.
        /// </summary>
        public void RunBasicScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));

            // The loads stay inline as call arguments rather than being hoisted into locals.
            var fmsResult = Fma.MultiplySubtract(
                Avx.LoadVector256((Double*)_dataTable.inArray1Ptr),
                Avx.LoadVector256((Double*)_dataTable.inArray2Ptr),
                Avx.LoadVector256((Double*)_dataTable.inArray3Ptr));
            Unsafe.Write(_dataTable.outArrayPtr, fmsResult);

            ValidateResult(
                _dataTable.inArray1Ptr,
                _dataTable.inArray2Ptr,
                _dataTable.inArray3Ptr,
                _dataTable.outArrayPtr);
        }
        /// <summary>
        /// Announces the scenario, creates a <c>TestStruct</c> in a local, loads each of its
        /// three fields through its address with <c>Avx.LoadVector256</c>, passes the loads
        /// straight to <c>Fma.MultiplySubtract</c>, writes the result to the output buffer,
        /// and validates it against the struct's fields.
        /// </summary>
        public void RunStructLclFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario_Load));

            var localStruct = TestStruct.Create();

            // The address of each field of the local struct is taken and loaded inline.
            var fmsResult = Fma.MultiplySubtract(
                Avx.LoadVector256((Double*)&localStruct._fld1),
                Avx.LoadVector256((Double*)&localStruct._fld2),
                Avx.LoadVector256((Double*)&localStruct._fld3));
            Unsafe.Write(_dataTable.outArrayPtr, fmsResult);

            ValidateResult(localStruct._fld1, localStruct._fld2, localStruct._fld3, _dataTable.outArrayPtr);
        }
            /// <summary>
            /// Pins the enclosing type's <c>_fld1</c>, <c>_fld2</c> and <c>_fld3</c> members,
            /// loads them with <c>Avx.LoadVector256</c> as direct arguments to
            /// <c>Fma.MultiplySubtract</c>, writes the result into the output buffer owned by
            /// <paramref name="testClass"/>, and validates through it.
            /// </summary>
            /// <param name="testClass">Test instance that supplies the data table and <c>ValidateResult</c>.</param>
            public void RunStructFldScenario_Load(SimpleTernaryOpTest__MultiplySubtractDouble testClass)
            {
                fixed (Vector256<Double>* firstPtr = &_fld1)
                fixed (Vector256<Double>* secondPtr = &_fld2)
                fixed (Vector256<Double>* thirdPtr = &_fld3)
                {
                    // Loads go through the pinned pointers and stay inline as call arguments.
                    var fmsResult = Fma.MultiplySubtract(
                        Avx.LoadVector256((Double*)firstPtr),
                        Avx.LoadVector256((Double*)secondPtr),
                        Avx.LoadVector256((Double*)thirdPtr));
                    Unsafe.Write(testClass._dataTable.outArrayPtr, fmsResult);

                    testClass.ValidateResult(_fld1, _fld2, _fld3, testClass._dataTable.outArrayPtr);
                }
            }
        /// <summary>
        /// Announces the scenario, pins <c>_clsVar1</c>, <c>_clsVar2</c> and <c>_clsVar3</c>,
        /// loads them with <c>Avx.LoadVector256</c> as direct arguments to
        /// <c>Fma.MultiplySubtract</c>, writes the result to the output buffer, and validates
        /// it against the same three members.
        /// </summary>
        public void RunClsVarScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario_Load));

            fixed (Vector256<Double>* firstPtr = &_clsVar1)
            fixed (Vector256<Double>* secondPtr = &_clsVar2)
            fixed (Vector256<Double>* thirdPtr = &_clsVar3)
            {
                // Loads go through the pinned pointers and stay inline as call arguments.
                var fmsResult = Fma.MultiplySubtract(
                    Avx.LoadVector256((Double*)firstPtr),
                    Avx.LoadVector256((Double*)secondPtr),
                    Avx.LoadVector256((Double*)thirdPtr));
                Unsafe.Write(_dataTable.outArrayPtr, fmsResult);

                ValidateResult(_clsVar1, _clsVar2, _clsVar3, _dataTable.outArrayPtr);
            }
        }
        /// <summary>
        /// Announces the scenario, pins the <c>_fld1</c>, <c>_fld2</c> and <c>_fld3</c>
        /// members, loads them with <c>Avx.LoadVector256</c> as direct arguments to
        /// <c>Fma.MultiplySubtract</c>, writes the result to the output buffer, and validates
        /// it against the same three members.
        /// </summary>
        public void RunClassFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario_Load));

            fixed (Vector256<Double>* firstPtr = &_fld1)
            fixed (Vector256<Double>* secondPtr = &_fld2)
            fixed (Vector256<Double>* thirdPtr = &_fld3)
            {
                // Loads go through the pinned pointers and stay inline as call arguments.
                var fmsResult = Fma.MultiplySubtract(
                    Avx.LoadVector256((Double*)firstPtr),
                    Avx.LoadVector256((Double*)secondPtr),
                    Avx.LoadVector256((Double*)thirdPtr));
                Unsafe.Write(_dataTable.outArrayPtr, fmsResult);

                ValidateResult(_fld1, _fld2, _fld3, _dataTable.outArrayPtr);
            }
        }
Beispiel #25
0
        /// <summary>
        /// Announces the scenario, instantiates a fresh
        /// <c>SimpleTernaryOpTest__MultiplySubtractSingle</c>, pins that instance's three
        /// fields, loads them with <c>Avx.LoadVector256</c> as direct arguments to
        /// <c>Fma.MultiplySubtract</c>, writes the result to this instance's output buffer,
        /// and validates it against the new instance's fields.
        /// </summary>
        public void RunClassLclFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario_Load));

            var localInstance = new SimpleTernaryOpTest__MultiplySubtractSingle();

            fixed (Vector256<Single>* firstPtr = &localInstance._fld1)
            fixed (Vector256<Single>* secondPtr = &localInstance._fld2)
            fixed (Vector256<Single>* thirdPtr = &localInstance._fld3)
            {
                // Loads go through the pinned pointers and stay inline as call arguments.
                var fmsResult = Fma.MultiplySubtract(
                    Avx.LoadVector256((Single*)firstPtr),
                    Avx.LoadVector256((Single*)secondPtr),
                    Avx.LoadVector256((Single*)thirdPtr));
                Unsafe.Write(_dataTable.outArrayPtr, fmsResult);

                ValidateResult(localInstance._fld1, localInstance._fld2, localInstance._fld3, _dataTable.outArrayPtr);
            }
        }
        /// <summary>
        /// Walk-through of Vector64/128/256 creation and a selection of SSE, AVX and FMA
        /// intrinsics. Every value is kept in a local (or a local array) and nothing is
        /// returned; the trailing comments record the value each call produces for the
        /// operands that are live at that point. <c>left</c> and <c>right</c> are reassigned
        /// several times, so each comment refers to the most recent assignment above it.
        /// Comparison results are lane masks: a "true" lane has all bits set, which reads
        /// back as NaN when interpreted as a float.
        /// </summary>
        public Intro()
        {
            var middleVector = Vector128.Create(1.0f);                      // middleVector = <1,1,1,1>

            middleVector = Vector128.CreateScalar(-1.0f);                   // middleVector = <-1,0,0,0>
            var floatBytes = Vector64.AsByte(Vector64.Create(1.0f, -1.0f)); // floatBytes = <0, 0, 128, 63, 0, 0, 128, 191>

            if (Avx.IsSupported)
            {
                var left  = Vector256.Create(-2.5f);                     // <-2.5, -2.5, -2.5, -2.5, -2.5, -2.5, -2.5, -2.5>
                var right = Vector256.Create(5.0f);                      // <5, 5, 5, 5, 5, 5, 5, 5>
                Vector256 <float> result = Avx.AddSubtract(left, right); // result = <-7.5, 2.5, -7.5, 2.5, -7.5, 2.5, -7.5, 2.5> (even lanes subtract, odd lanes add)
                left   = Vector256.Create(-1.0f, -2.0f, -3.0f, -4.0f, -50.0f, -60.0f, -70.0f, -80.0f);
                right  = Vector256.Create(0.0f, 2.0f, 3.0f, 4.0f, 50.0f, 60.0f, 70.0f, 80.0f);
                result = Avx.UnpackHigh(left, right);              // result = <-3, 3, -4, 4, -70, 70, -80, 80>
                result = Avx.UnpackLow(left, right);               // result = <-1, 0, -2, 2, -50, 50, -60, 60>
                result = Avx.DotProduct(left, right, 0b1111_0001); // result = <-29, 0, 0, 0, -17400, 0, 0, 0> (one dot product per 128-bit half)
                bool testResult = Avx.TestC(left, right);          // testResult = true
                testResult = Avx.TestC(right, left);               // testResult = false
                Vector256 <float> result1 = Avx.Divide(left, right); // result1 = <-Infinity, -1, -1, -1, -1, -1, -1, -1> (-1 / 0 is -Infinity)
                var plusOne = Vector256.Create(1.0f);
                result = Avx.Compare(right, result1, FloatComparisonMode.OrderedGreaterThanNonSignaling);   // every lane true: each element of right exceeds result1
                result = Avx.Compare(right, result1, FloatComparisonMode.UnorderedNotLessThanNonSignaling); // every lane true as well
                left   = Vector256.Create(0.0f, 3.0f, -3.0f, 4.0f, -50.0f, 60.0f, -70.0f, 80.0f);
                right  = Vector256.Create(0.0f, 2.0f, 3.0f, 2.0f, 50.0f, -60.0f, 70.0f, -80.0f);
                Vector256 <float> nanInFirstPosition = Avx.Divide(left, right); // <NaN, 1.5, -1, 2, -1, -1, -1, -1> (0 / 0 is NaN)
                left = Vector256.Create(1.1f, 3.3333333f, -3.0f, 4.22f, -50.0f, 60.0f, -70.0f, 80.0f);
                Vector256 <float> InfInFirstPosition = Avx.Divide(left, right); // <Infinity, ≈1.67, -1, 2.11, -1, -1, -1, -1> (1.1 / 0 is +Infinity)

                left  = Vector256.Create(-1.1f, 3.0f, 1.0f / 3.0f, MathF.PI, -50.0f, 60.0f, -70.0f, 80.0f);
                right = Vector256.Create(0.0f, 2.0f, 3.1f, 2.0f, 50.0f, -60.0f, 70.0f, -80.0f);
                Vector256 <float> compareResult = Avx.Compare(left, right, FloatComparisonMode.OrderedGreaterThanNonSignaling); // compareResult = <0, NaN, 0, NaN, 0, NaN, 0, NaN>
                Vector256 <float> mixed         = Avx.BlendVariable(left, right, compareResult);                                //  mixed = <-1.1, 2, 1/3, 2, -50, -60, -70, -80> (right is taken where the mask lane is set)

                //left = Vector256.Create(-1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f);
                //right = Vector256.Create(1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 1.0f, -1.0f, 1.0f);
                Vector256 <float> other = right = Vector256.Create(0.0f, 2.0f, 3.0f, 2.0f, 50.0f, -60.0f, 70.0f, -80.0f);
                // The float overloads of TestZ/TestC look only at the sign bit of each lane.
                bool bRes    = Avx.TestZ(plusOne, compareResult);        // bRes = true: plusOne has no sign bit set
                bool bRes2   = Avx.TestC(plusOne, compareResult);        // bRes2 = false: compareResult has sign bits that plusOne lacks
                bool allTrue = !Avx.TestZ(compareResult, compareResult); // true here; NOTE(review): this is true as soon as ANY lane is set, not only when all lanes are
                compareResult = Avx.Compare(nanInFirstPosition, right, FloatComparisonMode.OrderedEqualNonSignaling); // compareResult = <0, 0, 0, NaN, 0, 0, 0, 0> (only lane 3, 2 == 2, is equal)
                compareResult = Avx.Compare(nanInFirstPosition, right, FloatComparisonMode.UnorderedEqualNonSignaling);              // lanes 0 (NaN operand) and 3 are set
                compareResult = Avx.Compare(InfInFirstPosition, right, FloatComparisonMode.UnorderedNotLessThanOrEqualNonSignaling); // lanes 0, 3, 5 and 7 are set
                compareResult = Avx.Compare(InfInFirstPosition, right, FloatComparisonMode.OrderedGreaterThanNonSignaling);          // lanes 0, 3, 5 and 7 are set
                var left128  = Vector128.Create(1.0f, 2.0f, 3.0f, 4.0f);
                var right128 = Vector128.Create(2.0f, 3.0f, 4.0f, 5.0f);
                Vector128 <float> compResult128 = Sse.CompareGreaterThan(left128, right128); // compResult128 = <0, 0, 0, 0>

                int res = Avx.MoveMask(compareResult); // res = 0b1010_1001: one bit per lane, taken from the lane's sign bit
                if (Fma.IsSupported)
                {
                    Vector256 <float> resultFma = Fma.MultiplyAdd(left, right, other); // = left * right + other for each element
                    resultFma = Fma.MultiplyAddNegated(left, right, other);            // = -(left * right) + other for each element
                    resultFma = Fma.MultiplySubtract(left, right, other);              // = left * right - other for each element
                    Fma.MultiplyAddSubtract(left, right, other);                       // even elements (0, 2, ...) like MultiplySubtract, odd elements like MultiplyAdd; the result is discarded
                }
                // From here on: left = <-1.1, 3, 1/3, PI, -50, 60, -70, 80>, right = <0, 2, 3, 2, 50, -60, 70, -80>.
                result = Avx.DotProduct(left, right, 0b1010_0001);                     // result = <≈12.28, 0, 0, 0, -10000, 0, 0, 0> (only elements 1 and 3 of each half are multiplied)
                result = Avx.Floor(left);                                              // result = <-2, 3, 0, 3, -50, 60, -70, 80>
                result = Avx.Add(left, right);                                         // result = <-1.1, 5, ≈3.33, ≈5.14, 0, 0, 0, 0>
                result = Avx.Ceiling(left);                                            // result = <-1, 3, 1, 4, -50, 60, -70, 80>
                result = Avx.Multiply(left, right);                                    // result = <-0, 6, ≈1, ≈6.28, -2500, -3600, -4900, -6400>
                result = Avx.HorizontalAdd(left, right);                               // result = <1.9, ≈3.47, 2, 5, 10, 10, -10, -10>
                result = Avx.HorizontalSubtract(left, right);                          // result = <-4.1, ≈-2.81, -2, 1, -110, -150, 110, 150>
                double[] someDoubles      = new double[] { 1.0, 3.0, -2.5, 7.5, 10.8, 0.33333 };
                double[] someOtherDoubles = new double[] { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 };
                double[] someResult       = new double[someDoubles.Length];
                float[]  someFloats       = new float[] { 1, 2, 3, 4, 10, 20, 30, 40, 0 };
                float[]  someOtherFloats  = new float[] { 1, 1, 1, 1, 1, 1, 1, 1 };
                unsafe
                {
                    // Load four doubles starting at index 1 and store them at the start of someResult.
                    fixed(double *ptr = &someDoubles[1])
                    {
                        fixed(double *ptr2 = &someResult[0])
                        {
                            Vector256 <double> res2 = Avx.LoadVector256(ptr); // res2 = <3, -2.5, 7.5, 10.8>

                            Avx.Store(ptr2, res2);
                        }
                    }

                    // Load eight floats from each array and multiply only element 0 of each 128-bit half.
                    fixed(float *ptr = &someFloats[0])
                    {
                        fixed(float *ptr2 = &someOtherFloats[0])
                        {
                            Vector256 <float> res2 = Avx.DotProduct(Avx.LoadVector256(ptr), Avx.LoadVector256(ptr2), 0b0001_0001); // res2 = <1, 0, 0, 0, 10, 0, 0, 0>
                            //Avx.Store(ptr2, res2);
                        }
                    }
                }
            }
        }
            /// <summary>
            /// Converts <paramref name="cb"/> input bytes into the same number of floats.
            /// When compiled with HWINTRINSICS or VECTOR_CONVERT, whole vector-sized chunks are
            /// converted arithmetically as <c>(value - offset) * scale</c>; whatever remains is
            /// converted by indexing <c>valueTable</c> with the byte value, eight at a time and
            /// then one at a time.
            /// NOTE(review): presumably <c>valueTable</c> holds the same
            /// <c>(value - offset) * scale</c> results precomputed per byte value - confirm
            /// against where the table is built.
            /// </summary>
            /// <param name="ipstart">Start of the input bytes.</param>
            /// <param name="opstart">Start of the output buffer, written as <c>float</c> values.</param>
            /// <param name="cb">Number of input bytes to convert.</param>
            unsafe void IConversionProcessor.ConvertLine(byte *ipstart, byte *opstart, int cb)
            {
                fixed(float *atstart = &valueTable[0])
                {
                    // ip/ipe delimit the unread input; op is the float write cursor; at is the lookup table.
                    byte * ip = ipstart, ipe = ipstart + cb;
                    float *op = (float *)opstart, at = atstart;

#if HWINTRINSICS
                    if (Avx2.IsSupported)
                    {
                        var vscal = Vector256.Create(scale);
                        // With FMA the offset is pre-multiplied by scale so that
                        // v * scale - offset * scale yields (v - offset) * scale in one fused op.
                        var voffs = Fma.IsSupported ? Vector256.Create(offset * scale) : Vector256.Create(offset);

                        // Pull the end marker back by one chunk so the loop runs only while a
                        // full chunk of input remains; it is restored after the loop.
                        ipe -= Vector256 <byte> .Count;
                        while (ip <= ipe)
                        {
                            // Four widening loads, each turning 8 consecutive bytes into 8 Int32 lanes.
                            var vi0 = Avx2.ConvertToVector256Int32(ip);
                            var vi1 = Avx2.ConvertToVector256Int32(ip + Vector256 <int> .Count);
                            var vi2 = Avx2.ConvertToVector256Int32(ip + Vector256 <int> .Count * 2);
                            var vi3 = Avx2.ConvertToVector256Int32(ip + Vector256 <int> .Count * 3);
                            ip += Vector256 <byte> .Count;

                            var vf0 = Avx.ConvertToVector256Single(vi0);
                            var vf1 = Avx.ConvertToVector256Single(vi1);
                            var vf2 = Avx.ConvertToVector256Single(vi2);
                            var vf3 = Avx.ConvertToVector256Single(vi3);

                            if (Fma.IsSupported)
                            {
                                // voffs already holds offset * scale on this path.
                                vf0 = Fma.MultiplySubtract(vf0, vscal, voffs);
                                vf1 = Fma.MultiplySubtract(vf1, vscal, voffs);
                                vf2 = Fma.MultiplySubtract(vf2, vscal, voffs);
                                vf3 = Fma.MultiplySubtract(vf3, vscal, voffs);
                            }
                            else
                            {
                                vf0 = Avx.Multiply(Avx.Subtract(vf0, voffs), vscal);
                                vf1 = Avx.Multiply(Avx.Subtract(vf1, voffs), vscal);
                                vf2 = Avx.Multiply(Avx.Subtract(vf2, voffs), vscal);
                                vf3 = Avx.Multiply(Avx.Subtract(vf3, voffs), vscal);
                            }

                            Avx.Store(op, vf0);
                            Avx.Store(op + Vector256 <int> .Count, vf1);
                            Avx.Store(op + Vector256 <int> .Count * 2, vf2);
                            Avx.Store(op + Vector256 <int> .Count * 3, vf3);
                            // op is a float*, so this advances by one float per input byte consumed.
                            op += Vector256 <byte> .Count;
                        }
                        ipe += Vector256 <byte> .Count;
                    }
                    else if (Sse41.IsSupported)
                    {
                        var vscal = Vector128.Create(scale);
                        var voffs = Vector128.Create(offset);

                        // Same scheme as the AVX2 path with 128-bit vectors: four loads of 4 bytes each.
                        ipe -= Vector128 <byte> .Count;
                        while (ip <= ipe)
                        {
                            var vi0 = Sse41.ConvertToVector128Int32(ip);
                            var vi1 = Sse41.ConvertToVector128Int32(ip + Vector128 <int> .Count);
                            var vi2 = Sse41.ConvertToVector128Int32(ip + Vector128 <int> .Count * 2);
                            var vi3 = Sse41.ConvertToVector128Int32(ip + Vector128 <int> .Count * 3);
                            ip += Vector128 <byte> .Count;

                            var vf0 = Sse2.ConvertToVector128Single(vi0);
                            var vf1 = Sse2.ConvertToVector128Single(vi1);
                            var vf2 = Sse2.ConvertToVector128Single(vi2);
                            var vf3 = Sse2.ConvertToVector128Single(vi3);

                            vf0 = Sse.Multiply(Sse.Subtract(vf0, voffs), vscal);
                            vf1 = Sse.Multiply(Sse.Subtract(vf1, voffs), vscal);
                            vf2 = Sse.Multiply(Sse.Subtract(vf2, voffs), vscal);
                            vf3 = Sse.Multiply(Sse.Subtract(vf3, voffs), vscal);

                            Sse.Store(op, vf0);
                            Sse.Store(op + Vector128 <int> .Count, vf1);
                            Sse.Store(op + Vector128 <int> .Count * 2, vf2);
                            Sse.Store(op + Vector128 <int> .Count * 3, vf3);
                            op += Vector128 <byte> .Count;
                        }
                        ipe += Vector128 <byte> .Count;
                    }
#elif VECTOR_CONVERT
                    var vscal = new VectorF(scale);
                    var voffs = new VectorF(offset);

                    // Portable Vector<T> path: one byte vector is widened twice into four
                    // wider-lane vectors, which are then reinterpreted as Int32 and converted to float.
                    ipe -= Vector <byte> .Count;
                    while (ip <= ipe)
                    {
                        var vb = Unsafe.ReadUnaligned <Vector <byte> >(ip);
                        Vector.Widen(vb, out var vs0, out var vs1);
                        Vector.Widen(vs0, out var vi0, out var vi1);
                        Vector.Widen(vs1, out var vi2, out var vi3);
                        ip += Vector <byte> .Count;

                        var vf0 = Vector.ConvertToSingle(Vector.AsVectorInt32(vi0));
                        var vf1 = Vector.ConvertToSingle(Vector.AsVectorInt32(vi1));
                        var vf2 = Vector.ConvertToSingle(Vector.AsVectorInt32(vi2));
                        var vf3 = Vector.ConvertToSingle(Vector.AsVectorInt32(vi3));

                        vf0 = (vf0 - voffs) * vscal;
                        vf1 = (vf1 - voffs) * vscal;
                        vf2 = (vf2 - voffs) * vscal;
                        vf3 = (vf3 - voffs) * vscal;

                        Unsafe.WriteUnaligned(op, vf0);
                        Unsafe.WriteUnaligned(op + VectorF.Count, vf1);
                        Unsafe.WriteUnaligned(op + VectorF.Count * 2, vf2);
                        Unsafe.WriteUnaligned(op + VectorF.Count * 3, vf3);
                        op += Vector <byte> .Count;
                    }
                    ipe += Vector <byte> .Count;
#endif

                    // Table-lookup path, unrolled by 8: handles what the vector loops left
                    // behind, or the whole line when no vector path ran.
                    ipe -= 8;
                    while (ip <= ipe)
                    {
                        float o0 = at[(uint)ip[0]];
                        float o1 = at[(uint)ip[1]];
                        float o2 = at[(uint)ip[2]];
                        float o3 = at[(uint)ip[3]];
                        float o4 = at[(uint)ip[4]];
                        float o5 = at[(uint)ip[5]];
                        float o6 = at[(uint)ip[6]];
                        float o7 = at[(uint)ip[7]];
                        ip += 8;

                        op[0] = o0;
                        op[1] = o1;
                        op[2] = o2;
                        op[3] = o3;
                        op[4] = o4;
                        op[5] = o5;
                        op[6] = o6;
                        op[7] = o7;
                        op   += 8;
                    }
                    ipe += 8;

                    // Final tail: fewer than 8 bytes remain, converted one at a time.
                    while (ip < ipe)
                    {
                        op[0] = at[(uint)ip[0]];
                        ip++;
                        op++;
                    }
                }
            }