Example #1
0
            /// <summary>
            /// Widens the 32-bit elements of <paramref name="arr"/> that lie between the index returned by
            /// <c>FirstZeros</c> and the index returned by <c>Digits</c> into 64-bit lanes, four source
            /// elements per 256-bit vector.
            /// </summary>
            /// <param name="arr">Source elements; pinned for the duration of the conversion.</param>
            /// <returns>The widened vectors and the <c>FirstZeros</c> value as <c>shift</c>.</returns>
            private static unsafe (Vector256 <UInt32>[], int shift) ToVector(UInt32[] arr)
            {
                int skipped = FirstZeros(arr);
                int end     = Digits(arr);

                int lanes     = Vector256 <UInt64> .Count;
                int span      = end - skipped;
                int remainder = span % lanes;

                Vector256 <UInt32>[] widened = new Vector256 <UInt32> [(span + lanes - 1) / lanes];

                // When the span is not a whole number of vectors, the last slot is filled by a masked load.
                int fullVectors = (remainder == 0) ? widened.Length : widened.Length - 1;

                fixed(UInt32 *src = arr)
                fixed(Vector256 <UInt32> *dst = widened)
                {
                    int slot   = 0;
                    int offset = skipped;

                    while (slot < fullVectors)
                    {
                        Vector128 <UInt32> chunk = Avx.LoadVector128(src + offset);
                        dst[slot] = Avx2.ConvertToVector256Int64(chunk).AsUInt32();

                        slot++;
                        offset += lanes;
                    }

                    if (remainder != 0)
                    {
                        // Load only the (end - offset) remaining elements so nothing past 'end' is read.
                        Vector128 <UInt32> partial = Avx2.MaskLoad(src + offset, Mask128.LSV((uint)(end - offset)));
                        dst[slot] = Avx2.ConvertToVector256Int64(partial).AsUInt32();
                    }
                }

                return(widened, skipped);
            }
Example #2
0
        /// <summary>
        /// Returns the smallest element of <paramref name="x"/>, scanning eight elements per iteration
        /// with two independent 128-bit running minima.
        /// </summary>
        /// <param name="x">Values to scan.</param>
        /// <returns>
        /// The minimum element, or <c>int.MaxValue</c> when <paramref name="x"/> is empty.
        /// </returns>
        public static unsafe int AVXVecMinIndependent(int[] x)
        {
            int len  = x.Length;
            var min1 = Vector128.Create(int.MaxValue);
            var min2 = Vector128.Create(int.MaxValue);

            fixed(int *pSource = x)
            {
                int i = 0;
                int lastBlockIndex = len - (len % 8);

                while (i < lastBlockIndex)
                {
                    min1 = Avx.Min(min1, Avx.LoadVector128(pSource + i));
                    min2 = Avx.Min(min2, Avx.LoadVector128(pSource + i + 4));

                    i += 8;
                }

                // Horizontal reduction: every lane of both accumulators holds a candidate minimum.
                // (Previously lane 0 of each accumulator was added together, which is not a minimum.)
                int minValue = int.MaxValue;
                for (int lane = 0; lane < Vector128 <int> .Count; lane++)
                {
                    int candidate1 = min1.GetElement(lane);
                    if (candidate1 < minValue)
                    {
                        minValue = candidate1;
                    }

                    int candidate2 = min2.GetElement(lane);
                    if (candidate2 < minValue)
                    {
                        minValue = candidate2;
                    }
                }

                // Scalar tail for the last (len % 8) elements.
                while (i < len)
                {
                    minValue = MinBranchFree(minValue, pSource[i]);
                    i       += 1;
                }
                return(minValue);
            }
        }
Example #3
0
        // Loads the same four-float buffer (filled with 25) four times, sums the four loads and stores
        // the sum back, then checks that the first element is 100.
        // Returns true on success or when neither Avx nor AdvSimd is supported; false when the check fails.
        // NOTE(review): judging by the name, the repeated identical loads are there so the JIT can CSE
        // them - confirm before restructuring any statement in this method.
        unsafe static bool TestXmm_CanCSE()
        {
            const int VecLen = 4;

            // NOTE(review): 'result' is never read in this method.
            int result  = -1;
            var mem     = stackalloc float [VecLen];
            var memSpan = new Span <float> (mem, VecLen);

            // Single-iteration loop; the body runs exactly once.
            for (int i = 0; i < 1; i++)
            {
                if (Avx.IsSupported)
                {
                    Vector128 <float> x1, x2, x3, x4;
                    Vector128 <float> x5, x6, x7;

                    memSpan.Fill(25);
                    x1 = Avx.LoadVector128(mem);
                    x2 = Avx.LoadVector128(mem);
                    x3 = Avx.LoadVector128(mem);
                    x4 = Avx.LoadVector128(mem);

                    // (25 + 25) + (25 + 25) = 100 in every lane.
                    x5 = Avx.Add(x1, x2);
                    x6 = Avx.Add(x3, x4);
                    x7 = Avx.Add(x5, x6);

                    Avx.Store(mem, x7);
                    WriteArray(mem, VecLen);
                }
                else if (AdvSimd.IsSupported)
                {
                    // Same sequence as above using the Arm AdvSimd intrinsics.
                    Vector128 <float> x1, x2, x3, x4;
                    Vector128 <float> x5, x6, x7;

                    memSpan.Fill(25);
                    x1 = AdvSimd.LoadVector128(mem);
                    x2 = AdvSimd.LoadVector128(mem);
                    x3 = AdvSimd.LoadVector128(mem);
                    x4 = AdvSimd.LoadVector128(mem);

                    x5 = AdvSimd.Add(x1, x2);
                    x6 = AdvSimd.Add(x3, x4);
                    x7 = AdvSimd.Add(x5, x6);

                    AdvSimd.Store(mem, x7);
                    WriteArray(mem, VecLen);
                }
                else
                {
                    // No supported instruction set: report and treat as a pass.
                    Console.WriteLine("Hardware Intrinsics not supported");
                    return(true);
                }
            }

            if (mem[0] != 100.00)
            {
                Console.WriteLine("XMM_CanCSE Test Failed");
                return(false);
            }
            return(true);
        }
Example #4
0
        /// <summary>
        /// Returns the smallest element of <paramref name="x"/>, scanning four elements per iteration
        /// with a 128-bit running minimum.
        /// </summary>
        /// <param name="x">Values to scan.</param>
        /// <returns>
        /// The minimum element, or <c>int.MaxValue</c> when <paramref name="x"/> is empty.
        /// </returns>
        public static unsafe int AVXVecMin(int[] x)
        {
            int len = x.Length;
            var min = Vector128.Create(int.MaxValue);

            fixed(int *pSource = x)
            {
                int i = 0;
                int lastBlockIndex = len - (len % 4);

                while (i < lastBlockIndex)
                {
                    min = Avx.Min(min, Avx.LoadVector128(pSource + i));
                    i  += 4;
                }

                // Horizontal reduction over all four lanes; ToScalar() alone would only see lane 0
                // and silently drop the minima tracked in lanes 1..3.
                int minValue = min.ToScalar();
                for (int lane = 1; lane < Vector128 <int> .Count; lane++)
                {
                    int candidate = min.GetElement(lane);
                    if (candidate < minValue)
                    {
                        minValue = candidate;
                    }
                }

                // Scalar tail for the last (len % 4) elements.
                while (i < len)
                {
                    minValue = MinBranchFree(minValue, pSource[i]);
                    i       += 1;
                }
                return(minValue);
            }
        }
        /// <summary>
        /// Runs a 128-bit XOR kernel on <paramref name="cores"/> parallel workers; each worker performs
        /// <c>1048576 / cores</c> iterations on two locally allocated pattern buffers.
        /// The computed vectors are discarded.
        /// </summary>
        /// <param name="cores">Number of parallel workers (indices 1..cores).</param>
        public unsafe void IntrinsicsAVX(int cores)
        {
            Parallel.For(1, cores + 1, index =>
            {
                long[] patternA = new long[16];
                long[] patternB = new long[16];

                int slot = 0;
                while (slot < patternA.Length)
                {
                    patternA[slot] = 0x5555555555555555;
                    patternB[slot] = 0x2AAAAAAAAAAAAAAA;
                    slot++;
                }

                int iterations = 1048576 / cores;

                fixed(long *pA = patternA)
                fixed(long *pB = patternB)
                {
                    Vector128 <long> acc   = Avx.LoadVector128(pA);
                    Vector128 <long> other = Avx.LoadVector128(pB);

                    // Two XORs per iteration, as in the original kernel.
                    int done = 0;
                    while (done < iterations)
                    {
                        var mixed = Avx.Xor(acc, other);
                        acc = Avx.Xor(acc, mixed).AsInt64();
                        done++;
                    }
                }
            });
        }
Example #6
0
        // Local-variable scenario: loads a Single vector and an Int32 control vector from the data
        // table into locals, applies Avx.PermuteVar, writes the result to the output buffer and
        // validates it against the two locals.
        public void RunLclVarScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));

            var op1    = Avx.LoadVector128((Single *)(_dataTable.inArray1Ptr));
            var op2    = Avx.LoadVector128((Int32 *)(_dataTable.inArray2Ptr));
            var result = Avx.PermuteVar(op1, op2);

            Unsafe.Write(_dataTable.outArrayPtr, result);
            ValidateResult(op1, op2, _dataTable.outArrayPtr);
        }
        // Local-variable scenario: loads a 256-bit Int16 vector and a 128-bit Int16 vector into locals,
        // inserts the 128-bit value at index 1 with Avx.InsertVector128, writes the result to the
        // output buffer and validates it against the two locals.
        public void RunLclVarScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));

            var left   = Avx.LoadVector256((Int16 *)(_dataTable.inArray1Ptr));
            var right  = Avx.LoadVector128((Int16 *)(_dataTable.inArray2Ptr));
            var result = Avx.InsertVector128(left, right, 1);

            Unsafe.Write(_dataTable.outArrayPtr, result);
            ValidateResult(left, right, _dataTable.outArrayPtr);
        }
Example #8
0
        // Local-variable scenario for Avx.MaskStore: loads both Double operands into locals, stores
        // through the output pointer with 'left' as the mask argument and 'right' as the source
        // argument, then validates the output buffer.
        public void RunLclVarScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));

            var left  = Avx.LoadVector128((Double *)(_dataTable.inArray1Ptr));
            var right = Avx.LoadVector128((Double *)(_dataTable.inArray2Ptr));

            Avx.MaskStore((Double *)_dataTable.outArrayPtr, left, right);

            ValidateResult(left, right, _dataTable.outArrayPtr);
        }
Example #9
0
        // Basic scenario: passes the two loads (Double data, Int64 control) directly as arguments to
        // Avx.PermuteVar without intermediate locals, writes the result to the output buffer and
        // validates it against the raw input buffers.
        public void RunBasicScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));

            var result = Avx.PermuteVar(
                Avx.LoadVector128((Double *)(_dataTable.inArray1Ptr)),
                Avx.LoadVector128((Int64 *)(_dataTable.inArray2Ptr))
                );

            Unsafe.Write(_dataTable.outArrayPtr, result);
            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
        }
Example #10
0
        // Local-variable scenario: loads the Int32 accumulator, the Byte operand and the SByte operand
        // into locals, calls AvxVnni.MultiplyWideningAndAdd, writes the result to the output buffer
        // and validates it against the three locals.
        public void RunLclVarScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));

            var first  = Avx.LoadVector128((Int32 *)(_dataTable.inArray0Ptr));
            var second = Avx.LoadVector128((Byte *)(_dataTable.inArray1Ptr));
            var third  = Avx.LoadVector128((SByte *)(_dataTable.inArray2Ptr));
            var result = AvxVnni.MultiplyWideningAndAdd(first, second, third);

            Unsafe.Write(_dataTable.outArrayPtr, result);
            ValidateResult(first, second, third, _dataTable.outArrayPtr);
        }
Example #11
0
        // Basic scenario for Avx.MaskStore: the two Double loads are passed inline as the mask and
        // source arguments, the store targets the output buffer, and the output is validated against
        // the raw input buffers.
        public void RunBasicScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));

            Avx.MaskStore(
                (Double *)_dataTable.outArrayPtr,
                Avx.LoadVector128((Double *)(_dataTable.inArray1Ptr)),
                Avx.LoadVector128((Double *)(_dataTable.inArray2Ptr))
                );

            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
        }
Example #12
0
        // Struct-local-field scenario: creates a TestStruct local, loads its two fields through their
        // addresses (Double data, Int64 control), applies Avx.PermuteVar, writes the result to the
        // output buffer and validates it against the struct's fields.
        public void RunStructLclFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario_Load));

            var test   = TestStruct.Create();
            var result = Avx.PermuteVar(
                Avx.LoadVector128((Double *)(&test._fld1)),
                Avx.LoadVector128((Int64 *)(&test._fld2))
                );

            Unsafe.Write(_dataTable.outArrayPtr, result);
            ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr);
        }
Example #13
0
        // Basic scenario: passes the three loads (Int32, Byte, SByte) inline to
        // AvxVnni.MultiplyWideningAndAdd, writes the result to the output buffer and validates it
        // against the raw input buffers.
        public void RunBasicScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));

            var result1 = AvxVnni.MultiplyWideningAndAdd(
                Avx.LoadVector128((Int32 *)(_dataTable.inArray0Ptr)),
                Avx.LoadVector128((Byte *)(_dataTable.inArray1Ptr)),
                Avx.LoadVector128((SByte *)(_dataTable.inArray2Ptr))
                );

            Unsafe.Write(_dataTable.outArrayPtr, result1);
            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
        }
Example #14
0
        // Reflection scenario: resolves the Avx.PermuteVar overload taking (Vector128<Double>,
        // Vector128<Int64>) via GetMethod and invokes it with the two loaded (boxed) vectors.
        // The boxed result is cast back to Vector128<Double> before being written and validated.
        public void RunReflectionScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));

            var result = typeof(Avx).GetMethod(nameof(Avx.PermuteVar), new Type[] { typeof(Vector128 <Double>), typeof(Vector128 <Int64>) })
                         .Invoke(null, new object[] {
                Avx.LoadVector128((Double *)(_dataTable.inArray1Ptr)),
                Avx.LoadVector128((Int64 *)(_dataTable.inArray2Ptr))
            });

            Unsafe.Write(_dataTable.outArrayPtr, (Vector128 <Double>)(result));
            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
        }
        /// <summary>
        /// Narrows four 64-bit lanes to four 32-bit lanes by keeping the low 32 bits of each element.
        /// </summary>
        /// <param name="l">Vector of four 64-bit integers.</param>
        /// <returns>The low 32 bits of each element of <paramref name="l"/>, in the same order.</returns>
        public static unsafe Vector128 <int> AsInt(this Vector256 <long> l)
        {
            // Control 136 (binary 10 00 10 00) selects 32-bit elements 0 and 2 of each 128-bit half,
            // i.e. the low halves of both longs in that half, and repeats the pair:
            // (lo0, lo1, lo0, lo1 | lo2, lo3, lo2, lo3).
            const byte lowHalvesOfEachLong = 136;
            Vector256 <int> pairs = Avx2.Shuffle(l.AsInt32(), lowHalvesOfEachLong);

            // Join the first 64 bits of each half: (lo0, lo1) from the lower half, (lo2, lo3) from the upper.
            Vector128 <long> lowerPair = pairs.GetLower().AsInt64();
            Vector128 <long> upperPair = pairs.GetUpper().AsInt64();

            return(Avx.UnpackLow(lowerPair, upperPair).AsInt32());
        }
Example #16
0
        // Basic scenario: passes a 256-bit Byte load and a 128-bit Byte load inline to
        // Avx.InsertVector128 with index 1, writes the result to the output buffer and validates it
        // against the raw input buffers.
        public void RunBasicScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));

            var result = Avx.InsertVector128(
                Avx.LoadVector256((Byte *)(_dataTable.inArray1Ptr)),
                Avx.LoadVector128((Byte *)(_dataTable.inArray2Ptr)),
                1
                );

            Unsafe.Write(_dataTable.outArrayPtr, result);
            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
        }
Example #17
0
        // Reflection scenario for Avx.MaskStore: resolves the (Double*, Vector128<Double>,
        // Vector128<Double>) overload via GetMethod and invokes it. The destination pointer cannot be
        // boxed directly, so it is wrapped with Pointer.Box; the output buffer is then validated
        // against the raw input buffers.
        public void RunReflectionScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));

            typeof(Avx).GetMethod(nameof(Avx.MaskStore), new Type[] { typeof(Double *), typeof(Vector128 <Double>), typeof(Vector128 <Double>) })
            .Invoke(null, new object[] {
                Pointer.Box(_dataTable.outArrayPtr, typeof(Double *)),
                Avx.LoadVector128((Double *)(_dataTable.inArray1Ptr)),
                Avx.LoadVector128((Double *)(_dataTable.inArray2Ptr))
            });

            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
        }
Example #18
0
 private static void LoadMessage(
     int round,
     ulong *block,
     int *permutations,
     Vector256 <ulong> ffMask,
     Span <Vector256 <ulong> > permutedMsg)
 {
     Debug.Assert(permutedMsg.Length == 4);
     for (int i = 0; i < 4; i++)
     {
         var offset      = round * 16 + i * Vector128 <int> .Count;
         var permutation = Avx.LoadVector128(permutations + offset);
         permutedMsg[i] = Avx2.GatherMaskVector256(
             source: default, // what does this do?
Example #19
0
        // Reflection scenario: resolves the AvxVnni.MultiplyWideningAndAdd overload taking
        // (Vector128<Int32>, Vector128<Byte>, Vector128<SByte>) via GetMethod and invokes it with the
        // three loaded (boxed) vectors. The boxed result is cast back to Vector128<Int32> before being
        // written and validated.
        public void RunReflectionScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));

            var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector128 <Int32>), typeof(Vector128 <Byte>), typeof(Vector128 <SByte>) })
                         .Invoke(null, new object[] {
                Avx.LoadVector128((Int32 *)(_dataTable.inArray0Ptr)),
                Avx.LoadVector128((Byte *)(_dataTable.inArray1Ptr)),
                Avx.LoadVector128((SByte *)(_dataTable.inArray2Ptr))
            });

            Unsafe.Write(_dataTable.outArrayPtr, (Vector128 <Int32>)(result));
            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
        }
Example #20
0
            // Struct-field scenario: pins this instance's _fld1 (Double) and _fld2 (Int64) fields,
            // loads them through the pinned pointers, applies Avx.PermuteVar, and writes and validates
            // the result through the supplied test class's data table.
            public void RunStructFldScenario_Load(SimpleBinaryOpTest__PermuteVarDouble testClass)
            {
                fixed(Vector128 <Double> *pFld1 = &_fld1)
                fixed(Vector128 <Int64> *pFld2 = &_fld2)
                {
                    var result = Avx.PermuteVar(
                        Avx.LoadVector128((Double *)(pFld1)),
                        Avx.LoadVector128((Int64 *)(pFld2))
                        );

                    Unsafe.Write(testClass._dataTable.outArrayPtr, result);
                    testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
                }
            }
        // Reflection scenario: resolves the Avx.InsertVector128 overload taking (Vector256<Int16>,
        // Vector128<Int16>, byte) via GetMethod and invokes it with the two loaded (boxed) vectors
        // and index 1. The boxed result is cast back to Vector256<Int16> before being written and
        // validated.
        public void RunReflectionScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));

            var result = typeof(Avx).GetMethod(nameof(Avx.InsertVector128), new Type[] { typeof(Vector256 <Int16>), typeof(Vector128 <Int16>), typeof(byte) })
                         .Invoke(null, new object[] {
                Avx.LoadVector256((Int16 *)(_dataTable.inArray1Ptr)),
                Avx.LoadVector128((Int16 *)(_dataTable.inArray2Ptr)),
                (byte)1
            });

            Unsafe.Write(_dataTable.outArrayPtr, (Vector256 <Int16>)(result));
            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
        }
Example #22
0
        // Class-variable scenario: pins _clsVar1 (Single) and _clsVar2 (Int32), loads them through the
        // pinned pointers, applies Avx.PermuteVar, writes the result to the output buffer and
        // validates it against the two variables.
        public void RunClsVarScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario_Load));

            fixed(Vector128 <Single> *pClsVar1 = &_clsVar1)
            fixed(Vector128 <Int32> *pClsVar2 = &_clsVar2)
            {
                var result = Avx.PermuteVar(
                    Avx.LoadVector128((Single *)(pClsVar1)),
                    Avx.LoadVector128((Int32 *)(pClsVar2))
                    );

                Unsafe.Write(_dataTable.outArrayPtr, result);
                ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
            }
        }
Example #23
0
        // Class-field scenario: pins this instance's _fld1 (Double) and _fld2 (Int64) fields, loads
        // them through the pinned pointers, applies Avx.PermuteVar, writes the result to the output
        // buffer and validates it against the fields.
        public void RunClassFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario_Load));

            fixed(Vector128 <Double> *pFld1 = &_fld1)
            fixed(Vector128 <Int64> *pFld2 = &_fld2)
            {
                var result = Avx.PermuteVar(
                    Avx.LoadVector128((Double *)(pFld1)),
                    Avx.LoadVector128((Int64 *)(pFld2))
                    );

                Unsafe.Write(_dataTable.outArrayPtr, result);
                ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
            }
        }
Example #24
0
        // Converts the characters of valueText into BitState values, written in reverse order
        // (text position i ends up at bits[bits.Length - 1 - i] in the scalar path).
        // Returns the allocated bit memory together with a flag that is true when no converted value
        // had bit 0b10 set.
        // NOTE(review): assumes bitAlloc.GetBits(n) yields a span of exactly n elements and that
        // BitState is one byte wide (the vector path addresses bits through a byte*) - confirm.
        internal static unsafe (UnsafeMemory <BitState> bits, bool isValidBinary) ToBitStates(ReadOnlySpan <byte> valueText, BitAllocator bitAlloc)
        {
            UnsafeMemory <BitState> bitsMem = bitAlloc.GetBits(valueText.Length);
            Span <BitState>         bits    = bitsMem.Span;

            // Accumulates bit 0b10 of every converted value; stays 0 only if none of them had it set.
            ulong isValidBinary = 0;
            int   index         = 0;

            // Vector path: 16 characters per iteration. The 128-bit operations are invoked through the
            // Avx class while the guard checks Ssse3.
            if (Ssse3.IsSupported && bits.Length >= Vector128 <byte> .Count)
            {
                // Number of whole 16-byte chunks (despite the name, this is a chunk count).
                int vecBitCount = bits.Length / Vector128 <byte> .Count;
                fixed(BitState *bitsPtr = bits)
                {
                    fixed(byte *textPtr = valueText)
                    {
                        Vector128 <ulong> isValidBin = Vector128 <ulong> .Zero;

                        for (; index < vecBitCount; index++)
                        {
                            var charText = Avx.LoadVector128(textPtr + index * Vector128 <byte> .Count);
                            // presumably shuffleIdxs reverses the byte order within the chunk, to match
                            // the reversed scalar write below - verify against its definition
                            var byteText = Avx.Shuffle(charText, shuffleIdxs);

                            var firstBit         = Avx.And(onlyFirstBit, Avx.Or(byteText, Avx.ShiftRightLogical(byteText.AsInt32(), 1).AsByte()));
                            var secondBit        = Avx.And(onlySecondBit, Avx.ShiftRightLogical(byteText.AsInt32(), 5).AsByte());
                            var bytesAsBitStates = Avx.Or(firstBit, secondBit);

                            // Chunk 'index' of the text lands 'index + 1' chunks before the end of bits.
                            Avx.Store((byte *)bitsPtr + bits.Length - (index + 1) * Vector128 <byte> .Count, bytesAsBitStates);
                            isValidBin = Avx.Or(isValidBin, secondBit.AsUInt64());
                        }

                        isValidBinary = isValidBin.GetElement(0) | isValidBin.GetElement(1);
                    }
                }

                // Convert the chunk counter into an element index for the scalar tail.
                index *= Vector128 <byte> .Count;
            }

            // Scalar path: remaining characters (or all of them when the vector path was skipped).
            for (; index < bits.Length; index++)
            {
                BitState bit = ToBitState(valueText[index]);
                bits[bits.Length - index - 1] = bit;
                isValidBinary |= (uint)bit & 0b10;
            }

            return(bitsMem, isValidBinary == 0);
        }
            /// <summary>
            /// Builds the two mask tables. Entry <c>i</c> of <c>lstable</c> is a four-element load from
            /// a window of three all-ones values followed by zeros, starting <c>i</c> elements before
            /// index <c>MM128UInt32s - 1</c>; <c>mstable[i]</c> is its bitwise complement.
            /// </summary>
            static unsafe Mask128()
            {
                Vector128 <UInt32> allOnes = Vector128.Create(~0u);

                // Sliding a four-element load backwards over this window pulls in one more
                // all-ones element per step.
                UInt32[] window = new UInt32[7] {
                    ~0u, ~0u, ~0u, 0u, 0u, 0u, 0u
                };

                lstable = new Vector128 <UInt32> [MM128UInt32s];
                mstable = new Vector128 <UInt32> [MM128UInt32s];

                fixed(UInt32 *start = window)
                {
                    UInt32 *top = start + (MM128UInt32s - 1);

                    for (int slot = 0; slot < lstable.Length; slot++)
                    {
                        Vector128 <UInt32> lowMask = Avx.LoadVector128(top - slot);

                        lstable[slot] = lowMask;
                        mstable[slot] = Avx2.Xor(allOnes, lowMask);
                    }
                }
            }
Example #26
0
        /// <summary>
        /// Writes the byte-wise XOR of <paramref name="oldScreen"/> and <paramref name="newScreen"/>
        /// into <paramref name="difference"/>, 16 bytes per step using 128-bit loads and stores, with
        /// a scalar pass for any bytes left over when the length is not a multiple of 16.
        /// </summary>
        /// <param name="oldScreen">First input; must be at least as long as <paramref name="difference"/>.</param>
        /// <param name="newScreen">Second input; must be at least as long as <paramref name="difference"/>.</param>
        /// <param name="difference">Destination; its length determines how many bytes are processed.</param>
        /// <exception cref="System.ArgumentException">
        /// An input array is shorter than <paramref name="difference"/>.
        /// </exception>
        public unsafe void IntrinsicsAVX(byte[] oldScreen, byte[] newScreen, byte[] difference)
        {
            int length = difference.Length;

            // The loops below index the inputs by the destination length; reject inputs that would
            // be read past their end.
            if (oldScreen.Length < length || newScreen.Length < length)
            {
                throw new System.ArgumentException("oldScreen and newScreen must be at least as long as difference.");
            }

            int steps = length / 16;

            fixed(byte *pOld = oldScreen)
            fixed(byte *pNew  = newScreen)
            fixed(byte *pDiff = difference)
            {
                long *ppOld  = (long *)pOld;
                long *ppNew  = (long *)pNew;
                long *ppDiff = (long *)pDiff;

                // Each step covers two longs (16 bytes).
                for (int position = 0; position < steps; ppOld += 2, ppNew += 2, ppDiff += 2, position++)
                {
                    Avx.Store(ppDiff, Avx.Xor(Avx.LoadVector128(ppOld), Avx.LoadVector128(ppNew)));
                }

                // Remaining (length % 16) bytes that do not fill a whole vector.
                for (int tail = steps * 16; tail < length; tail++)
                {
                    pDiff[tail] = (byte)(pOld[tail] ^ pNew[tail]);
                }
            }
        }
Example #27
0
        // Class-local-field scenario: constructs a fresh SimpleBinaryOpTest__PermuteVarSingle, pins
        // its _fld1 (Single) and _fld2 (Int32) fields, loads them through the pinned pointers, applies
        // Avx.PermuteVar, writes the result to this instance's output buffer and validates it against
        // the new object's fields.
        public void RunClassLclFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario_Load));

            var test = new SimpleBinaryOpTest__PermuteVarSingle();

            fixed(Vector128 <Single> *pFld1 = &test._fld1)
            fixed(Vector128 <Int32> *pFld2 = &test._fld2)
            {
                var result = Avx.PermuteVar(
                    Avx.LoadVector128((Single *)(pFld1)),
                    Avx.LoadVector128((Int32 *)(pFld2))
                    );

                Unsafe.Write(_dataTable.outArrayPtr, result);
                ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr);
            }
        }
        /// <summary>
        /// Fills two shared 256-byte buffers with random bytes, then has <paramref name="cores"/>
        /// parallel workers XOR them into a worker-local 256-byte buffer, repeating the full pass
        /// <c>1048576 / cores</c> times per worker. The result buffers are discarded.
        /// </summary>
        /// <param name="cores">Number of parallel workers (indices 1..cores).</param>
        public unsafe void IntrinsicsAVX(int cores)
        {
            byte[] oldScreen = new byte[256];
            byte[] newScreen = new byte[256];

            Random rng = new Random();

            rng.NextBytes(oldScreen);
            rng.NextBytes(newScreen);

            Parallel.For(1, cores + 1, index =>
            {
                int passes = 1048576 / cores;

                byte[] difference = new byte[256];

                int cell = 0;
                while (cell < 256)
                {
                    difference[cell] = 0x00;
                    cell++;
                }

                // One vector step covers 16 bytes.
                int vectorSteps = difference.Length / 16;

                fixed(byte *pOld  = oldScreen)
                fixed(byte *pNew  = newScreen)
                fixed(byte *pDiff = difference)
                {
                    for (int pass = 0; pass < passes; pass++)
                    {
                        // Restart at the beginning of all three buffers on every pass.
                        long *srcOld = (long *)pOld;
                        long *srcNew = (long *)pNew;
                        long *dst    = (long *)pDiff;

                        int step = 0;
                        while (step < vectorSteps)
                        {
                            var before = Avx.LoadVector128(srcOld);
                            var after  = Avx.LoadVector128(srcNew);
                            Avx.Store(dst, Avx.Xor(before, after));

                            srcOld += 2;
                            srcNew += 2;
                            dst    += 2;
                            step++;
                        }
                    }
                }
            });
        }
Example #29
0
        /// <summary>
        /// Repeatedly writes the byte-wise XOR of <paramref name="oldScreen"/> and
        /// <paramref name="newScreen"/> into <paramref name="difference"/> from
        /// <paramref name="cores"/> parallel workers, each performing <c>1048576 / cores</c> full
        /// passes. Whole 16-byte chunks use 128-bit loads and stores; any remaining bytes are handled
        /// by a scalar pass.
        /// </summary>
        /// <param name="oldScreen">First input; must be at least as long as <paramref name="difference"/>.</param>
        /// <param name="newScreen">Second input; must be at least as long as <paramref name="difference"/>.</param>
        /// <param name="difference">Destination, shared by all workers; every worker writes the same values.</param>
        /// <param name="cores">
        /// Number of parallel workers (indices 1..cores). Zero makes the integer division below throw
        /// <see cref="System.DivideByZeroException"/>.
        /// </param>
        /// <exception cref="System.ArgumentException">
        /// An input array is shorter than <paramref name="difference"/>.
        /// </exception>
        public unsafe void IntrinsicsAVX(byte[] oldScreen, byte[] newScreen, byte[] difference, int cores)
        {
            int length = difference.Length;

            // The loops below index the inputs by the destination length; reject inputs that would
            // be read past their end.
            if (oldScreen.Length < length || newScreen.Length < length)
            {
                throw new System.ArgumentException("oldScreen and newScreen must be at least as long as difference.");
            }

            int steps     = length / 16;
            int tailStart = steps * 16;

            int max = 1048576 / cores;

            Parallel.For(1, cores + 1, index =>
            {
                fixed(byte *pOld  = oldScreen)
                fixed(byte *pNew  = newScreen)
                fixed(byte *pDiff = difference)
                {
                    for (int bufCnt = 0; bufCnt < max; bufCnt++)
                    {
                        long *ppOld  = (long *)pOld;
                        long *ppNew  = (long *)pNew;
                        long *ppDiff = (long *)pDiff;

                        // Each step covers two longs (16 bytes).
                        for (int position = 0; position < steps; ppOld += 2, ppNew += 2, ppDiff += 2, position++)
                        {
                            Avx.Store(ppDiff, Avx.Xor(Avx.LoadVector128(ppOld), Avx.LoadVector128(ppNew)));
                        }

                        // Remaining (length % 16) bytes that do not fill a whole vector.
                        for (int tail = tailStart; tail < length; tail++)
                        {
                            pDiff[tail] = (byte)(pOld[tail] ^ pNew[tail]);
                        }
                    }
                }
            });
        }
Example #30
0
        /// <summary>
        /// Sums expansion-factor-weighted Scribner board foot volume (6 inch top, 32 foot logs) over all
        /// trees, processing four trees per iteration with 128-bit vectors.
        /// </summary>
        /// <param name="trees">
        /// Trees to evaluate. Only <c>FiaCode.PseudotsugaMenziesii</c> in <c>Units.English</c> is accepted.
        /// </param>
        /// <returns>The horizontal sum of expansion factor times volume across all lanes.</returns>
        /// <exception cref="NotSupportedException">The species or the units are not the supported ones.</exception>
        /// <remarks>
        /// NOTE(review): the loop always loads full four-element vectors, so it presumably relies on the
        /// Dbh, Height and LiveExpansionFactor arrays being padded to a multiple of
        /// <c>Constant.Simd128x4.Width</c> - confirm against the Trees type.
        /// </remarks>
        public static unsafe float GetScribnerBoardFeetPerAcre(Trees trees)
        {
            // for now, assume all trees are of the same species
            if (trees.Species != FiaCode.PseudotsugaMenziesii)
            {
                throw new NotSupportedException();
            }
            if (trees.Units != Units.English)
            {
                throw new NotSupportedException();
            }

            // Douglas-fir
            #if DEBUG
            Vector128 <float> v6p8 = AvxExtensions.BroadcastScalarToVector128(6.8F);
            Vector128 <float> v10k = AvxExtensions.BroadcastScalarToVector128(10.0F * 1000.0F);
            #endif

            // constants
            Vector128 <float> forestersEnglish = AvxExtensions.BroadcastScalarToVector128(Constant.ForestersEnglish);
            Vector128 <float> one = AvxExtensions.BroadcastScalarToVector128(1.0F);
            Vector128 <float> six = AvxExtensions.BroadcastScalarToVector128(6.0F);

            Vector128 <float> vm3p21809   = AvxExtensions.BroadcastScalarToVector128(-3.21809F); // b4
            Vector128 <float> v0p04948    = AvxExtensions.BroadcastScalarToVector128(0.04948F);
            Vector128 <float> vm0p15664   = AvxExtensions.BroadcastScalarToVector128(-0.15664F);
            Vector128 <float> v2p02132    = AvxExtensions.BroadcastScalarToVector128(2.02132F);
            Vector128 <float> v1p63408    = AvxExtensions.BroadcastScalarToVector128(1.63408F);
            Vector128 <float> vm0p16184   = AvxExtensions.BroadcastScalarToVector128(-0.16184F);
            Vector128 <float> v1p033      = AvxExtensions.BroadcastScalarToVector128(1.033F);
            Vector128 <float> v1p382937   = AvxExtensions.BroadcastScalarToVector128(1.382937F);
            Vector128 <float> vm0p4015292 = AvxExtensions.BroadcastScalarToVector128(-0.4015292F);
            Vector128 <float> v0p087266   = AvxExtensions.BroadcastScalarToVector128(0.087266F);
            Vector128 <float> vm0p174533  = AvxExtensions.BroadcastScalarToVector128(-0.174533F);

            // ln(0.62): rc6 needs 0.62^x, evaluated below as MathV.Exp(ln(0.62) * x). The previous
            // constant was log2(0.62) = -0.6896598794, which pairs with a base-2 exponential, not with
            // MathV.Exp as it is used for the b4 term (there it stands in for e^x).
            Vector128 <float> vm0p4780358    = AvxExtensions.BroadcastScalarToVector128(-0.4780358F); // rc6-rs632
            Vector128 <float> v0p993         = AvxExtensions.BroadcastScalarToVector128(0.993F);
            Vector128 <float> v0p174439      = AvxExtensions.BroadcastScalarToVector128(0.174439F);
            Vector128 <float> v0p117594      = AvxExtensions.BroadcastScalarToVector128(0.117594F);
            Vector128 <float> vm8p210585     = AvxExtensions.BroadcastScalarToVector128(-8.210585F);
            Vector128 <float> v0p236693      = AvxExtensions.BroadcastScalarToVector128(0.236693F);
            Vector128 <float> v0p00001345    = AvxExtensions.BroadcastScalarToVector128(0.00001345F);
            Vector128 <float> v0p00001937    = AvxExtensions.BroadcastScalarToVector128(0.00001937F);
            Vector128 <float> v1p001491      = AvxExtensions.BroadcastScalarToVector128(1.001491F);
            Vector128 <float> vm6p924097     = AvxExtensions.BroadcastScalarToVector128(-6.924097F);
            Vector128 <float> v0p912733      = AvxExtensions.BroadcastScalarToVector128(0.912733F);
            Vector128 <float> v0p00001351    = AvxExtensions.BroadcastScalarToVector128(0.00001351F);

            fixed(float *dbh = &trees.Dbh[0], expansionFactors = &trees.LiveExpansionFactor[0], height = &trees.Height[0])
            {
                Vector128 <float> standBoardFeetPerAcre = Vector128 <float> .Zero;

                for (int treeIndex = 0; treeIndex < trees.Count; treeIndex += Constant.Simd128x4.Width)
                {
                    Vector128 <float> dbhInInches  = Avx.LoadVector128(dbh + treeIndex);
                    Vector128 <float> heightInFeet = Avx.LoadVector128(height + treeIndex);

                    Vector128 <float> logDbhInInches  = MathV.Log10(dbhInInches);
                    Vector128 <float> logHeightInFeet = MathV.Log10(heightInFeet);
                    // FiaCode.PseudotsugaMenziesii => -3.21809F + 0.04948F * logHeightInFeet * logDbhInInches - 0.15664F * logDbhInInches * logDbhInInches +
                    //                                  2.02132F * logDbhInInches + 1.63408F * logHeightInFeet - 0.16184F * logHeightInFeet * logHeightInFeet,
                    Vector128 <float> cvtsl = Avx.Add(vm3p21809, Avx.Multiply(v0p04948, Avx.Multiply(logHeightInFeet, logDbhInInches)));
                    cvtsl = Avx.Add(cvtsl, Avx.Multiply(vm0p15664, Avx.Multiply(logDbhInInches, logDbhInInches)));
                    cvtsl = Avx.Add(cvtsl, Avx.Multiply(v2p02132, logDbhInInches));
                    cvtsl = Avx.Add(cvtsl, Avx.Multiply(v1p63408, logHeightInFeet));
                    cvtsl = Avx.Add(cvtsl, Avx.Multiply(vm0p16184, Avx.Multiply(logHeightInFeet, logHeightInFeet)));
                    Vector128 <float> cubicFeet = MathV.Exp10(cvtsl);

                    Vector128 <float> dbhSquared            = Avx.Multiply(dbhInInches, dbhInInches); // could be consolidated by merging other scaling constants with Forester's constant for basal area
                    Vector128 <float> basalAreaInSquareFeet = Avx.Multiply(forestersEnglish, dbhSquared);
                    // b4 = cubicFeet / (1.033F * (1.0F + 1.382937F * MathV.Exp(-4.015292F * dbhInInches / 10.0F)) * (basalAreaInSquareFeet + 0.087266F) - 0.174533F);
                    Vector128 <float> b4 = Avx.Divide(cubicFeet, Avx.Add(Avx.Multiply(v1p033,
                                                                                      Avx.Multiply(Avx.Add(one, Avx.Multiply(v1p382937,
                                                                                                                             MathV.Exp(Avx.Multiply(vm0p4015292,
                                                                                                                                                    dbhInInches)))),
                                                                                                   Avx.Add(basalAreaInSquareFeet, v0p087266))),
                                                                         vm0p174533));
                    Vector128 <float> cv4 = Avx.Multiply(b4, Avx.Subtract(basalAreaInSquareFeet, v0p087266));

                    // conversion to Scribner volumes for 32 foot trees
                    // Waddell 2014:32
                    // rc6 = 0.993F * (1.0F - MathF.Pow(0.62F, dbhInInches - 6.0F));
                    Vector128 <float> rc6   = Avx.Multiply(v0p993, Avx.Subtract(one, MathV.Exp(Avx.Multiply(vm0p4780358, Avx.Subtract(dbhInInches, six))))); // 0.62^x = e^(ln(0.62) * x), ln(0.62) = -0.4780358
                    Vector128 <float> cv6   = Avx.Multiply(rc6, cv4);
                    Vector128 <float> logB4 = MathV.Log10(b4);
                    // float rs616 = MathF.Pow(10.0F, 0.174439F + 0.117594F * logDbhInInches * logB4 - 8.210585F / (dbhInInches * dbhInInches) + 0.236693F * logB4 - 0.00001345F * b4 * b4 - 0.00001937F * dbhInInches * dbhInInches);
                    Vector128 <float> rs616l = Avx.Add(v0p174439, Avx.Multiply(v0p117594, Avx.Multiply(logDbhInInches, logB4)));
                    rs616l = Avx.Add(rs616l, Avx.Divide(vm8p210585, dbhSquared));
                    rs616l = Avx.Add(rs616l, Avx.Multiply(v0p236693, logB4));
                    rs616l = Avx.Subtract(rs616l, Avx.Multiply(v0p00001345, Avx.Multiply(b4, b4)));
                    rs616l = Avx.Subtract(rs616l, Avx.Multiply(v0p00001937, dbhSquared));
                    Vector128 <float> rs616 = MathV.Exp10(rs616l);
                    Vector128 <float> sv616 = Avx.Multiply(rs616, cv6); // Scribner board foot volume to a 6 inch top for 16 foot logs
                    // float rs632 = 1.001491F - 6.924097F / tarif + 0.00001351F * dbhInInches * dbhInInches;
                    Vector128 <float> rs632 = Avx.Add(v1p001491, Avx.Divide(vm6p924097, Avx.Multiply(v0p912733, b4)));
                    rs632 = Avx.Add(rs632, Avx.Multiply(v0p00001351, dbhSquared));
                    // Lanes with DBH of six inches or less contribute zero volume.
                    Vector128 <float> zeroVolumeMask = Avx.CompareLessThanOrEqual(dbhInInches, six);
                    Vector128 <float> sv632          = Avx.Multiply(rs632, sv616); // Scribner board foot volume to a 6 inch top for 32 foot logs
                    sv632 = Avx.BlendVariable(sv632, Vector128 <float> .Zero, zeroVolumeMask);

                    #if DEBUG
                    DebugV.Assert(Avx.CompareGreaterThanOrEqual(Avx.BlendVariable(rc6, Vector128 <float> .Zero, zeroVolumeMask), Vector128 <float> .Zero));
                    DebugV.Assert(Avx.CompareLessThanOrEqual(rc6, one));
                    DebugV.Assert(Avx.CompareGreaterThanOrEqual(Avx.BlendVariable(rs616, one, zeroVolumeMask), one));
                    DebugV.Assert(Avx.CompareLessThanOrEqual(Avx.BlendVariable(rs616, Vector128 <float> .Zero, zeroVolumeMask), v6p8));
                    DebugV.Assert(Avx.CompareGreaterThanOrEqual(Avx.BlendVariable(rs632, Vector128 <float> .Zero, zeroVolumeMask), Vector128 <float> .Zero));
                    DebugV.Assert(Avx.CompareLessThanOrEqual(Avx.BlendVariable(rs632, Vector128 <float> .Zero, zeroVolumeMask), one));
                    DebugV.Assert(Avx.CompareGreaterThanOrEqual(Avx.BlendVariable(sv632, Vector128 <float> .Zero, zeroVolumeMask), Vector128 <float> .Zero));
                    DebugV.Assert(Avx.CompareLessThanOrEqual(Avx.BlendVariable(sv632, Vector128 <float> .Zero, zeroVolumeMask), v10k));
                    #endif

                    Vector128 <float> expansionFactor = Avx.LoadVector128(expansionFactors + treeIndex);
                    standBoardFeetPerAcre = Avx.Add(standBoardFeetPerAcre, Avx.Multiply(expansionFactor, sv632));
                }

                // Two horizontal adds fold the four lane sums into lane 0.
                standBoardFeetPerAcre = Avx.HorizontalAdd(standBoardFeetPerAcre, standBoardFeetPerAcre);
                standBoardFeetPerAcre = Avx.HorizontalAdd(standBoardFeetPerAcre, standBoardFeetPerAcre);
                return(standBoardFeetPerAcre.ToScalar());
            }
        }