/// <summary>
/// Scenario: pins the three instance fields, loads each one through a pointer,
/// blends them with <c>Avx2.BlendVariable</c>, writes the result to the output
/// buffer and validates it against the field values.
/// </summary>
public void RunClassFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario_Load));

            fixed (Vector256<Int32>* firstPtr = &_fld1)
            fixed (Vector256<Int32>* secondPtr = &_fld2)
            fixed (Vector256<Int32>* thirdPtr = &_fld3)
            {
                // Loads happen in the same left-to-right order as the inline call arguments did.
                Vector256<Int32> first = Avx.LoadVector256((Int32*)firstPtr);
                Vector256<Int32> second = Avx.LoadVector256((Int32*)secondPtr);
                Vector256<Int32> third = Avx.LoadVector256((Int32*)thirdPtr);

                Vector256<Int32> blended = Avx2.BlendVariable(first, second, third);

                Unsafe.Write(_dataTable.outArrayPtr, blended);
                ValidateResult(_fld1, _fld2, _fld3, _dataTable.outArrayPtr);
            }
        }
Example #2
0
        /// <summary>
        /// Scenario: creates a fresh test instance, pins its two fields, loads them
        /// through pointers, applies <c>Avx2.Sign</c>, writes the result to the output
        /// buffer and validates it against the local instance's field values.
        /// </summary>
        public void RunClassLclFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario_Load));

            var localTest = new SimpleBinaryOpTest__SignSByte();

            fixed (Vector256<SByte>* firstPtr = &localTest._fld1)
            fixed (Vector256<SByte>* secondPtr = &localTest._fld2)
            {
                // Loads happen in the same left-to-right order as the inline call arguments did.
                Vector256<SByte> first = Avx.LoadVector256((SByte*)firstPtr);
                Vector256<SByte> second = Avx.LoadVector256((SByte*)secondPtr);

                Vector256<SByte> signed = Avx2.Sign(first, second);

                Unsafe.Write(_dataTable.outArrayPtr, signed);
                ValidateResult(localTest._fld1, localTest._fld2, _dataTable.outArrayPtr);
            }
        }
        /// <summary>
        /// Multiplies two vectors byte-by-byte using 16-bit multiplies: one multiply
        /// supplies the even-indexed byte products, a second multiply on the inputs
        /// shifted right by 8 supplies the odd-indexed ones, and a byte blend merges them.
        /// </summary>
        /// <param name="left">First operand.</param>
        /// <param name="right">Second operand.</param>
        /// <returns>The blended per-byte products.</returns>
        /// <exception cref="CPUFeatureCheckException">AVX2 is reported as unsupported.</exception>
        internal static v256 mul_byte(v256 left, v256 right)
        {
            if (Avx2.IsAvx2Supported)
            {
                // The low byte of each 16-bit product depends only on the low bytes of the inputs.
                v256 evenProducts = Avx2.mm256_mullo_epi16(left, right);

                // Move the odd-indexed bytes into the low byte of each 16-bit element,
                // multiply, then move the product back up into the odd byte position.
                v256 leftOdd     = Avx2.mm256_srli_epi16(left, 8);
                v256 rightOdd    = Avx2.mm256_srli_epi16(right, 8);
                v256 oddProducts = Avx2.mm256_slli_epi16(Avx2.mm256_mullo_epi16(leftOdd, rightOdd), 8);

                // presumably this constructor broadcasts the 32-bit pattern so every odd byte is 0xFF — confirm
                v256 oddByteSelector = new v256(0xFF00_FF00);

                return Avx2.mm256_blendv_epi8(evenProducts, oddProducts, oddByteSelector);
            }
            else
            {
                throw new CPUFeatureCheckException();
            }
        }
        /// <summary>
        /// Scenario: pins the three class variables, loads each one through a pointer,
        /// blends them with <c>Avx2.BlendVariable</c>, writes the result to the output
        /// buffer and validates it against the class-variable values.
        /// </summary>
        public void RunClsVarScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario_Load));

            fixed (Vector256<Int32>* firstPtr = &_clsVar1)
            fixed (Vector256<Int32>* secondPtr = &_clsVar2)
            fixed (Vector256<Int32>* thirdPtr = &_clsVar3)
            {
                // Loads happen in the same left-to-right order as the inline call arguments did.
                Vector256<Int32> first = Avx.LoadVector256((Int32*)firstPtr);
                Vector256<Int32> second = Avx.LoadVector256((Int32*)secondPtr);
                Vector256<Int32> third = Avx.LoadVector256((Int32*)thirdPtr);

                Vector256<Int32> blended = Avx2.BlendVariable(first, second, third);

                Unsafe.Write(_dataTable.outArrayPtr, blended);
                ValidateResult(_clsVar1, _clsVar2, _clsVar3, _dataTable.outArrayPtr);
            }
        }
Example #5
0
            /// <summary>
            /// Multiplies every vector in <paramref name="v"/> by <paramref name="n"/> and splits
            /// each product vector into two masked vectors, returned in two new arrays.
            /// </summary>
            /// <param name="v">Input vectors; not modified.</param>
            /// <param name="n">Scalar multiplier.</param>
            /// <returns>
            /// <c>hi</c>: the product shuffled with <c>MM_PERM_CDAB</c> and masked with <c>lower_mask</c>;
            /// <c>lo</c>: the product masked with <c>lower_mask</c>. Both have the same length as <paramref name="v"/>.
            /// </returns>
            private static unsafe (Vector256<UInt32>[] hi, Vector256<UInt32>[] lo) Mul(Vector256<UInt32>[] v, UInt32 n)
            {
                int count = v.Length;
                Vector256<UInt32>[] highParts = new Vector256<UInt32>[count];
                Vector256<UInt32>[] lowParts  = new Vector256<UInt32>[count];

                // Widening the four copies of n to 64 bits leaves n in every even 32-bit lane.
                Vector256<UInt32> multiplier = Avx2.ConvertToVector256Int64(Vector128.Create(n)).AsUInt32();
                Vector256<UInt32> keepMask   = lower_mask;

                fixed (Vector256<UInt32>* source = v, highOut = highParts, lowOut = lowParts)
                {
                    int position = 0;
                    while (position < count)
                    {
                        Vector256<UInt32> product = Avx2.Multiply(source[position], multiplier).AsUInt32();

                        // presumably MM_PERM_CDAB swaps adjacent 32-bit lanes so the upper halves line up with the mask — confirm
                        highOut[position] = Avx2.And(Avx2.Shuffle(product, MM_PERM_CDAB), keepMask);
                        lowOut[position]  = Avx2.And(product, keepMask);

                        position++;
                    }
                }

                return (highParts, lowParts);
            }
        /* Routine optimized for shuffling a buffer for a type size of 2 bytes.
         *
         * For every group of 32 two-byte elements read from src, the first byte of
         * each element is written to dest + j and the second byte of each element is
         * written to dest + j + total_elements.
         *
         * dest                  - output buffer; written at [j, j + 32) and
         *                         [total_elements + j, total_elements + j + 32).
         * src                   - input buffer; 64 bytes are read per iteration.
         * vectorizable_elements - number of elements to process; the loop advances
         *                         32 elements at a time.
         * total_elements        - distance in bytes between the two output planes.
         *
         * No bounds checking is done; the caller must size both buffers accordingly. */
        private static unsafe void shuffle2_avx2(byte *dest, byte *src,
                                                 int vectorizable_elements, int total_elements)
        {
            const int bytesoftype = 2;
            int vectorBytes = Vector256<byte>.Count;

            /* Create the shuffle mask.
             * NOTE: Vector256.Create takes its arguments from element 0 (least
             * significant) upwards, i.e. the opposite order of the C '_mm256_set_epi8'
             * intrinsic, so the mask is listed here in memory order. Within each
             * 128-bit lane it gathers the even-indexed bytes into the low 8 bytes and
             * the odd-indexed bytes into the high 8 bytes. */
            var shmask = Vector256.Create((byte)
                                          0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e,
                                          0x01, 0x03, 0x05, 0x07, 0x09, 0x0b, 0x0d, 0x0f,
                                          0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e,
                                          0x01, 0x03, 0x05, 0x07, 0x09, 0x0b, 0x0d, 0x0f);

            for (int j = 0; j < vectorizable_elements; j += vectorBytes)
            {
                /* Fetch 32 elements (64 bytes) and split each lane into even/odd bytes. */
                byte *block = src + (j * bytesoftype);
                Vector256<byte> split0 = Avx2.Shuffle(Avx.LoadVector256(block), shmask);
                Vector256<byte> split1 = Avx2.Shuffle(Avx.LoadVector256(block + vectorBytes), shmask);

                /* Regroup the 64-bit quarters: first vector becomes [even, even, odd, odd],
                 * second vector becomes [odd, odd, even, even]. */
                Vector256<int> evensLow  = Avx2.Permute4x64(split0.AsInt64(), 0xd8).AsInt32();
                Vector256<int> evensHigh = Avx2.Permute4x64(split1.AsInt64(), 0x8d).AsInt32();

                /* Lower half of evensLow + upper half of evensHigh = all first bytes. */
                Vector256<byte> firstBytes = Avx2.Blend(evensLow, evensHigh, 0xf0).AsByte();

                /* The complementary blend holds all second bytes with its halves swapped;
                 * the final permute puts them back in element order. */
                Vector256<int>  swappedSecond = Avx2.Blend(evensLow, evensHigh, 0x0f);
                Vector256<byte> secondBytes   = Avx2.Permute4x64(swappedSecond.AsInt64(), 0x4e).AsByte();

                /* Store the result vectors */
                byte *dest_for_jth_element = dest + j;
                Avx.Store(dest_for_jth_element, firstBytes);
                Avx.Store(dest_for_jth_element + total_elements, secondBytes);
            }
        }
Example #7
0
        /// <summary>
        /// Reverses the bit order of every 32-bit element of <paramref name="span"/> in place.
        /// </summary>
        /// <remarks>
        /// Works in two passes: first the byte order of each element is reversed (eight
        /// elements at a time when AVX2 is available, scalar for the remainder), then the
        /// same memory is viewed as bytes and handed to the <c>Span&lt;byte&gt;</c> overload of
        /// <c>ReverseBits</c> (defined elsewhere; presumably reverses the bits inside each byte).
        /// </remarks>
        /// <param name="span">Elements to modify in place.</param>
        public static unsafe void ReverseBits(this Span <int> span)
        {
            var intsReversed = 0;

            if (Avx2.IsSupported)
            {
                // Same decomposition as the scalar byte swap: rotate bytes 0 and 2 right by 8,
                // rotate bytes 1 and 3 left by 8, then combine. Each half must be masked on its
                // own; chaining both masks on one value would always yield zero.
                var evenBytesMask = Vector256.Create(0x00FF00FF);
                var oddBytesMask  = Vector256.Create(unchecked((int)0xFF00FF00));

                fixed(int *ptr = span)
                {
                    var vectorCount = span.Length / 8;

                    for (int i = 0; i < vectorCount; i++)
                    {
                        var vector    = Avx.LoadVector256((ptr + intsReversed));
                        var evenBytes = Avx2.And(vector, evenBytesMask);
                        var oddBytes  = Avx2.And(vector, oddBytesMask);

                        vector =
                            Avx2.Add(
                                Avx2.Or(
                                    Avx2.ShiftRightLogical(evenBytes, 8),
                                    Avx2.ShiftLeftLogical(evenBytes, 24)
                                    ),
                                Avx2.Or(
                                    Avx2.ShiftLeftLogical(oddBytes, 8),
                                    Avx2.ShiftRightLogical(oddBytes, 24)
                                    )
                                );

                        Avx.Store(ptr + intsReversed, vector);
                        intsReversed += 8;
                    }
                }
            }

            // Scalar byte swap for whatever the vector loop did not cover.
            for (int i = intsReversed; i < span.Length; i++)
            {
                span[i] = BinaryPrimitives.ReverseEndianness(span[i]);
            }

            // Second pass over the same memory, viewed as bytes.
            fixed(void *ptr = span)
            {
                new Span <byte>(ptr, span.Length * 4).ReverseBits();
            }
        }
Example #8
0
 /// <summary>
 /// Checks that <c>Avx2.Add</c> on two byte vectors, each filled with a single value,
 /// yields the byte-truncated sum in all 32 elements.
 /// </summary>
 public void Add1()
 {
     const int elementCount = 32;

     for (var leftValue = 0; leftValue < 1; leftValue++)
     {
         var leftVector = Vector256.Create((Byte)leftValue);

         for (var rightValue = 0; rightValue < 1; rightValue++)
         {
             var rightVector = Vector256.Create((Byte)rightValue);
             var sum         = Avx2.Add(leftVector, rightVector);

             for (var lane = 0; lane < elementCount; lane++)
             {
                 Assert.AreEqual((Byte)(leftValue + rightValue), sum.GetElement(lane));
             }
         }
     }
 }
        /// <summary>
        /// Scenario: creates a fresh test instance, pins its three fields, loads them
        /// through pointers, blends them with <c>Avx2.BlendVariable</c>, writes the result
        /// to the output buffer and validates it against the local instance's field values.
        /// </summary>
        public void RunClassLclFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario_Load));

            var localTest = new SimpleTernaryOpTest__BlendVariableInt16();

            fixed (Vector256<Int16>* firstPtr = &localTest._fld1)
            fixed (Vector256<Int16>* secondPtr = &localTest._fld2)
            fixed (Vector256<Int16>* thirdPtr = &localTest._fld3)
            {
                // Loads happen in the same left-to-right order as the inline call arguments did.
                Vector256<Int16> first = Avx.LoadVector256((Int16*)firstPtr);
                Vector256<Int16> second = Avx.LoadVector256((Int16*)secondPtr);
                Vector256<Int16> third = Avx.LoadVector256((Int16*)thirdPtr);

                Vector256<Int16> blended = Avx2.BlendVariable(first, second, third);

                Unsafe.Write(_dataTable.outArrayPtr, blended);
                ValidateResult(localTest._fld1, localTest._fld2, localTest._fld3, _dataTable.outArrayPtr);
            }
        }
Example #10
0
 /// <summary>
 /// Exercises horizontal addition on two 16-element vectors twice: once through
 /// <c>Avx2.HorizontalAdd</c>, and once by adding adjacent element pairs by hand.
 /// Both results are computed and discarded; nothing is compared or returned.
 /// </summary>
 public void 水平加算Int16()
 {
     // Pass 1: hardware horizontal add.
     for (var outer = 0; outer < 1; outer++)
     {
         var ascending  = Vector256.Create(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         var descending = Vector256.Create(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
         for (var inner = 0; inner < 1; inner++)
         {
             var hardwareSum = Avx2.HorizontalAdd(ascending, descending);
         }
     }

     // Pass 2: the same inputs, adjacent pairs added element by element.
     for (var outer = 0; outer < 1; outer++)
     {
         var ascending  = Vector256.Create(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         var descending = Vector256.Create(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
         var manualSum  = Vector256.Create(
             (Byte)(ascending.GetElement(0) + ascending.GetElement(1)),
             (Byte)(ascending.GetElement(2) + ascending.GetElement(3)),
             (Byte)(ascending.GetElement(4) + ascending.GetElement(5)),
             (Byte)(ascending.GetElement(6) + ascending.GetElement(7)),
             (Byte)(ascending.GetElement(8) + ascending.GetElement(9)),
             (Byte)(ascending.GetElement(10) + ascending.GetElement(11)),
             (Byte)(ascending.GetElement(12) + ascending.GetElement(13)),
             (Byte)(ascending.GetElement(14) + ascending.GetElement(15)),
             (Byte)(descending.GetElement(0) + descending.GetElement(1)),
             (Byte)(descending.GetElement(2) + descending.GetElement(3)),
             (Byte)(descending.GetElement(4) + descending.GetElement(5)),
             (Byte)(descending.GetElement(6) + descending.GetElement(7)),
             (Byte)(descending.GetElement(8) + descending.GetElement(9)),
             (Byte)(descending.GetElement(10) + descending.GetElement(11)),
             (Byte)(descending.GetElement(12) + descending.GetElement(13)),
             (Byte)(descending.GetElement(14) + descending.GetElement(15))
             );
     }
 }
Example #11
0
        /// <summary>
        /// Timed run: sums field <c>B</c> across the <c>ABC</c> array returned by <c>AOS作成()</c>
        /// using one AVX2 gather per group of records, repeats the whole pass
        /// <c>繰り返し数</c> times, then prints the accumulated sum and the elapsed milliseconds.
        /// </summary>
        public void AOSからSIMD合計ループ1展開()
        {
            var records     = AOS作成();
            var fieldOffset = Marshal.OffsetOf<ABC>(nameof(ABC.B)).ToInt32();

            // Byte offsets of field B inside eight consecutive records (the gather uses scale 1).
            var byteOffsets = Vector256.Create(
                sizeof(ABC) * 0 + fieldOffset,
                sizeof(ABC) * 1 + fieldOffset,
                sizeof(ABC) * 2 + fieldOffset,
                sizeof(ABC) * 3 + fieldOffset,
                sizeof(ABC) * 4 + fieldOffset,
                sizeof(ABC) * 5 + fieldOffset,
                sizeof(ABC) * 6 + fieldOffset,
                sizeof(ABC) * 7 + fieldOffset
                );

            // Rounded up.
            // NOTE(review): if the record count is not a multiple of the vector length,
            // the last gather reads past the end of the array — confirm against the constants.
            var groupCount = (レコード数 + Int32ベクタ長 - 1) / Int32ベクタ長;

            Vector256<Int32> runningTotals = default;
            var timer = Stopwatch.StartNew();

            fixed (ABC* firstRecord = records)
            {
                for (var pass = 0; pass < 繰り返し数; pass++)
                {
                    for (var group = 0; group < groupCount; group++)
                    {
                        var gathered = Avx2.GatherVector256((Int32*)&firstRecord[group * Int32ベクタ長], byteOffsets, 1);
                        runningTotals = Avx2.Add(runningTotals, gathered);
                    }
                }
            }

            timer.Stop();

            // Collapse the per-lane totals into one number.
            var grandTotal = 0;
            for (var lane = 0; lane < Int32ベクタ長; lane++)
            {
                grandTotal += runningTotals.GetElement(lane);
            }

            Console.WriteLine($"Sum={grandTotal}");
            Console.WriteLine($"for {繰り返し数} loops: {timer.ElapsedMilliseconds}ms");
            Console.WriteLine();
        }
Example #12
0
        /// <summary>
        /// Counts how many elements of <c>Array</c> equal <c>Item</c>, comparing eight
        /// elements at a time with AVX2 and finishing the remainder with a scalar loop.
        /// </summary>
        /// <returns>The number of matching elements.</returns>
        public unsafe int Intrinsics()
        {
            const int lanes = 256 / 8 / 4;

            var needle      = Vector256.Create(Item);
            var laneMatches = Vector256<int>.Zero;
            var data        = Array;
            int position    = 0;

            fixed (int* first = data)
            {
                int lastVectorStart = data.Length - lanes;
                while (position <= lastVectorStart)
                {
                    // A matching lane compares to -1, so subtracting the mask adds one to that lane.
                    var equalMask = Avx2.CompareEqual(Avx2.LoadVector256(first + position), needle);
                    laneMatches = Avx2.Subtract(laneMatches, equalMask);
                    position += lanes;
                }
            }

            // Fold the per-lane counters into a single total.
            int total = 0;
            for (int lane = 0; lane < lanes; lane++)
            {
                total += laneMatches.GetElement(lane);
            }

            // Elements that did not fill a whole vector.
            while (position < data.Length)
            {
                if (data[position] == Item)
                {
                    total++;
                }
                position++;
            }

            return total;
        }
Example #13
0
        /// <summary>
        /// ORs <paramref name="valueSpam"/> into <paramref name="thisSpam"/> byte by byte, in place.
        /// Uses 256-bit vectors when AVX2 is available, otherwise 128-bit vectors when SSE2 is
        /// available; any remaining bytes are handled one at a time.
        /// </summary>
        /// <param name="thisSpam">Destination span; receives the result.</param>
        /// <param name="valueSpam">Span whose bytes are OR-ed into the destination.</param>
        /// <exception cref="ArgumentException">The two spans differ in length.</exception>
        public static void Or(this Span<byte> thisSpam, Span<byte> valueSpam)
        {
            if (thisSpam.Length != valueSpam.Length)
            {
                throw new ArgumentException("Both byte spans has to be same length.");
            }

            int count  = thisSpam.Length;
            int offset = 0;

            fixed (byte* target = thisSpam)
            fixed (byte* source = valueSpam)
            {
                if (Avx2.IsSupported)
                {
                    int lastWideStart = count - Vector256<byte>.Count;
                    while (offset <= lastWideStart)
                    {
                        Vector256<byte> current  = Avx2.LoadVector256(target + offset);
                        Vector256<byte> incoming = Avx2.LoadVector256(source + offset);
                        Avx2.Store(target + offset, Avx2.Or(current, incoming));
                        offset += Vector256<byte>.Count;
                    }
                }
                else if (Sse2.IsSupported)
                {
                    int lastNarrowStart = count - Vector128<byte>.Count;
                    while (offset <= lastNarrowStart)
                    {
                        Vector128<byte> current  = Sse2.LoadVector128(target + offset);
                        Vector128<byte> incoming = Sse2.LoadVector128(source + offset);
                        Sse2.Store(target + offset, Sse2.Or(current, incoming));
                        offset += Vector128<byte>.Count;
                    }
                }
            }

            // Bytes left over after the last full vector (or everything, without SIMD support).
            while (offset < count)
            {
                thisSpam[offset] |= valueSpam[offset];
                offset++;
            }
        }
Example #14
0
    /// <summary>
    /// Rearranges the 32-bit elements of four vectors in place using three stages:
    /// a per-vector permute, two rounds of low/high unpack interleaving across the
    /// vectors, and a final per-vector permute.
    /// </summary>
    /// <remarks>
    /// The trailing numbers on each line appear to track which logical element index
    /// ends up in each lane after that step. They depend on the contents of
    /// <c>Permute7</c>..<c>Permute12</c>, which are defined elsewhere — verify against
    /// those tables before relying on them.
    /// </remarks>
    /// <param name="a">First vector; overwritten with the rearranged result.</param>
    /// <param name="b">Second vector; overwritten with the rearranged result.</param>
    /// <param name="c">Third vector; overwritten with the rearranged result.</param>
    /// <param name="d">Fourth vector; overwritten with the rearranged result.</param>
    private static void Shuffle(ref Vector256 <uint> a, ref Vector256 <uint> b, ref Vector256 <uint> c, ref Vector256 <uint> d)
    {
        // Stage 1: reorder the lanes of each input with its own permutation table.
        a = Avx2.PermuteVar8x32(a, Permute7);  // 3 19 9 25 4 20 14 30
        b = Avx2.PermuteVar8x32(b, Permute8);  // 0 16 10 26 5 21 15 31
        c = Avx2.PermuteVar8x32(c, Permute9);  // 1 17 11 27 6 22 12 28
        d = Avx2.PermuteVar8x32(d, Permute10); // 2 18 8 24 7 23 13 29

        // Stage 2a: interleave pairs of vectors (a with b, c with d).
        var t0 = Avx2.UnpackLow(a, b);         // 3 0 19 16 4 5 20 21
        var t1 = Avx2.UnpackLow(c, d);         // 1 2 17 18 6 7 22 23
        var t2 = Avx2.UnpackHigh(a, b);        // 9 10 25 26 14 15 30 31
        var t3 = Avx2.UnpackHigh(c, d);        // 11 8 27 24 12 13 28 29

        // Stage 2b: interleave the intermediate results; the inputs are overwritten from here on.
        a = Avx2.UnpackLow(t0, t1);            // 3 1 0 2 4 6 5 7
        b = Avx2.UnpackLow(t2, t3);            // 9 11 10 8 14 12 15 13
        c = Avx2.UnpackHigh(t0, t1);           // 19 17 16 18 20 22 21 23
        d = Avx2.UnpackHigh(t2, t3);           // 25 27 26 24 30 28 31 29

        // Stage 3: final per-vector permute; a/c share one table and b/d share the other.
        a = Avx2.PermuteVar8x32(a, Permute11); // 0 1 2 3 4 5 6 7
        b = Avx2.PermuteVar8x32(b, Permute12); // 8 9 10 11 12 13 14 15
        c = Avx2.PermuteVar8x32(c, Permute11); // 16 17 18 19 20 21 22 23
        d = Avx2.PermuteVar8x32(d, Permute12); // 24 25 26 27 28 29 30 31
    }
Example #15
0
 /// <summary>
 /// Checks <c>Avx2.Add</c> on two double vectors, each assembled from a lower and an
 /// upper 128-bit half, against an expected vector built from the scalar sums of the halves.
 /// </summary>
 public void Add2_Double()
 {
     for (var leftLow = 0; leftLow < 1; leftLow++)
     {
         var leftLowHalf = Vector128.Create((Double)leftLow);

         for (var leftHigh = 0; leftHigh < 1; leftHigh++)
         {
             var leftVector = Vector256.Create(leftLowHalf, Vector128.Create((Double)leftHigh));

             for (var rightLow = 0; rightLow < 1; rightLow++)
             {
                 var rightLowHalf = Vector128.Create((Double)rightLow);

                 for (var rightHigh = 0; rightHigh < 1; rightHigh++)
                 {
                     var rightVector = Vector256.Create(rightLowHalf, Vector128.Create((Double)rightHigh));
                     var sum         = Avx2.Add(leftVector, rightVector);

                     // Expected halves: integer sum, routed through UInt64, then widened to double.
                     var expectedHighHalf = Vector128.Create((Double)(UInt64)(leftHigh + rightHigh));
                     var expectedLowHalf  = Vector128.Create((Double)(UInt64)(leftLow + rightLow));
                     var expected         = Vector256.Create(expectedLowHalf, expectedHighHalf).AsDouble();

                     for (var lane = 0; lane < 4; lane++)
                     {
                         Assert.AreEqual(expected.GetElement(lane), sum.GetElement(lane));
                     }
                 }
             }
         }
     }
 }
Example #16
0
        /// <summary>
        /// Writes <c>oldScreen XOR newScreen</c> into <paramref name="difference"/> using
        /// 256-bit AVX2 operations, repeating the whole pass many times on several parallel workers.
        /// </summary>
        /// <remarks>
        /// Only whole 32-byte chunks of <paramref name="difference"/> are processed; trailing bytes
        /// are left untouched. Every worker covers the full buffer and repeats the pass
        /// <c>1048576 / cores</c> times, so all workers write identical values to the same memory.
        /// </remarks>
        /// <param name="oldScreen">First input; must cover every processed chunk.</param>
        /// <param name="newScreen">Second input; must cover every processed chunk.</param>
        /// <param name="difference">Output buffer; its length determines how many chunks are processed.</param>
        /// <param name="cores">Number of parallel workers; also divides the repeat count.</param>
        /// <exception cref="System.ArgumentNullException">Any of the arrays is null.</exception>
        /// <exception cref="System.ArgumentException">
        /// <paramref name="oldScreen"/> or <paramref name="newScreen"/> is shorter than the processed
        /// part of <paramref name="difference"/>.
        /// </exception>
        public unsafe void IntrinsicsAVX2(byte[] oldScreen, byte[] newScreen, byte[] difference, int cores)
        {
            if (oldScreen == null)
            {
                throw new System.ArgumentNullException(nameof(oldScreen));
            }
            if (newScreen == null)
            {
                throw new System.ArgumentNullException(nameof(newScreen));
            }
            if (difference == null)
            {
                throw new System.ArgumentNullException(nameof(difference));
            }

            int steps = difference.Length / 32;

            // The loop below reads through raw pointers, so a short input would be read
            // past its end; reject it up front instead.
            int processedBytes = steps * 32;
            if (oldScreen.Length < processedBytes || newScreen.Length < processedBytes)
            {
                throw new System.ArgumentException("oldScreen and newScreen must be at least as long as the processed part of difference.");
            }

            int max = 1048576 / cores;

            Parallel.For(1, cores + 1, index =>
            {
                fixed(byte *pOld  = oldScreen)
                fixed(byte *pNew  = newScreen)
                fixed(byte *pDiff = difference)
                for (int bufCnt = 0; bufCnt < max; bufCnt++)
                {
                    // Restart at the beginning of each buffer for every repetition.
                    long *ppOld  = (long *)pOld;
                    long *ppNew  = (long *)pNew;
                    long *ppDiff = (long *)pDiff;

                    // Each step handles four longs = 32 bytes = one 256-bit vector.
                    for (int position = 0; position < steps; ppOld += 4, ppNew += 4, ppDiff += 4, position++)
                    {
                        Avx2.Store(ppDiff, Avx2.Xor(Avx2.LoadVector256(ppOld), Avx2.LoadVector256(ppNew)));
                    }
                }
            });
        }
Example #17
0
        static unsafe int Main(string[] args)
        {
            int testResult = Pass;

            if (Avx2.IsSupported)
            {
                Four    = 4;
                Eight   = 8;
                invalid = 15;

                for (int i = 0; i < N; i++)
                {
                    floatSourceTable[i]  = (float)i * 10.0f;
                    doubleSourceTable[i] = (double)i * 10.0;
                    intSourceTable[i]    = i * 10;
                    longSourceTable[i]   = i * 10;
                }

                Vector256 <int>  indexi;
                Vector256 <long> indexl;
                Vector128 <int>  indexi128;

                fixed(int *iptr = intIndexTable)
                fixed(long *lptr   = longIndexTable)
                fixed(int *i128ptr = vector128intIndexTable)
                {
                    indexi    = Avx.LoadVector256(iptr);
                    indexl    = Avx.LoadVector256(lptr);
                    indexi128 = Sse2.LoadVector128(i128ptr);
                }

                Vector256 <int>    maski;
                Vector256 <uint>   maskui;
                Vector256 <long>   maskl;
                Vector256 <ulong>  maskul;
                Vector256 <float>  maskf;
                Vector256 <double> maskd;

                fixed(int *iptr = intMaskTable)
                fixed(long *lptr = longMaskTable)
                {
                    maski = Avx.LoadVector256(iptr);
                    maskl = Avx.LoadVector256(lptr);

                    maskui = maski.AsUInt32();
                    maskul = maskl.AsUInt64();
                    maskf  = maski.AsSingle();
                    maskd  = maskl.AsDouble();
                }

                Vector256 <int>    sourcei  = Vector256 <int> .Zero;
                Vector256 <uint>   sourceui = Vector256 <uint> .Zero;
                Vector256 <long>   sourcel  = Vector256 <long> .Zero;
                Vector256 <ulong>  sourceul = Vector256 <ulong> .Zero;
                Vector256 <float>  sourcef  = Vector256 <float> .Zero;
                Vector256 <double> sourced  = Vector256 <double> .Zero;

                // public static unsafe Vector256<float> GatherMaskVector256(Vector256<float> source, float* baseAddress, Vector256<int> index, Vector256<float> mask, byte scale)
                using (TestTable <float, int> floatTable = new TestTable <float, int>(floatSourceTable, new float[8]))
                {
                    var vf = Avx2.GatherMaskVector256(sourcef, (float *)(floatTable.inArrayPtr), indexi, maskf, 4);
                    Unsafe.Write(floatTable.outArrayPtr, vf);

                    if (!floatTable.CheckResult((x, y) => BitConverter.SingleToInt32Bits(x) == BitConverter.SingleToInt32Bits(y), intIndexTable))
                    {
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on float:");
                        foreach (var item in floatTable.outArray)
                        {
                            Console.Write(item + ", ");
                        }
                        Console.WriteLine();
                        testResult = Fail;
                    }

                    vf = (Vector256 <float>) typeof(Avx2).GetMethod(nameof(Avx2.GatherMaskVector256), new Type[] { typeof(Vector256 <float>), typeof(float *), typeof(Vector256 <int>), typeof(Vector256 <float>), typeof(byte) }).
                         Invoke(null, new object[] { sourcef, Pointer.Box(floatTable.inArrayPtr, typeof(float *)), indexi, maskf, (byte)4 });
                    Unsafe.Write(floatTable.outArrayPtr, vf);

                    if (!floatTable.CheckResult((x, y) => BitConverter.SingleToInt32Bits(x) == BitConverter.SingleToInt32Bits(y), intIndexTable))
                    {
                        Console.WriteLine("AVX2 GatherMaskVector256 failed with reflection on float:");
                        foreach (var item in floatTable.outArray)
                        {
                            Console.Write(item + ", ");
                        }
                        Console.WriteLine();
                        testResult = Fail;
                    }

                    try
                    {
                        vf = Avx2.GatherMaskVector256(sourcef, (float *)(floatTable.inArrayPtr), indexi, maskf, 3);
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on float with invalid scale (IMM)");
                        testResult = Fail;
                    }
                    catch (System.ArgumentOutOfRangeException)
                    {
                        // success
                    }

                    vf = Avx2.GatherMaskVector256(sourcef, (float *)(floatTable.inArrayPtr), indexi, maskf, Four);
                    Unsafe.Write(floatTable.outArrayPtr, vf);

                    if (!floatTable.CheckResult((x, y) => BitConverter.SingleToInt32Bits(x) == BitConverter.SingleToInt32Bits(y), intIndexTable))
                    {
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on float with non-const scale (IMM):");
                        foreach (var item in floatTable.outArray)
                        {
                            Console.Write(item + ", ");
                        }
                        Console.WriteLine();
                        testResult = Fail;
                    }

                    try
                    {
                        vf = Avx2.GatherMaskVector256(sourcef, (float *)(floatTable.inArrayPtr), indexi, maskf, invalid);
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on float with invalid non-const scale (IMM)");
                        testResult = Fail;
                    }
                    catch (System.ArgumentOutOfRangeException)
                    {
                        // success
                    }
                }

                // public static unsafe Vector256<double> GatherMaskVector256(Vector256<double> source, double* baseAddress, Vector128<int> index, Vector256<double> mask, byte scale)
                using (TestTable <double, int> doubletTable = new TestTable <double, int>(doubleSourceTable, new double[4]))
                {
                    var vd = Avx2.GatherMaskVector256(sourced, (double *)(doubletTable.inArrayPtr), indexi128, maskd, 8);
                    Unsafe.Write(doubletTable.outArrayPtr, vd);

                    if (!doubletTable.CheckResult((x, y) => BitConverter.DoubleToInt64Bits(x) == BitConverter.DoubleToInt64Bits(y), vector128intIndexTable))
                    {
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on double:");
                        foreach (var item in doubletTable.outArray)
                        {
                            Console.Write(item + ", ");
                        }
                        Console.WriteLine();
                        testResult = Fail;
                    }

                    vd = (Vector256 <double>) typeof(Avx2).GetMethod(nameof(Avx2.GatherMaskVector256), new Type[] { typeof(Vector256 <double>), typeof(double *), typeof(Vector128 <int>), typeof(Vector256 <double>), typeof(byte) }).
                         Invoke(null, new object[] { sourced, Pointer.Box(doubletTable.inArrayPtr, typeof(double *)), indexi128, maskd, (byte)8 });
                    Unsafe.Write(doubletTable.outArrayPtr, vd);

                    if (!doubletTable.CheckResult((x, y) => BitConverter.DoubleToInt64Bits(x) == BitConverter.DoubleToInt64Bits(y), vector128intIndexTable))
                    {
                        Console.WriteLine("AVX2 GatherMaskVector256 failed with reflection on double:");
                        foreach (var item in doubletTable.outArray)
                        {
                            Console.Write(item + ", ");
                        }
                        Console.WriteLine();
                        testResult = Fail;
                    }

                    try
                    {
                        vd = Avx2.GatherMaskVector256(sourced, (double *)(doubletTable.inArrayPtr), indexi128, maskd, 3);
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on double with invalid scale (IMM)");
                        testResult = Fail;
                    }
                    catch (System.ArgumentOutOfRangeException)
                    {
                        // success
                    }

                    vd = Avx2.GatherMaskVector256(sourced, (double *)(doubletTable.inArrayPtr), indexi128, maskd, Eight);
                    Unsafe.Write(doubletTable.outArrayPtr, vd);

                    if (!doubletTable.CheckResult((x, y) => BitConverter.DoubleToInt64Bits(x) == BitConverter.DoubleToInt64Bits(y), vector128intIndexTable))
                    {
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on double with non-const scale (IMM):");
                        foreach (var item in doubletTable.outArray)
                        {
                            Console.Write(item + ", ");
                        }
                        Console.WriteLine();
                        testResult = Fail;
                    }

                    try
                    {
                        vd = Avx2.GatherMaskVector256(sourced, (double *)(doubletTable.inArrayPtr), indexi128, maskd, invalid);
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on double with invalid non-const scale (IMM)");
                        testResult = Fail;
                    }
                    catch (System.ArgumentOutOfRangeException)
                    {
                        // success
                    }
                }

                // public static unsafe Vector256<int> GatherMaskVector256(Vector256<int> source, int* baseAddress, Vector256<int> index, Vector256<int> mask, byte scale)
                using (TestTable <int, int> intTable = new TestTable <int, int>(intSourceTable, new int[8]))
                {
                    var vf = Avx2.GatherMaskVector256(sourcei, (int *)(intTable.inArrayPtr), indexi, maski, 4);
                    Unsafe.Write(intTable.outArrayPtr, vf);

                    if (!intTable.CheckResult((x, y) => x == y, intIndexTable))
                    {
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on int:");
                        foreach (var item in intTable.outArray)
                        {
                            Console.Write(item + ", ");
                        }
                        Console.WriteLine();
                        testResult = Fail;
                    }

                    vf = (Vector256 <int>) typeof(Avx2).GetMethod(nameof(Avx2.GatherMaskVector256), new Type[] { typeof(Vector256 <int>), typeof(int *), typeof(Vector256 <int>), typeof(Vector256 <int>), typeof(byte) }).
                         Invoke(null, new object[] { sourcei, Pointer.Box(intTable.inArrayPtr, typeof(int *)), indexi, maski, (byte)4 });
                    Unsafe.Write(intTable.outArrayPtr, vf);

                    if (!intTable.CheckResult((x, y) => x == y, intIndexTable))
                    {
                        Console.WriteLine("AVX2 GatherMaskVector256 failed with reflection on int:");
                        foreach (var item in intTable.outArray)
                        {
                            Console.Write(item + ", ");
                        }
                        Console.WriteLine();
                        testResult = Fail;
                    }

                    try
                    {
                        vf = Avx2.GatherMaskVector256(sourcei, (int *)(intTable.inArrayPtr), indexi, maski, 3);
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on int with invalid scale (IMM)");
                        testResult = Fail;
                    }
                    catch (System.ArgumentOutOfRangeException)
                    {
                        // success
                    }

                    vf = Avx2.GatherMaskVector256(sourcei, (int *)(intTable.inArrayPtr), indexi, maski, Four);
                    Unsafe.Write(intTable.outArrayPtr, vf);

                    if (!intTable.CheckResult((x, y) => x == y, intIndexTable))
                    {
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on int with non-const scale (IMM):");
                        foreach (var item in intTable.outArray)
                        {
                            Console.Write(item + ", ");
                        }
                        Console.WriteLine();
                        testResult = Fail;
                    }

                    try
                    {
                        vf = Avx2.GatherMaskVector256(sourcei, (int *)(intTable.inArrayPtr), indexi, maski, invalid);
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on int with invalid non-const scale (IMM)");
                        testResult = Fail;
                    }
                    catch (System.ArgumentOutOfRangeException)
                    {
                        // success
                    }
                }

                // public static unsafe Vector256<uint> GatherMaskVector256(Vector256<uint> source, uint* baseAddress, Vector256<int> index, Vector256<uint> mask, byte scale)
                using (TestTable <int, int> intTable = new TestTable <int, int>(intSourceTable, new int[8]))
                {
                    var vf = Avx2.GatherMaskVector256(sourceui, (uint *)(intTable.inArrayPtr), indexi, maskui, 4);
                    Unsafe.Write(intTable.outArrayPtr, vf);

                    if (!intTable.CheckResult((x, y) => x == y, intIndexTable))
                    {
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on uint:");
                        foreach (var item in intTable.outArray)
                        {
                            Console.Write(item + ", ");
                        }
                        Console.WriteLine();
                        testResult = Fail;
                    }

                    vf = (Vector256 <uint>) typeof(Avx2).GetMethod(nameof(Avx2.GatherMaskVector256), new Type[] { typeof(Vector256 <uint>), typeof(uint *), typeof(Vector256 <int>), typeof(Vector256 <uint>), typeof(byte) }).
                         Invoke(null, new object[] { sourceui, Pointer.Box(intTable.inArrayPtr, typeof(uint *)), indexi, maskui, (byte)4 });
                    if (!intTable.CheckResult((x, y) => x == y, intIndexTable))
                    {
                        Console.WriteLine("AVX2 GatherMaskVector256 failed with reflection on uint:");
                        foreach (var item in intTable.outArray)
                        {
                            Console.Write(item + ", ");
                        }
                        Console.WriteLine();
                        testResult = Fail;
                    }

                    try
                    {
                        vf = Avx2.GatherMaskVector256(sourceui, (uint *)(intTable.inArrayPtr), indexi, maskui, 3);
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on uint with invalid scale (IMM)");
                        testResult = Fail;
                    }
                    catch (System.ArgumentOutOfRangeException)
                    {
                        // success
                    }

                    vf = Avx2.GatherMaskVector256(sourceui, (uint *)(intTable.inArrayPtr), indexi, maskui, Four);
                    Unsafe.Write(intTable.outArrayPtr, vf);

                    if (!intTable.CheckResult((x, y) => x == y, intIndexTable))
                    {
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on uint with non-const scale (IMM):");
                        foreach (var item in intTable.outArray)
                        {
                            Console.Write(item + ", ");
                        }
                        Console.WriteLine();
                        testResult = Fail;
                    }

                    try
                    {
                        vf = Avx2.GatherMaskVector256(sourceui, (uint *)(intTable.inArrayPtr), indexi, maskui, invalid);
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on uint with invalid non-const scale (IMM)");
                        testResult = Fail;
                    }
                    catch (System.ArgumentOutOfRangeException)
                    {
                        // success
                    }
                }

                // public static unsafe Vector256<long> GatherMaskVector256(Vector256<long> source, long* baseAddress, Vector128<int> index, Vector256<long> mask, byte scale)
                using (TestTable <long, int> longTable = new TestTable <long, int>(longSourceTable, new long[4]))
                {
                    var vf = Avx2.GatherMaskVector256(sourcel, (long *)(longTable.inArrayPtr), indexi128, maskl, 8);
                    Unsafe.Write(longTable.outArrayPtr, vf);

                    if (!longTable.CheckResult((x, y) => x == y, vector128intIndexTable))
                    {
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on long:");
                        foreach (var item in longTable.outArray)
                        {
                            Console.Write(item + ", ");
                        }
                        Console.WriteLine();
                        testResult = Fail;
                    }

                    vf = (Vector256 <long>) typeof(Avx2).GetMethod(nameof(Avx2.GatherMaskVector256), new Type[] { typeof(Vector256 <long>), typeof(long *), typeof(Vector128 <int>), typeof(Vector256 <long>), typeof(byte) }).
                         Invoke(null, new object[] { sourcel, Pointer.Box(longTable.inArrayPtr, typeof(long *)), indexi128, maskl, (byte)8 });
                    Unsafe.Write(longTable.outArrayPtr, vf);

                    if (!longTable.CheckResult((x, y) => x == y, vector128intIndexTable))
                    {
                        Console.WriteLine("AVX2 GatherMaskVector256 failed with reflection on long:");
                        foreach (var item in longTable.outArray)
                        {
                            Console.Write(item + ", ");
                        }
                        Console.WriteLine();
                        testResult = Fail;
                    }

                    try
                    {
                        vf = Avx2.GatherMaskVector256(sourcel, (long *)(longTable.inArrayPtr), indexi128, maskl, 3);
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on long with invalid scale (IMM)");
                        testResult = Fail;
                    }
                    catch (System.ArgumentOutOfRangeException)
                    {
                        // success
                    }

                    vf = Avx2.GatherMaskVector256(sourcel, (long *)(longTable.inArrayPtr), indexi128, maskl, Eight);
                    Unsafe.Write(longTable.outArrayPtr, vf);

                    if (!longTable.CheckResult((x, y) => x == y, vector128intIndexTable))
                    {
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on long with non-const scale (IMM):");
                        foreach (var item in longTable.outArray)
                        {
                            Console.Write(item + ", ");
                        }
                        Console.WriteLine();
                        testResult = Fail;
                    }

                    try
                    {
                        vf = Avx2.GatherMaskVector256(sourcel, (long *)(longTable.inArrayPtr), indexi128, maskl, invalid);
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on long with invalid non-const scale (IMM)");
                        testResult = Fail;
                    }
                    catch (System.ArgumentOutOfRangeException)
                    {
                        // success
                    }
                }

                // public static unsafe Vector256<ulong> GatherMaskVector256(Vector256<ulong> source, ulong* baseAddress, Vector128<int> index, Vector256<ulong> mask, byte scale)
                using (TestTable <long, int> longTable = new TestTable <long, int>(longSourceTable, new long[4]))
                {
                    var vf = Avx2.GatherMaskVector256(sourceul, (ulong *)(longTable.inArrayPtr), indexi128, maskul, 8);
                    Unsafe.Write(longTable.outArrayPtr, vf);

                    if (!longTable.CheckResult((x, y) => x == y, vector128intIndexTable))
                    {
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on ulong:");
                        foreach (var item in longTable.outArray)
                        {
                            Console.Write(item + ", ");
                        }
                        Console.WriteLine();
                        testResult = Fail;
                    }

                    vf = (Vector256 <ulong>) typeof(Avx2).GetMethod(nameof(Avx2.GatherMaskVector256), new Type[] { typeof(Vector256 <ulong>), typeof(ulong *), typeof(Vector128 <int>), typeof(Vector256 <ulong>), typeof(byte) }).
                         Invoke(null, new object[] { sourceul, Pointer.Box(longTable.inArrayPtr, typeof(ulong *)), indexi128, maskul, (byte)8 });
                    Unsafe.Write(longTable.outArrayPtr, vf);

                    if (!longTable.CheckResult((x, y) => x == y, vector128intIndexTable))
                    {
                        Console.WriteLine("AVX2 GatherMaskVector256 failed with reflection on ulong:");
                        foreach (var item in longTable.outArray)
                        {
                            Console.Write(item + ", ");
                        }
                        Console.WriteLine();
                        testResult = Fail;
                    }

                    try
                    {
                        vf = Avx2.GatherMaskVector256(sourceul, (ulong *)(longTable.inArrayPtr), indexi128, maskul, 3);
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on ulong with invalid scale (IMM)");
                        testResult = Fail;
                    }
                    catch (System.ArgumentOutOfRangeException)
                    {
                        // success
                    }

                    vf = Avx2.GatherMaskVector256(sourceul, (ulong *)(longTable.inArrayPtr), indexi128, maskul, Eight);
                    Unsafe.Write(longTable.outArrayPtr, vf);

                    if (!longTable.CheckResult((x, y) => x == y, vector128intIndexTable))
                    {
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on ulong with non-const scale (IMM):");
                        foreach (var item in longTable.outArray)
                        {
                            Console.Write(item + ", ");
                        }
                        Console.WriteLine();
                        testResult = Fail;
                    }

                    try
                    {
                        vf = Avx2.GatherMaskVector256(sourceul, (ulong *)(longTable.inArrayPtr), indexi128, maskul, invalid);
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on ulong with invalid non-const scale (IMM)");
                        testResult = Fail;
                    }
                    catch (System.ArgumentOutOfRangeException)
                    {
                        // success
                    }
                }

                // public static unsafe Vector256<long> GatherMaskVector256(Vector256<long> source, long* baseAddress, Vector256<long> index, Vector256<long> mask, byte scale)
                using (TestTable <long, long> longTable = new TestTable <long, long>(longSourceTable, new long[4]))
                {
                    var vf = Avx2.GatherMaskVector256(sourcel, (long *)(longTable.inArrayPtr), indexl, maskl, 8);
                    Unsafe.Write(longTable.outArrayPtr, vf);

                    if (!longTable.CheckResult((x, y) => x == y, longIndexTable))
                    {
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on long with Vector256 long index:");
                        foreach (var item in longTable.outArray)
                        {
                            Console.Write(item + ", ");
                        }
                        Console.WriteLine();
                        testResult = Fail;
                    }

                    vf = (Vector256 <long>) typeof(Avx2).GetMethod(nameof(Avx2.GatherMaskVector256), new Type[] { typeof(Vector256 <long>), typeof(long *), typeof(Vector256 <long>), typeof(Vector256 <long>), typeof(byte) }).
                         Invoke(null, new object[] { sourcel, Pointer.Box(longTable.inArrayPtr, typeof(long *)), indexl, maskl, (byte)8 });
                    Unsafe.Write(longTable.outArrayPtr, vf);

                    if (!longTable.CheckResult((x, y) => x == y, longIndexTable))
                    {
                        Console.WriteLine("AVX2 GatherMaskVector256 failed with reflection on long with Vector256 long index:");
                        foreach (var item in longTable.outArray)
                        {
                            Console.Write(item + ", ");
                        }
                        Console.WriteLine();
                        testResult = Fail;
                    }

                    try
                    {
                        vf = Avx2.GatherMaskVector256(sourcel, (long *)(longTable.inArrayPtr), indexl, maskl, 3);
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on long with invalid scale (IMM) and Vector256 long index");
                        testResult = Fail;
                    }
                    catch (System.ArgumentOutOfRangeException)
                    {
                        // success
                    }

                    vf = Avx2.GatherMaskVector256(sourcel, (long *)(longTable.inArrayPtr), indexl, maskl, Eight);
                    Unsafe.Write(longTable.outArrayPtr, vf);

                    if (!longTable.CheckResult((x, y) => x == y, longIndexTable))
                    {
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on long with non-const scale (IMM) and Vector256 long index:");
                        foreach (var item in longTable.outArray)
                        {
                            Console.Write(item + ", ");
                        }
                        Console.WriteLine();
                        testResult = Fail;
                    }

                    try
                    {
                        vf = Avx2.GatherMaskVector256(sourcel, (long *)(longTable.inArrayPtr), indexl, maskl, invalid);
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on long with invalid non-const scale (IMM) and Vector256 long index");
                        testResult = Fail;
                    }
                    catch (System.ArgumentOutOfRangeException)
                    {
                        // success
                    }
                }

                // public static unsafe Vector256<ulong> GatherMaskVector256(Vector256<ulong> source, ulong* baseAddress, Vector256<long> index, Vector256<ulong> mask, byte scale)
                using (TestTable <long, long> longTable = new TestTable <long, long>(longSourceTable, new long[4]))
                {
                    var vf = Avx2.GatherMaskVector256(sourceul, (ulong *)(longTable.inArrayPtr), indexl, maskul, 8);
                    Unsafe.Write(longTable.outArrayPtr, vf);

                    if (!longTable.CheckResult((x, y) => x == y, longIndexTable))
                    {
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on ulong with Vector256 long index:");
                        foreach (var item in longTable.outArray)
                        {
                            Console.Write(item + ", ");
                        }
                        Console.WriteLine();
                        testResult = Fail;
                    }

                    vf = (Vector256 <ulong>) typeof(Avx2).GetMethod(nameof(Avx2.GatherMaskVector256), new Type[] { typeof(Vector256 <ulong>), typeof(ulong *), typeof(Vector256 <long>), typeof(Vector256 <ulong>), typeof(byte) }).
                         Invoke(null, new object[] { sourceul, Pointer.Box(longTable.inArrayPtr, typeof(ulong *)), indexl, maskul, (byte)8 });
                    Unsafe.Write(longTable.outArrayPtr, vf);

                    if (!longTable.CheckResult((x, y) => x == y, longIndexTable))
                    {
                        Console.WriteLine("AVX2 GatherMaskVector256 failed with reflection on ulong with Vector256 long index:");
                        foreach (var item in longTable.outArray)
                        {
                            Console.Write(item + ", ");
                        }
                        Console.WriteLine();
                        testResult = Fail;
                    }

                    try
                    {
                        vf = Avx2.GatherMaskVector256(sourceul, (ulong *)(longTable.inArrayPtr), indexl, maskul, 3);
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on ulong with invalid scale (IMM) and Vector256 long index");
                        testResult = Fail;
                    }
                    catch (System.ArgumentOutOfRangeException)
                    {
                        // success
                    }

                    vf = Avx2.GatherMaskVector256(sourceul, (ulong *)(longTable.inArrayPtr), indexl, maskul, Eight);
                    Unsafe.Write(longTable.outArrayPtr, vf);

                    if (!longTable.CheckResult((x, y) => x == y, longIndexTable))
                    {
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on ulong with non-const scale (IMM) and Vector256 long index:");
                        foreach (var item in longTable.outArray)
                        {
                            Console.Write(item + ", ");
                        }
                        Console.WriteLine();
                        testResult = Fail;
                    }

                    try
                    {
                        vf = Avx2.GatherMaskVector256(sourceul, (ulong *)(longTable.inArrayPtr), indexl, maskul, invalid);
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on long with invalid non-const scale (IMM) and Vector256 long index");
                        testResult = Fail;
                    }
                    catch (System.ArgumentOutOfRangeException)
                    {
                        // success
                    }
                }

                // public static unsafe Vector256<double> GatherMaskVector256(Vector256<double> source, double* baseAddress, Vector256<long> index, Vector256<double> mask, byte scale)
                using (TestTable <double, long> doubletTable = new TestTable <double, long>(doubleSourceTable, new double[4]))
                {
                    var vd = Avx2.GatherMaskVector256(sourced, (double *)(doubletTable.inArrayPtr), indexl, maskd, 8);
                    Unsafe.Write(doubletTable.outArrayPtr, vd);

                    if (!doubletTable.CheckResult((x, y) => BitConverter.DoubleToInt64Bits(x) == BitConverter.DoubleToInt64Bits(y), longIndexTable))
                    {
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on double with Vector256 long index:");
                        foreach (var item in doubletTable.outArray)
                        {
                            Console.Write(item + ", ");
                        }
                        Console.WriteLine();
                        testResult = Fail;
                    }

                    vd = (Vector256 <double>) typeof(Avx2).GetMethod(nameof(Avx2.GatherMaskVector256), new Type[] { typeof(Vector256 <double>), typeof(double *), typeof(Vector256 <long>), typeof(Vector256 <double>), typeof(byte) }).
                         Invoke(null, new object[] { sourced, Pointer.Box(doubletTable.inArrayPtr, typeof(double *)), indexl, maskd, (byte)8 });
                    Unsafe.Write(doubletTable.outArrayPtr, vd);

                    if (!doubletTable.CheckResult((x, y) => BitConverter.DoubleToInt64Bits(x) == BitConverter.DoubleToInt64Bits(y), longIndexTable))
                    {
                        Console.WriteLine("AVX2 GatherMaskVector256 failed with reflection on double with Vector256 long index:");
                        foreach (var item in doubletTable.outArray)
                        {
                            Console.Write(item + ", ");
                        }
                        Console.WriteLine();
                        testResult = Fail;
                    }

                    try
                    {
                        vd = Avx2.GatherMaskVector256(sourced, (double *)(doubletTable.inArrayPtr), indexl, maskd, 3);
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on double with invalid scale (IMM) and Vector256 long index");
                        testResult = Fail;
                    }
                    catch (System.ArgumentOutOfRangeException)
                    {
                        // success
                    }

                    vd = Avx2.GatherMaskVector256(sourced, (double *)(doubletTable.inArrayPtr), indexl, maskd, Eight);
                    Unsafe.Write(doubletTable.outArrayPtr, vd);

                    if (!doubletTable.CheckResult((x, y) => BitConverter.DoubleToInt64Bits(x) == BitConverter.DoubleToInt64Bits(y), longIndexTable))
                    {
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on double with non-const scale (IMM) and Vector256 long index:");
                        foreach (var item in doubletTable.outArray)
                        {
                            Console.Write(item + ", ");
                        }
                        Console.WriteLine();
                        testResult = Fail;
                    }

                    try
                    {
                        vd = Avx2.GatherMaskVector256(sourced, (double *)(doubletTable.inArrayPtr), indexl, maskd, invalid);
                        Console.WriteLine("AVX2 GatherMaskVector256 failed on double with invalid non-const scale (IMM) and Vector256 long index");
                        testResult = Fail;
                    }
                    catch (System.ArgumentOutOfRangeException)
                    {
                        // success
                    }
                }
            }



            return(testResult);
        }
Example #18
0
 /// <summary>
 /// Passes this instance's <c>_fld</c> to <c>Avx2.ExtractVector128</c> with index 1,
 /// targeting the output buffer of <paramref name="testClass"/>, and then asks
 /// <paramref name="testClass"/> to validate that buffer against the same field.
 /// </summary>
 /// <param name="testClass">Test object that supplies the data table and the validation routine.</param>
 public void RunStructFldScenario(ExtractStoreTest__ExtractVector128Byte1 testClass)
 {
     // Name the destination pointer so the intrinsic call reads as (dest, value, index).
     var destination = (Byte *)testClass._dataTable.outArrayPtr;
     Avx2.ExtractVector128(destination, _fld, 1);

     testClass.ValidateResult(_fld, testClass._dataTable.outArrayPtr);
 }
Example #19
0
 /// <summary>
 /// Passes the instance field <c>_fld</c> to <c>Avx2.ExtractVector128</c> with index 1,
 /// targeting the data table's output buffer, and then validates that buffer
 /// against the same field.
 /// </summary>
 public void RunClassFldScenario()
 {
     // Name the destination pointer so the intrinsic call reads as (dest, value, index).
     var destination = (Byte *)_dataTable.outArrayPtr;
     Avx2.ExtractVector128(destination, _fld, 1);

     ValidateResult(_fld, _dataTable.outArrayPtr);
 }
Example #20
0
        /// <summary>
        /// Creates a fresh test object and passes its <c>_fld</c> to
        /// <c>Avx2.ExtractVector128</c> with index 1, targeting this instance's
        /// output buffer.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the output buffer is written but not validated in this
        /// method - confirm that is intentional.
        /// </remarks>
        public void RunLclFldScenario()
        {
            var freshInstance = new SimpleUnaryOpTest__ExtractVector128Byte1Store();

            // The source vector comes from the local object; the destination is this instance's buffer.
            var destination = (Byte *)_dataTable.outArrayPtr;
            Avx2.ExtractVector128(destination, freshInstance._fld, 1);
        }
Example #21
0
        /// <summary>
        /// Reads a <c>Vector256</c> of bytes from the data table's input buffer via
        /// <c>Unsafe.Read</c> into a local, then passes that local to
        /// <c>Avx2.ExtractVector128</c> with index 1, targeting the output buffer.
        /// </summary>
        public void RunLclVarScenario_UnsafeRead()
        {
            // Load the operand first, exactly as a local variable (not a field).
            Vector256 <Byte> operand = Unsafe.Read <Vector256 <Byte> >(_dataTable.inArrayPtr);

            var destination = (Byte *)_dataTable.outArrayPtr;
            Avx2.ExtractVector128(destination, operand, 1);
        }
Example #22
0
        /// <summary>
        /// Widens <paramref name="count"/> bytes starting at <paramref name="input"/> into
        /// chars written at <paramref name="output"/>, validating every chunk with
        /// <c>CheckBytesInAsciiRange</c> before it is stored.
        /// </summary>
        /// <remarks>
        /// The input is consumed in progressively smaller chunks: 256-bit and 128-bit
        /// hardware vectors when available (or <c>Vector</c> when only that is accelerated),
        /// then 8/4-byte words, then a 2-byte and a 1-byte tail. Chunks that passed
        /// validation have already been stored when a later chunk fails, so
        /// <paramref name="output"/> may be partially written when the method returns false.
        /// </remarks>
        /// <param name="input">Pointer to the first source byte; asserted non-null in debug builds.</param>
        /// <param name="output">Pointer to the first destination char; asserted non-null in debug builds.</param>
        /// <param name="count">Number of bytes to convert.</param>
        /// <returns>
        /// <c>false</c> as soon as a chunk fails <c>CheckBytesInAsciiRange</c>; <c>true</c> once
        /// all <paramref name="count"/> bytes have been validated and widened.
        /// </returns>
        public static unsafe bool TryGetAsciiString(byte *input, char *output, int count)
        {
            Debug.Assert(input != null);
            Debug.Assert(output != null);

            // One past the last byte to convert.
            var end = input + count;

            // Debug-only sanity check; presumably guards the `end - N` pointer
            // subtractions below against wrapping below address zero.
            Debug.Assert((long)end >= Vector256 <sbyte> .Count);

            // PERF: so the JIT can reuse the zero from a register
            Vector128 <sbyte> zero = Vector128 <sbyte> .Zero;

            if (Sse2.IsSupported)
            {
                // Widest path: one Vector256 of bytes per iteration, entered only when
                // at least that many bytes remain.
                if (Avx2.IsSupported && input <= end - Vector256 <sbyte> .Count)
                {
                    Vector256 <sbyte> avxZero = Vector256 <sbyte> .Zero;

                    do
                    {
                        var vector = Avx.LoadVector256(input).AsSByte();
                        if (!CheckBytesInAsciiRange(vector, avxZero))
                        {
                            return(false);
                        }

                        // Interleave the bytes with zero bytes to form 16-bit values.
                        var tmp0 = Avx2.UnpackLow(vector, avxZero);
                        var tmp1 = Avx2.UnpackHigh(vector, avxZero);

                        // Bring into the right order
                        var out0 = Avx2.Permute2x128(tmp0, tmp1, 0x20);
                        var out1 = Avx2.Permute2x128(tmp0, tmp1, 0x31);

                        // Two stores: the second lands one Vector256 of ushorts after the first.
                        Avx.Store((ushort *)output, out0.AsUInt16());
                        Avx.Store((ushort *)output + Vector256 <ushort> .Count, out1.AsUInt16());

                        // `output` is a char*, so this advances one char per input byte consumed.
                        input  += Vector256 <sbyte> .Count;
                        output += Vector256 <sbyte> .Count;
                    } while (input <= end - Vector256 <sbyte> .Count);

                    // Everything consumed; otherwise fall through to the narrower paths.
                    if (input == end)
                    {
                        return(true);
                    }
                }

                // 128-bit path: same scheme with one Vector128 of bytes per iteration.
                if (input <= end - Vector128 <sbyte> .Count)
                {
                    do
                    {
                        var vector = Sse2.LoadVector128(input).AsSByte();
                        if (!CheckBytesInAsciiRange(vector, zero))
                        {
                            return(false);
                        }

                        var c0 = Sse2.UnpackLow(vector, zero).AsUInt16();
                        var c1 = Sse2.UnpackHigh(vector, zero).AsUInt16();

                        Sse2.Store((ushort *)output, c0);
                        Sse2.Store((ushort *)output + Vector128 <ushort> .Count, c1);

                        input  += Vector128 <sbyte> .Count;
                        output += Vector128 <sbyte> .Count;
                    } while (input <= end - Vector128 <sbyte> .Count);

                    if (input == end)
                    {
                        return(true);
                    }
                }
            }
            else if (Vector.IsHardwareAccelerated)
            {
                // Portable vector path, used only when Sse2 is not supported.
                while (input <= end - Vector <sbyte> .Count)
                {
                    var vector = Unsafe.AsRef <Vector <sbyte> >(input);
                    if (!CheckBytesInAsciiRange(vector))
                    {
                        return(false);
                    }

                    // Widen straight into the destination: two Vector<short> results
                    // written back to back at `output`.
                    Vector.Widen(
                        vector,
                        out Unsafe.AsRef <Vector <short> >(output),
                        out Unsafe.AsRef <Vector <short> >(output + Vector <short> .Count));

                    input  += Vector <sbyte> .Count;
                    output += Vector <sbyte> .Count;
                }

                if (input == end)
                {
                    return(true);
                }
            }

            // Word-sized paths handle whatever the vector paths left over (or all of
            // the input when no vector path ran).
            if (Environment.Is64BitProcess) // Use Intrinsic switch for branch elimination
            {
                // 64-bit: Loop longs by default
                while (input <= end - sizeof(long))
                {
                    var value = *(long *)input;
                    if (!CheckBytesInAsciiRange(value))
                    {
                        return(false);
                    }

                    // BMI2 could be used, but this variant is faster on both Intel and AMD.
                    if (Sse2.X64.IsSupported)
                    {
                        // Move the eight bytes into a vector, interleave with zeros, and
                        // store the result through `output`.
                        Vector128 <sbyte> vecNarrow = Sse2.X64.ConvertScalarToVector128Int64(value).AsSByte();
                        Vector128 <ulong> vecWide   = Sse2.UnpackLow(vecNarrow, zero).AsUInt64();
                        Sse2.Store((ulong *)output, vecWide);
                    }
                    else
                    {
                        // Scalar fallback: widen the eight bytes one at a time.
                        output[0] = (char)input[0];
                        output[1] = (char)input[1];
                        output[2] = (char)input[2];
                        output[3] = (char)input[3];
                        output[4] = (char)input[4];
                        output[5] = (char)input[5];
                        output[6] = (char)input[6];
                        output[7] = (char)input[7];
                    }

                    input  += sizeof(long);
                    output += sizeof(long);
                }

                // After the long loop fewer than sizeof(long) bytes remain, so a single
                // int-sized step (not a loop) is enough here.
                if (input <= end - sizeof(int))
                {
                    var value = *(int *)input;
                    if (!CheckBytesInAsciiRange(value))
                    {
                        return(false);
                    }

                    WidenFourAsciiBytesToUtf16AndWriteToBuffer(output, input, value, zero);

                    input  += sizeof(int);
                    output += sizeof(int);
                }
            }
            else
            {
                // 32-bit: Loop ints by default
                while (input <= end - sizeof(int))
                {
                    var value = *(int *)input;
                    if (!CheckBytesInAsciiRange(value))
                    {
                        return(false);
                    }

                    WidenFourAsciiBytesToUtf16AndWriteToBuffer(output, input, value, zero);

                    input  += sizeof(int);
                    output += sizeof(int);
                }
            }

            // Fewer than sizeof(int) bytes remain: at most one 2-byte step...
            if (input <= end - sizeof(short))
            {
                if (!CheckBytesInAsciiRange(((short *)input)[0]))
                {
                    return(false);
                }

                output[0] = (char)input[0];
                output[1] = (char)input[1];

                input  += sizeof(short);
                output += sizeof(short);
            }

            // ...followed by at most one final single byte.
            if (input < end)
            {
                if (!CheckBytesInAsciiRange(((sbyte *)input)[0]))
                {
                    return(false);
                }
                output[0] = (char)input[0];
            }

            return(true);
        }
        /// <summary>
        /// Scans <paramref name="buf"/> in 64-byte chunks and records, in the
        /// <c>structural_indexes</c> array of <paramref name="pj"/>, the byte offset of every
        /// structural and pseudo-structural character that lies outside quoted strings.
        /// </summary>
        /// <param name="buf">Input bytes to scan.</param>
        /// <param name="len">Number of bytes of <paramref name="buf"/> to scan.</param>
        /// <param name="pj">
        /// Receives the recorded offsets and their count (<c>n_structural_indexes</c>).
        /// NOTE(review): the unrolled writer stores up to 7 scratch entries past the real count,
        /// and two more slots are written at the end, so the array presumably needs slack beyond
        /// the number of structurals; confirm against the allocation site.
        /// </param>
        /// <returns>
        /// <c>false</c> when <paramref name="len"/> exceeds <c>bytecapacity</c> of
        /// <paramref name="pj"/>, or when no structural character was found at all;
        /// <c>true</c> otherwise.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// The last recorded offset is greater than <paramref name="len"/>.
        /// </exception>
        public static bool find_structural_bits(uint8_t *buf, size_t len, ParsedJson *pj)
        {
            if (len > pj->bytecapacity)
            {
                Console.WriteLine("Your ParsedJson object only supports documents up to " + pj->bytecapacity +
                                  " bytes but you are trying to process " + len + " bytes\n");
                return(false);
            }

            uint32_t *base_ptr = pj->structural_indexes;
            uint32_t  @base    = 0;

            const uint64_t even_bits = 0x5555555555555555UL;
            const uint64_t odd_bits  = ~even_bits;

            // for now, just work in 64-byte chunks
            // the final (partial or full) chunk is handled separately below through a
            // space-padded stack buffer

            // persistent state across loop
            uint64_t prev_iter_ends_odd_backslash = 0UL; // either 0 or 1, but a 64-bit value
            uint64_t prev_iter_inside_quote       = 0UL; // either all zeros or all ones

            // effectively the very first char is considered to follow "whitespace" for the
            // purposes of pseudo-structural character detection
            uint64_t prev_iter_ends_pseudo_pred = 1UL;
            size_t   lenminus64  = len < 64 ? 0 : len - 64;
            size_t   idx         = 0;
            uint64_t structurals = 0;

            // C#: assign static readonly fields to locals before the loop
            Vector256 <byte> low_nibble_mask  = s_low_nibble_mask;
            Vector256 <byte> high_nibble_mask = s_high_nibble_mask;

            var structural_shufti_mask = Vector256.Create((byte)0x7);
            var whitespace_shufti_mask = Vector256.Create((byte)0x18);
            var slashVec       = Vector256.Create((bytechar)'\\').AsByte();
            var ffVec          = Vector128.Create((byte)0xFF).AsUInt64();
            var doubleQuoteVec = Vector256.Create((byte)'"');
            var zeroBVec       = Vector256.Create((byte)0);
            var vec7f          = Vector256.Create((byte)0x7f);

            for (; idx < lenminus64; idx += 64)
            {
                var input_lo = Avx.LoadVector256(buf + idx + 0);
                var input_hi = Avx.LoadVector256(buf + idx + 32);
                ////////////////////////////////////////////////////////////////////////////////////////////
                //     Step 1: detect odd sequences of backslashes
                ////////////////////////////////////////////////////////////////////////////////////////////

                uint64_t bs_bits =
                    cmp_mask_against_input(input_lo, input_hi, slashVec);
                uint64_t start_edges = bs_bits & ~(bs_bits << 1);
                // flip lowest if we have an odd-length run at the end of the prior
                // iteration
                uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
                uint64_t even_starts     = start_edges & even_start_mask;
                uint64_t odd_starts      = start_edges & ~even_start_mask;
                uint64_t even_carries    = bs_bits + even_starts;
                uint64_t odd_carries;
                // must record the carry-out of our odd-carries out of bit 63; this
                // indicates whether the sense of any edge going to the next iteration
                // should be flipped
                bool iter_ends_odd_backslash =
                    add_overflow(bs_bits, odd_starts, &odd_carries);

                odd_carries |=
                    prev_iter_ends_odd_backslash; // push in bit zero as a potential end
                // if we had an odd-numbered run at the
                // end of the previous iteration
                prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1UL : 0x0UL;
                uint64_t even_carry_ends    = even_carries & ~bs_bits;
                uint64_t odd_carry_ends     = odd_carries & ~bs_bits;
                uint64_t even_start_odd_end = even_carry_ends & odd_bits;
                uint64_t odd_start_even_end = odd_carry_ends & even_bits;
                uint64_t odd_ends           = even_start_odd_end | odd_start_even_end;

                ////////////////////////////////////////////////////////////////////////////////////////////
                //     Step 2: detect insides of quote pairs
                ////////////////////////////////////////////////////////////////////////////////////////////

                uint64_t quote_bits =
                    cmp_mask_against_input(input_lo, input_hi, doubleQuoteVec);
                quote_bits = quote_bits & ~odd_ends;
                uint64_t quote_mask = Sse2.X64.ConvertToUInt64(Pclmulqdq.CarrylessMultiply(
                                                                   Vector128.Create(quote_bits, 0UL /*C# reversed*/), ffVec, 0));

                // 'structurals' still holds the bitmap produced by the PREVIOUS chunk (it is
                // only reassigned further down), hence the "idx - 64" applied by the helper.
                @base = flatten_structural_bits(base_ptr, @base, (uint32_t)idx, structurals);

                quote_mask            ^= prev_iter_inside_quote;
                // arithmetic shift smears bit 63 over the whole word: all ones when this
                // chunk ends inside a quoted string, all zeros otherwise
                prev_iter_inside_quote =
                    (uint64_t)((int64_t)quote_mask >>
                               63);  // right shift of a signed value expected to be well-defined and standard compliant as of C++20, John Regehr from Utah U. says this is fine code

                // How do we build up a user traversable data structure
                // first, do a 'shufti' to detect structural JSON characters
                // they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c
                // these go into the first 3 buckets of the comparison (1/2/4)

                // we are also interested in the four whitespace characters
                // space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d
                // these go into the next 2 buckets of the comparison (8/16)

                var v_lo = Avx2.And(
                    Avx2.Shuffle(low_nibble_mask, input_lo),
                    Avx2.Shuffle(high_nibble_mask,
                                 Avx2.And(Avx2.ShiftRightLogical(input_lo.AsUInt32(), 4).AsByte(),
                                          vec7f)));

                var v_hi = Avx2.And(
                    Avx2.Shuffle(low_nibble_mask, input_hi),
                    Avx2.Shuffle(high_nibble_mask,
                                 Avx2.And(Avx2.ShiftRightLogical(input_hi.AsUInt32(), 4).AsByte(),
                                          vec7f)));
                var tmp_lo = Avx2.CompareEqual(
                    Avx2.And(v_lo, structural_shufti_mask), zeroBVec);
                var tmp_hi = Avx2.CompareEqual(
                    Avx2.And(v_hi, structural_shufti_mask), zeroBVec);

                // any sign extension of the high mask is discarded by the << 32 below
                uint64_t structural_res_0 = (uint32_t)Avx2.MoveMask(tmp_lo);
                uint64_t structural_res_1 = (uint64_t)Avx2.MoveMask(tmp_hi);
                structurals = ~(structural_res_0 | (structural_res_1 << 32));

                // this additional mask and transfer is non-trivially expensive,
                // unfortunately
                var tmp_ws_lo = Avx2.CompareEqual(
                    Avx2.And(v_lo, whitespace_shufti_mask), zeroBVec);
                var tmp_ws_hi = Avx2.CompareEqual(
                    Avx2.And(v_hi, whitespace_shufti_mask), zeroBVec);

                uint64_t ws_res_0   = (uint32_t)Avx2.MoveMask(tmp_ws_lo);
                uint64_t ws_res_1   = (uint64_t)Avx2.MoveMask(tmp_ws_hi);
                uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));


                // mask off anything inside quotes
                structurals &= ~quote_mask;

                // add the real quote bits back into our bitmask as well, so we can
                // quickly traverse the strings we've spent all this trouble gathering
                structurals |= quote_bits;

                // Now, establish "pseudo-structural characters". These are non-whitespace
                // characters that are (a) outside quotes and (b) have a predecessor that's
                // either whitespace or a structural character. This means that subsequent
                // passes will get a chance to encounter the first character of every string
                // of non-whitespace and, if we're parsing an atom like true/false/null or a
                // number we can stop at the first whitespace or structural character
                // following it.

                // a qualified predecessor is something that can happen 1 position before a
                // pseudo-structural character
                uint64_t pseudo_pred         = structurals | whitespace;
                uint64_t shifted_pseudo_pred = (pseudo_pred << 1) | prev_iter_ends_pseudo_pred;
                prev_iter_ends_pseudo_pred = pseudo_pred >> 63;
                uint64_t pseudo_structurals =
                    shifted_pseudo_pred & (~whitespace) & (~quote_mask);
                structurals |= pseudo_structurals;

                // now, we've used our close quotes all we need to. So let's switch them off
                // they will be off in the quote mask and on in quote bits.
                structurals &= ~(quote_bits & ~quote_mask);

                //Console.WriteLine($"Iter: {idx}, satur: {structurals}");

                //*(uint64_t *)(pj->structurals + idx / 8) = structurals;
            }

            ////////////////
            /// we use a giant copy-paste which is ugly.
            /// but otherwise the string needs to be properly padded or else we
            /// risk invalidating the UTF-8 checks.
            ////////////
            if (idx < len)
            {
                // Copy the remaining (at most 64) bytes into a stack buffer pre-filled with
                // spaces (0x20) so that full 256-bit loads never read past the input.
                uint8_t *tmpbuf = stackalloc uint8_t[64];
                memset(tmpbuf, 0x20, 64);
                memcpy(tmpbuf, buf + idx, len - idx);
                Vector256 <byte> input_lo = Avx.LoadVector256(tmpbuf + 0);
                Vector256 <byte> input_hi = Avx.LoadVector256(tmpbuf + 32);
                ////////////////////////////////////////////////////////////////////////////////////////////
                //     Step 1: detect odd sequences of backslashes
                ////////////////////////////////////////////////////////////////////////////////////////////

                uint64_t bs_bits =
                    cmp_mask_against_input(input_lo, input_hi, slashVec);
                uint64_t start_edges = bs_bits & ~(bs_bits << 1);
                // flip lowest if we have an odd-length run at the end of the prior
                // iteration
                uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
                uint64_t even_starts     = start_edges & even_start_mask;
                uint64_t odd_starts      = start_edges & ~even_start_mask;
                uint64_t even_carries    = bs_bits + even_starts;

                uint64_t odd_carries;
                // the carry-out is deliberately ignored here: this is the last chunk, so
                // there is no following iteration whose edges would need flipping
                //bool iter_ends_odd_backslash =
                add_overflow(bs_bits, odd_starts, &odd_carries);

                odd_carries |=
                    prev_iter_ends_odd_backslash; // push in bit zero as a potential end
                // if we had an odd-numbered run at the
                // end of the previous iteration
                //prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL;
                uint64_t even_carry_ends    = even_carries & ~bs_bits;
                uint64_t odd_carry_ends     = odd_carries & ~bs_bits;
                uint64_t even_start_odd_end = even_carry_ends & odd_bits;
                uint64_t odd_start_even_end = odd_carry_ends & even_bits;
                uint64_t odd_ends           = even_start_odd_end | odd_start_even_end;

                ////////////////////////////////////////////////////////////////////////////////////////////
                //     Step 2: detect insides of quote pairs
                ////////////////////////////////////////////////////////////////////////////////////////////

                uint64_t quote_bits =
                    cmp_mask_against_input(input_lo, input_hi, doubleQuoteVec);
                quote_bits = quote_bits & ~odd_ends;
                uint64_t quote_mask = (uint64_t)Sse2.X64.ConvertToInt64(Pclmulqdq.CarrylessMultiply(
                                                                            Vector128.Create(quote_bits, 0UL /*C# reversed*/), ffVec, 0).AsInt64());
                quote_mask ^= prev_iter_inside_quote;

                //BUG? https://github.com/dotnet/coreclr/issues/22813
                //quote_mask = 60;
                //prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63); // right shift of a signed value expected to be well-defined and standard compliant as of C++20

                // write out the bitmap left over from the last full chunk (if any)
                @base = flatten_structural_bits(base_ptr, @base, (uint32_t)idx, structurals);

                // How do we build up a user traversable data structure
                // first, do a 'shufti' to detect structural JSON characters
                // they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c
                // these go into the first 3 buckets of the comparison (1/2/4)

                // we are also interested in the four whitespace characters
                // space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d
                // these go into the next 2 buckets of the comparison (8/16)

                var v_lo = Avx2.And(
                    Avx2.Shuffle(low_nibble_mask, input_lo),
                    Avx2.Shuffle(high_nibble_mask,
                                 Avx2.And(Avx2.ShiftRightLogical(input_lo.AsUInt32(), 4).AsByte(),
                                          vec7f)));

                var v_hi = Avx2.And(
                    Avx2.Shuffle(low_nibble_mask, input_hi),
                    Avx2.Shuffle(high_nibble_mask,
                                 Avx2.And(Avx2.ShiftRightLogical(input_hi.AsUInt32(), 4).AsByte(),
                                          vec7f)));
                var tmp_lo = Avx2.CompareEqual(
                    Avx2.And(v_lo, structural_shufti_mask), zeroBVec);
                var tmp_hi = Avx2.CompareEqual(
                    Avx2.And(v_hi, structural_shufti_mask), zeroBVec);

                uint64_t structural_res_0 = (uint32_t)Avx2.MoveMask(tmp_lo);
                uint64_t structural_res_1 = (uint64_t)Avx2.MoveMask(tmp_hi);
                structurals = ~(structural_res_0 | (structural_res_1 << 32));

                // this additional mask and transfer is non-trivially expensive,
                // unfortunately
                var tmp_ws_lo = Avx2.CompareEqual(
                    Avx2.And(v_lo, whitespace_shufti_mask), zeroBVec);
                var tmp_ws_hi = Avx2.CompareEqual(
                    Avx2.And(v_hi, whitespace_shufti_mask), zeroBVec);

                uint64_t ws_res_0   = (uint32_t)Avx2.MoveMask(tmp_ws_lo);
                uint64_t ws_res_1   = (uint64_t)Avx2.MoveMask(tmp_ws_hi);
                uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));


                // mask off anything inside quotes
                structurals &= ~quote_mask;

                // add the real quote bits back into our bitmask as well, so we can
                // quickly traverse the strings we've spent all this trouble gathering
                structurals |= quote_bits;

                // Now, establish "pseudo-structural characters". These are non-whitespace
                // characters that are (a) outside quotes and (b) have a predecessor that's
                // either whitespace or a structural character. This means that subsequent
                // passes will get a chance to encounter the first character of every string
                // of non-whitespace and, if we're parsing an atom like true/false/null or a
                // number we can stop at the first whitespace or structural character
                // following it.

                // a qualified predecessor is something that can happen 1 position before a
                // pseudo-structural character
                uint64_t pseudo_pred         = structurals | whitespace;
                uint64_t shifted_pseudo_pred = (pseudo_pred << 1) | prev_iter_ends_pseudo_pred;
                prev_iter_ends_pseudo_pred = pseudo_pred >> 63;
                uint64_t pseudo_structurals =
                    shifted_pseudo_pred & (~whitespace) & (~quote_mask);
                structurals |= pseudo_structurals;

                // now, we've used our close quotes all we need to. So let's switch them off
                // they will be off in the quote mask and on in quote bits.
                structurals &= ~(quote_bits & ~quote_mask);
                //*(uint64_t *)(pj->structurals + idx / 8) = structurals;
                idx += 64;
            }

            // write out the bitmap of the final chunk
            @base = flatten_structural_bits(base_ptr, @base, (uint32_t)idx, structurals);

            pj->n_structural_indexes = @base;
            if (@base == 0)
            {
                // Nothing was recorded (e.g. empty or whitespace-only input). Bail out before
                // the "last index" checks below: "n_structural_indexes - 1" would wrap around
                // and read far outside the array.
                return(false);
            }
            if (base_ptr[pj->n_structural_indexes - 1] > len)
            {
                throw new InvalidOperationException("Internal bug");
            }
            if (len != base_ptr[pj->n_structural_indexes - 1])
            {
                // the string might not be NULL terminated, but we add a virtual NULL ending character.
                base_ptr[pj->n_structural_indexes++] = (uint32_t)len;
            }
            base_ptr[pj->n_structural_indexes] = 0; // make it safe to dereference one beyond this array

            return(true);
        }

        /// <summary>
        /// Appends to <paramref name="base_ptr"/>, starting at slot <paramref name="base"/>, one
        /// entry per set bit of <paramref name="bits"/>; each entry is
        /// <c>idx - 64 + (position of the bit)</c>.
        /// </summary>
        /// <param name="base_ptr">Destination array of offsets.</param>
        /// <param name="base">First free slot in <paramref name="base_ptr"/>.</param>
        /// <param name="idx">Offset just past the 64-byte chunk that <paramref name="bits"/> describes.</param>
        /// <param name="bits">Bitmap with one bit per byte of the chunk.</param>
        /// <returns>The first free slot after the appended entries.</returns>
        /// <remarks>
        /// The loop is unrolled eight times, so up to 7 scratch entries may be stored beyond the
        /// returned slot; they are overwritten by later calls or ignored by the caller.
        /// </remarks>
        [System.Runtime.CompilerServices.MethodImpl(System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)]
        private static uint32_t flatten_structural_bits(uint32_t *base_ptr, uint32_t @base, uint32_t idx, uint64_t bits)
        {
            // the real number of entries is the population count, regardless of how many
            // scratch slots the unrolled loop touches
            uint32_t next_base = @base + (uint32_t)hamming(bits);

            while (bits != 0)
            {
                base_ptr[@base + 0] = idx - 64 + (uint32_t)trailingzeroes(bits);
                bits                = bits & (bits - 1); // clear the lowest set bit
                base_ptr[@base + 1] = idx - 64 + (uint32_t)trailingzeroes(bits);
                bits                = bits & (bits - 1);
                base_ptr[@base + 2] = idx - 64 + (uint32_t)trailingzeroes(bits);
                bits                = bits & (bits - 1);
                base_ptr[@base + 3] = idx - 64 + (uint32_t)trailingzeroes(bits);
                bits                = bits & (bits - 1);
                base_ptr[@base + 4] = idx - 64 + (uint32_t)trailingzeroes(bits);
                bits                = bits & (bits - 1);
                base_ptr[@base + 5] = idx - 64 + (uint32_t)trailingzeroes(bits);
                bits                = bits & (bits - 1);
                base_ptr[@base + 6] = idx - 64 + (uint32_t)trailingzeroes(bits);
                bits                = bits & (bits - 1);
                base_ptr[@base + 7] = idx - 64 + (uint32_t)trailingzeroes(bits);
                bits                = bits & (bits - 1);
                @base += 8;
            }

            return(next_base);
        }
Example #24
0
 /// <summary>Returns the result of <c>Avx2.Xor</c> applied to the two operands.</summary>
 /// <param name="lhs">Left operand.</param>
 /// <param name="rhs">Right operand.</param>
 public static i32 Xor(i32 lhs, i32 rhs)
 {
     return Avx2.Xor(lhs, rhs);
 }
Example #25
0
 /// <summary>Returns <c>Avx2.Subtract(lhs, rhs)</c>, i.e. <paramref name="rhs"/> subtracted from <paramref name="lhs"/>.</summary>
 /// <param name="lhs">Minuend.</param>
 /// <param name="rhs">Subtrahend.</param>
 public static i32 Sub(i32 lhs, i32 rhs)
 {
     return Avx2.Subtract(lhs, rhs);
 }
Example #26
0
 /// <summary>Shifts <paramref name="lhs"/> right by <paramref name="rhs"/> using <c>Avx2.ShiftRightArithmetic</c>.</summary>
 /// <param name="lhs">Value to shift.</param>
 /// <param name="rhs">Shift count.</param>
 public static i32 RightShift(i32 lhs, byte rhs)
 {
     return Avx2.ShiftRightArithmetic(lhs, rhs);
 }
Example #27
0
        /// <summary>
        /// Loads a 256-bit vector from the data table's input buffer with an aligned load,
        /// then calls <c>Avx2.ExtractVector128</c> with index 1 to store into the output buffer.
        /// </summary>
        public void RunLclVarScenario_LoadAligned()
        {
            Byte *source      = (Byte *)(_dataTable.inArrayPtr);
            Byte *destination = (Byte *)_dataTable.outArrayPtr;

            // Keep the loaded vector in a local: this scenario exercises the local-variable path.
            var loaded = Avx.LoadAlignedVector256(source);

            Avx2.ExtractVector128(destination, loaded, 1);
        }
Example #28
0
 /// <summary>Negates <paramref name="lhs"/> by subtracting it from <c>i32.Zero</c>.</summary>
 /// <param name="lhs">Value to negate.</param>
 public static i32 Negate(i32 lhs)
 {
     return Avx2.Subtract(i32.Zero, lhs);
 }
Example #29
0
 /// <summary>
 /// Calls <c>Avx2.ExtractVector128</c> with index 1 on the instance field <c>_fld</c>,
 /// storing into the data table's output buffer.
 /// </summary>
 public void RunFldScenario()
 {
     Byte *destination = (Byte *)_dataTable.outArrayPtr;

     Avx2.ExtractVector128(destination, _fld, 1);
 }
Example #30
0
 /// <summary>Returns the result of <c>Avx2.Or</c> applied to the two operands.</summary>
 /// <param name="lhs">Left operand.</param>
 /// <param name="rhs">Right operand.</param>
 public static i32 Or(i32 lhs, i32 rhs)
 {
     return Avx2.Or(lhs, rhs);
 }