示例#1
0
        /// <summary>
        /// Walks a tile of <paramref name="bgra"/> pixels and increments one <paramref name="histo"/> bucket per pixel,
        /// where the bucket index is derived from the pixel's channels and the two multipliers.
        /// </summary>
        /// <param name="bgra">Pixel buffer; row <c>y</c> of the tile starts at element <c>y * stride</c>.</param>
        /// <param name="stride">Number of elements between the starts of consecutive rows.</param>
        /// <param name="tileWidth">Tile width in pixels; the AVX2 path is only taken when this is at least 16.</param>
        /// <param name="tileHeight">Number of rows to process.</param>
        /// <param name="greenToBlue">Multiplier passed through <c>LosslessUtils.Cst5b</c> and applied to the green-derived lanes.</param>
        /// <param name="redToBlue">Multiplier passed through <c>LosslessUtils.Cst5b</c> and applied to the red-derived lanes.</param>
        /// <param name="histo">Histogram whose entries are incremented in place.</param>
        /// <remarks>
        /// NOTE(review): this excerpt is truncated — the closing of the AVX2 branch, the matching <c>#endif</c> and any
        /// non-vectorized path are not visible here.
        /// </remarks>
        public static void CollectColorBlueTransforms(Span <uint> bgra, int stride, int tileWidth, int tileHeight, int greenToBlue, int redToBlue, Span <int> histo)
        {
#if SUPPORTS_RUNTIME_INTRINSICS
            if (Avx2.IsSupported && tileWidth >= 16)
            {
                // Pixels consumed per inner iteration, and the size of the scratch buffer used to index the histogram.
                const int     span   = 16;
                Span <ushort> values = stackalloc ushort[span];
                var           multsr = Vector256.Create(LosslessUtils.Cst5b(redToBlue));
                var           multsg = Vector256.Create(LosslessUtils.Cst5b(greenToBlue));
                for (int y = 0; y < tileHeight; y++)
                {
                    Span <uint> srcSpan  = bgra.Slice(y * stride);
                    ref uint    inputRef = ref MemoryMarshal.GetReference(srcSpan);
                    // Only whole groups of 16 pixels are handled by this loop.
                    for (nint x = 0; x <= tileWidth - span; x += span)
                    {
                        // Two unchecked 256-bit loads of 8 pixels each (pixels x..x+7 and x+8..x+15).
                        nint               input0Idx = x;
                        nint               input1Idx = x + (span / 2);
                        Vector256 <byte>   input0    = Unsafe.As <uint, Vector256 <uint> >(ref Unsafe.Add(ref inputRef, input0Idx)).AsByte();
                        Vector256 <byte>   input1    = Unsafe.As <uint, Vector256 <uint> >(ref Unsafe.Add(ref inputRef, input1Idx)).AsByte();
                        // NOTE(review): the shuffle/mask constants are defined outside this excerpt; presumably they
                        // isolate the red, green and blue channel bytes as their names suggest — confirm.
                        Vector256 <byte>   r0        = Avx2.Shuffle(input0, CollectColorBlueTransformsShuffleLowMask256);
                        Vector256 <byte>   r1        = Avx2.Shuffle(input1, CollectColorBlueTransformsShuffleHighMask256);
                        Vector256 <byte>   r         = Avx2.Or(r0, r1);
                        Vector256 <byte>   gb0       = Avx2.And(input0, CollectColorBlueTransformsGreenBlueMask256);
                        Vector256 <byte>   gb1       = Avx2.And(input1, CollectColorBlueTransformsGreenBlueMask256);
                        // Narrow the sixteen 32-bit lanes of gb0/gb1 into sixteen 16-bit lanes.
                        Vector256 <ushort> gb        = Avx2.PackUnsignedSaturate(gb0.AsInt32(), gb1.AsInt32());
                        Vector256 <byte>   g         = Avx2.And(gb.AsByte(), CollectColorBlueTransformsGreenMask256);
                        // High 16 bits of the signed 16-bit products with the red and green multipliers.
                        Vector256 <short>  a         = Avx2.MultiplyHigh(r.AsInt16(), multsr);
                        Vector256 <short>  b         = Avx2.MultiplyHigh(g.AsInt16(), multsg);
                        // Byte-wise subtraction of both products from gb, then masking with the blue mask.
                        Vector256 <byte>   c         = Avx2.Subtract(gb.AsByte(), b.AsByte());
                        Vector256 <byte>   d         = Avx2.Subtract(c, a.AsByte());
                        Vector256 <byte>   e         = Avx2.And(d, CollectColorBlueTransformsBlueMask256);

                        // Spill the sixteen 16-bit lanes to the stack buffer so each can be used as a histogram index.
                        ref ushort outputRef = ref MemoryMarshal.GetReference(values);
                        Unsafe.As <ushort, Vector256 <ushort> >(ref outputRef) = e.AsUInt16();

                        for (int i = 0; i < span; i++)
                        {
                            ++histo[values[i]];
                        }
                    }
                }
        /// <summary>
        /// Locates the run of entries whose insurance id equals <c>SearchedInsuranceId</c> by comparing eight ids per
        /// AVX2 iteration, and returns a copy of the corresponding slice of proposals.
        /// </summary>
        /// <returns>
        /// The selected proposals, or an empty array when the computed start index equals the number of ids.
        /// </returns>
        /// <remarks>
        /// Only offsets at which a full 8-element vector can be loaded are examined.
        /// NOTE(review): <c>Reverse()</c> is an extension defined elsewhere; presumably it reverses byte order so that a
        /// leading-zero count yields the number of non-matching lanes before the first match — confirm.
        /// </remarks>
        public unsafe ReadOnlyProposal[] ForVectorized()
        {
            ProposalResult result = ProposalBuilder.GetSortedVectorizedInsurances();
            var            needle = Vector256.Create(SearchedInsuranceId);

            fixed (int* ids = result.InsuranceIds)
            {
                int lanes = Vector256<int>.Count;

                // Exclusive upper bound for offsets from which a whole vector can still be read.
                int bound  = result.InsuranceIds.Length - lanes + 1;
                int cursor = 0;
                int lead   = 8;

                // Phase 1: advance while the (reversed) comparison mask reports 8, i.e. the shifted
                // leading-zero count covers all eight 4-bit lane groups.
                for (; lead == 8 && cursor < bound; cursor += lanes)
                {
                    Vector256<byte> equal = Avx2.CompareEqual(Avx2.LoadVector256(ids + cursor), needle).AsByte();
                    uint            bits  = (uint)Avx2.MoveMask(equal.Reverse());

                    lead = (int)Lzcnt.LeadingZeroCount(bits) >> 2;
                }

                // Step back onto the vector that ended phase 1.
                cursor -= lanes;

                int first = cursor + lead;
                if (first == result.InsuranceIds.Length)
                {
                    return Array.Empty<ReadOnlyProposal>();
                }

                // Phase 2: advance while the (non-reversed) mask has its top lane set, i.e. the shifted
                // leading-zero count is 0.
                int trailing = 0;
                while (trailing == 0 && cursor < bound)
                {
                    Vector256<byte> equal = Avx2.CompareEqual(Avx2.LoadVector256(ids + cursor), needle).AsByte();
                    uint            bits  = (uint)Avx2.MoveMask(equal);

                    trailing = (int)Lzcnt.LeadingZeroCount(bits) >> 2;
                    cursor  += lanes;
                }

                return result.Proposals.AsSpan(first, cursor - trailing - first).ToArray();
            }
        }
示例#3
0
        // removed, not testing loop speed
        //[Benchmark]
        /// <summary>
        /// Returns the proposals of the bucket selected by <c>SearchedInsuranceId</c> starting at the first entry whose
        /// net premium (as an OLE Automation currency value) is greater than <c>SearchedNetPremium</c>, comparing four
        /// premiums per AVX2 iteration.
        /// </summary>
        /// <returns>
        /// A copy of the selected slice of proposals; an empty array when no premium exceeds the threshold.
        /// </returns>
        /// <remarks>
        /// The byte mask of a 256-bit comparison has 32 bits, 8 per 64-bit lane, so the leading-zero count shifted
        /// right by 3 lies in 0..4. "No lane matched" is therefore <c>Vector256&lt;long&gt;.Count</c> (4); comparing
        /// against 8 can never detect a miss. Elements that do not fill a whole vector are scanned one by one when no
        /// vector contained a match.
        /// NOTE(review): <c>Reverse()</c> is an extension defined elsewhere; presumably it reverses byte order so the
        /// leading-zero count gives the number of lanes before the first match — confirm. The index-based slicing
        /// also assumes <c>Proposals</c> and <c>NetPremiums</c> are parallel arrays — confirm.
        /// </remarks>
        public unsafe ReadOnlyProposal[] PositionalSortedVectorized()
        {
            ProposalResult[] proposals = ProposalBuilder.GetPositionalSortedVectorizedInsurances();
            ProposalResult   p         = proposals[SearchedInsuranceId];

            long threshold  = decimal.ToOACurrency(SearchedNetPremium);
            var  minPremium = Vector256.Create(threshold);
            int  lanes      = Vector256<long>.Count;

            // Exclusive upper bound for offsets from which a whole vector can still be read.
            int vectorEnd = p.NetPremiums.Length - lanes + 1;

            fixed (long* npp = p.NetPremiums)
            {
                int  i       = 0;
                int  initial = 0;
                bool found   = false;

                // Phase 1: find the first vector that contains a premium above the threshold.
                for (; i < vectorEnd; i += lanes)
                {
                    int mask = (int)Lzcnt.LeadingZeroCount(
                        (uint)Avx2.MoveMask(
                            Vector256.AsByte(
                                Avx2.CompareGreaterThan(
                                    Avx2.LoadVector256(npp + i),
                                    minPremium
                                    )
                                ).Reverse()
                            )
                        ) >> 3;

                    // mask == lanes means all 32 mask bits were zero: nothing in this vector matched.
                    if (mask != lanes)
                    {
                        initial = i + mask;
                        found   = true;
                        break;
                    }
                }

                if (!found)
                {
                    // No whole vector matched: check the leftover elements individually. When nothing
                    // matches, i ends at the array length and the resulting slice is empty.
                    while (i < p.NetPremiums.Length && npp[i] <= threshold)
                    {
                        i++;
                    }

                    return p.Proposals.AsSpan(i).ToArray();
                }

                // Phase 2: starting from the vector that held the first match, look for a vector whose
                // highest lanes are no longer above the threshold; that marks the end of the run.
                for (; i < vectorEnd; i += lanes)
                {
                    int mask = (int)Lzcnt.LeadingZeroCount(
                        (uint)Avx2.MoveMask(
                            Vector256.AsByte(
                                Avx2.CompareGreaterThan(
                                    Avx2.LoadVector256(npp + i),
                                    minPremium
                                    )
                                )
                            )
                        ) >> 3;

                    if (mask != 0)
                    {
                        int length = i + lanes - mask - initial;
                        return p.Proposals.AsSpan(initial, length).ToArray();
                    }
                }

                // The run extends to the end of the data.
                return p.Proposals.AsSpan(initial).ToArray();
            }
        }
        /// <summary>
        /// Hashes <paramref name="data"/> block by block with the SHA-512 helpers of this type and returns the
        /// 64-byte digest.
        /// </summary>
        /// <param name="data">Input bytes, handed to <c>SHADataContext</c> for block preparation.</param>
        /// <returns>The eight 64-bit state words serialized most-significant byte first.</returns>
        public static byte[] SHA512(byte[] data)
        {
            SHADataContext context = new SHADataContext(data, SHADataContext.AlgorithmWordSize._64);

            // Working hash state, seeded with the eight SHA-512 initial hash values.
            ulong* words = stackalloc ulong[8]
            {
                0x6a09e667f3bcc908,
                0xbb67ae8584caa73b,
                0x3c6ef372fe94f82b,
                0xa54ff53a5f1d36f1,
                0x510e527fade682d1,
                0x9b05688c2b3e6c1f,
                0x1f83d9abfb41bd6b,
                0x5be0cd19137e2179
            };

            // 80-word message schedule; the context fills the first 16 words for every block.
            ulong* messageSchedule = stackalloc ulong[80];

            do
            {
                context.PrepareBlock((byte*)messageSchedule, sizeof(ulong) * 16);
                InitScheduleSHA512(messageSchedule);
                ProcessBlockSHA512(words, messageSchedule);
            }
            while (!context.Complete);

            // On a big-endian host the state bytes are copied out as they are.
            if (!BitConverter.IsLittleEndian)
            {
                return new Span<byte>(words, sizeof(ulong) * 8).ToArray();
            }

            byte[] digest = new byte[8 * sizeof(ulong)];

            if (!Avx2.IsSupported)
            {
                fixed (byte* digestPtr = digest)
                {
                    ReverseEndianess(words, (ulong*)digestPtr, 8);
                }

                return digest;
            }

            // Byte-swap four state words per 256-bit shuffle and write them straight into the result.
            Vector256<ulong> lowerHalf = Avx2.LoadVector256(words);
            Vector256<ulong> upperHalf = Avx2.LoadVector256(words + 4);

            Unsafe.As<byte, Vector256<byte>>(ref digest[0]) =
                Avx2.Shuffle(lowerHalf.AsByte(), ReverseEndianess_64_256);
            Unsafe.As<byte, Vector256<byte>>(ref digest[sizeof(ulong) * 4]) =
                Avx2.Shuffle(upperHalf.AsByte(), ReverseEndianess_64_256);

            return digest;
        }
示例#5
0
 /// <summary>
 /// Adds two 256-bit vectors lane by lane, dispatching on <typeparamref name="T"/> to the matching
 /// <c>Avx2.Add</c> (integer lanes) or <c>Avx.Add</c> (floating-point lanes) overload.
 /// </summary>
 /// <typeparam name="T">Lane type: one of the eight 8/16/32/64-bit integer types, float or double.</typeparam>
 /// <param name="left">First operand.</param>
 /// <param name="right">Second operand.</param>
 /// <returns>The lane-wise sum, reinterpreted back to <c>Vector256&lt;T&gt;</c>.</returns>
 /// <exception cref="NotSupportedException"><typeparamref name="T"/> is none of the handled lane types.</exception>
 public static Vector256<T> Vector256Add<T>(Vector256<T> left, Vector256<T> right) where T : struct
 {
     // 8-bit lanes
     if (typeof(T) == typeof(byte))
     {
         Vector256<byte> sum = Avx2.Add(left.AsByte(), right.AsByte());
         return sum.As<byte, T>();
     }

     if (typeof(T) == typeof(sbyte))
     {
         Vector256<sbyte> sum = Avx2.Add(left.AsSByte(), right.AsSByte());
         return sum.As<sbyte, T>();
     }

     // 16-bit lanes
     if (typeof(T) == typeof(short))
     {
         Vector256<short> sum = Avx2.Add(left.AsInt16(), right.AsInt16());
         return sum.As<short, T>();
     }

     if (typeof(T) == typeof(ushort))
     {
         Vector256<ushort> sum = Avx2.Add(left.AsUInt16(), right.AsUInt16());
         return sum.As<ushort, T>();
     }

     // 32-bit lanes
     if (typeof(T) == typeof(int))
     {
         Vector256<int> sum = Avx2.Add(left.AsInt32(), right.AsInt32());
         return sum.As<int, T>();
     }

     if (typeof(T) == typeof(uint))
     {
         Vector256<uint> sum = Avx2.Add(left.AsUInt32(), right.AsUInt32());
         return sum.As<uint, T>();
     }

     // 64-bit lanes
     if (typeof(T) == typeof(long))
     {
         Vector256<long> sum = Avx2.Add(left.AsInt64(), right.AsInt64());
         return sum.As<long, T>();
     }

     if (typeof(T) == typeof(ulong))
     {
         Vector256<ulong> sum = Avx2.Add(left.AsUInt64(), right.AsUInt64());
         return sum.As<ulong, T>();
     }

     // Floating-point lanes use the AVX (not AVX2) overloads.
     if (typeof(T) == typeof(float))
     {
         Vector256<float> sum = Avx.Add(left.AsSingle(), right.AsSingle());
         return sum.As<float, T>();
     }

     if (typeof(T) == typeof(double))
     {
         Vector256<double> sum = Avx.Add(left.AsDouble(), right.AsDouble());
         return sum.As<double, T>();
     }

     throw new NotSupportedException();
 }
示例#6
0
        /// <summary>
        /// Round-trips a 32-bit pattern through AVX2: expands each bit into its own byte (0 or 1), then collapses
        /// the bytes back into a 32-bit mask and checks that the original pattern is recovered.
        /// </summary>
        public unsafe void Test_AVX_BitsToBytes()
        {
            uint y = 0b1000_1001__1010_1011__1100_1101__1110_1111u;

            // Shuffle control: output bytes 0-7 take source byte 0, bytes 8-15 source byte 1, and so on.
            // The shuffle works per 128-bit lane, which is fine because the source is broadcast to every dword.
            byte[] mask1_bytes = new byte[]
            {
                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
            };

            // One single-bit selector per output byte, cycling through bits 0..7.
            byte[] mask2_bytes = new byte[]
            {
                0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
                0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
                0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
                0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
            };

            Vector256<byte> mask1;
            Vector256<byte> mask2;

            fixed (byte* ptr = mask1_bytes)
            {
                mask1 = Avx2.LoadVector256(ptr);
            }

            fixed (byte* ptr = mask2_bytes)
            {
                mask2 = Avx2.LoadVector256(ptr);
            }

            Vector256<byte> zero = Vector256<byte>.Zero;

            byte            one_byte = 1;
            Vector256<byte> one      = Avx2.BroadcastScalarToVector256(&one_byte);

            byte            ff_byte = 0xff;
            Vector256<byte> ff      = Avx2.BroadcastScalarToVector256(&ff_byte);

            // ***** load: bits -> bytes **** //
            Vector256<uint> broadcast = Avx2.BroadcastScalarToVector256(&y);
            Vector256<byte> shuffled  = Avx2.Shuffle(broadcast.AsByte(), mask1);
            Vector256<byte> expanded  = Avx2.And(shuffled, mask2);

            // Clamp every non-zero byte to exactly 1.
            expanded = Avx2.Min(expanded, one);

            // ***** store: bytes -> bits **** //
            // CompareEqual marks the zero bytes with 0xFF; AndNot with all-ones inverts that, so bytes
            // that held a 1 become 0xFF and contribute a set bit to MoveMask.
            Vector256<byte> isZero = Avx2.CompareEqual(expanded, zero);
            Vector256<byte> isSet  = Avx2.AndNot(isZero, ff);

            uint collapsed = (uint)Avx2.MoveMask(isSet);

            // Expected value first, actual value second.
            Assert.AreEqual(y, collapsed);
        }
示例#7
0
    /// <summary>
    /// Scales the three low byte channels of every pixel by that pixel's high (alpha) byte, in place, handling
    /// eight pixels per AVX2 iteration.
    /// </summary>
    /// <param name="data">Pixels to rewrite; each is treated as one 32-bit value.</param>
    /// <remarks>
    /// Every channel becomes <c>(channel * alpha + 255) &gt;&gt; 8</c>; the alpha byte itself is kept unchanged.
    /// Pixels left over after the last whole vector are passed to <c>ProcessTextureScalar</c>.
    /// </remarks>
    internal static unsafe void ProcessTextureAvx2(Span<Color8> data)
    {
        uint pixelsPerVector = (uint)Vector256<uint>.Count;

        pixelsPerVector.AssertEqual((uint)(sizeof(Vector256<uint>) / sizeof(Color8)));

        // Byte position of the widened alpha channel for each of the pixels in an unpacked vector.
        const byte alphaByte0 = 6;
        const byte alphaByte1 = alphaByte0 + 8;
        const byte alphaByte2 = alphaByte1 + 8;
        const byte alphaByte3 = alphaByte2 + 8;

        // Loop-invariant constants.
        Vector256<uint>   keepAlpha = Vector256.Create(0xFF000000U);
        Vector256<uint>   keepColor = Vector256.Create(0x00FFFFFFU);
        Vector256<ushort> bias      = Vector256.Create((ushort)0x00FFU);

        // Copies each pixel's alpha into the low byte of all four of its 16-bit channel slots
        // (0xFF control bytes produce zero).
        Vector256<byte> spreadAlpha = Vector256.Create(
            alphaByte0, 0xFF, alphaByte0, 0xFF, alphaByte0, 0xFF, alphaByte0, 0xFF,
            alphaByte1, 0xFF, alphaByte1, 0xFF, alphaByte1, 0xFF, alphaByte1, 0xFF,
            alphaByte2, 0xFF, alphaByte2, 0xFF, alphaByte2, 0xFF, alphaByte2, 0xFF,
            alphaByte3, 0xFF, alphaByte3, 0xFF, alphaByte3, 0xFF, alphaByte3, 0xFF
            );

        uint position = 0;

        fixed (Color8* pixels = data)
        {
            uint* words = (uint*)pixels;

            while (position + (pixelsPerVector - 1U) < data.Length)
            {
                Vector256<uint> source        = Avx2.LoadVector256(words + position);
                Vector256<uint> originalAlpha = Avx2.And(source, keepAlpha);

                // Widen every byte to 16 bits so the products below cannot overflow.
                Vector256<ushort> lowHalf  = Avx2.UnpackLow(source.AsByte(), Vector256<byte>.Zero).AsUInt16();
                Vector256<ushort> highHalf = Avx2.UnpackHigh(source.AsByte(), Vector256<byte>.Zero).AsUInt16();

                Vector256<ushort> lowAlpha  = Avx2.Shuffle(lowHalf.AsByte(), spreadAlpha).AsUInt16();
                Vector256<ushort> highAlpha = Avx2.Shuffle(highHalf.AsByte(), spreadAlpha).AsUInt16();

                // (channel * alpha + 255) >> 8
                Vector256<ushort> lowScaled = Avx2.ShiftRightLogical(
                    Avx2.Add(Avx2.MultiplyLow(lowHalf, lowAlpha), bias), 8);
                Vector256<ushort> highScaled = Avx2.ShiftRightLogical(
                    Avx2.Add(Avx2.MultiplyLow(highHalf, highAlpha), bias), 8);

                // Narrow back to bytes, then swap the scaled alpha for the original one.
                Vector256<uint> result = Avx2.PackUnsignedSaturate(lowScaled.AsInt16(), highScaled.AsInt16()).AsUInt32();
                result = Avx2.Or(Avx2.And(result, keepColor), originalAlpha);

                Avx2.Store(words + position, result);

                position += pixelsPerVector;
            }
        }

        // Unlikely, but the pixel count may not be a multiple of the vector width: finish the rest one by one.
        if (position < data.Length)
        {
            ProcessTextureScalar(data.SliceUnsafe(position));
        }
    }
示例#8
0
    /// <summary>
    /// Scales the three low byte channels of every pixel by that pixel's high (alpha) byte, in place, handling
    /// four 256-bit vectors (32 pixels) per iteration.
    /// </summary>
    /// <param name="data">Pixels to rewrite; each is treated as one 32-bit value.</param>
    /// <remarks>
    /// Every channel becomes <c>(channel * alpha + 255) &gt;&gt; 8</c>; the alpha byte itself is kept unchanged.
    /// Pixels left over after the last whole group of 32 are passed to <c>ProcessTextureScalar</c>.
    /// The size assertion checks the element count of ONE vector; the step of the loop is that count times the
    /// unroll factor.
    /// </remarks>
    internal static unsafe void ProcessTextureAvx2Unrolled(Span<Color8> data)
    {
        const uint unrollFactor = 4;

        // One vector must hold exactly Vector256<uint>.Count pixels, i.e. a pixel is as wide as a uint.
        uint vectorElements = (uint)Vector256<uint>.Count;

        vectorElements.AssertEqual((uint)(sizeof(Vector256<uint>) / sizeof(Color8)));

        // Pixels consumed per loop iteration.
        uint registerElements = vectorElements * unrollFactor;

        // Byte position of the widened alpha channel for each of the pixels in an unpacked vector.
        const byte offset0 = 6;
        const byte offset1 = offset0 + 8;
        const byte offset2 = offset1 + 8;
        const byte offset3 = offset2 + 8;

        // Loop-invariant constants.
        Vector256<uint>   alphaMask = Vector256.Create(0xFF000000U);
        Vector256<uint>   colorMask = Vector256.Create(0x00FFFFFFU);
        Vector256<ushort> addend    = Vector256.Create((ushort)0x00FFU);

        // Copies each pixel's alpha into the low byte of all four of its 16-bit channel slots
        // (0xFF control bytes produce zero).
        Vector256<byte> alphaShuffle = Vector256.Create(
            offset0, 0xFF, offset0, 0xFF, offset0, 0xFF, offset0, 0xFF,
            offset1, 0xFF, offset1, 0xFF, offset1, 0xFF, offset1, 0xFF,
            offset2, 0xFF, offset2, 0xFF, offset2, 0xFF, offset2, 0xFF,
            offset3, 0xFF, offset3, 0xFF, offset3, 0xFF, offset3, 0xFF
            );

        uint offset;

        fixed (Color8* dataPtr8 = data)
        {
            uint* dataPtr = (uint*)dataPtr8;

            for (offset = 0; offset + (registerElements - 1U) < data.Length; offset += registerElements)
            {
                uint* block = dataPtr + offset;

                Vector256<uint> rawColor0 = Avx2.LoadVector256(block + 0x00);
                Vector256<uint> rawColor1 = Avx2.LoadVector256(block + 0x08);
                Vector256<uint> rawColor2 = Avx2.LoadVector256(block + 0x10);
                Vector256<uint> rawColor3 = Avx2.LoadVector256(block + 0x18);

                // Remember the untouched alpha bytes so they can be restored at the end.
                Vector256<uint> alpha0 = Avx2.And(rawColor0, alphaMask);
                Vector256<uint> alpha1 = Avx2.And(rawColor1, alphaMask);
                Vector256<uint> alpha2 = Avx2.And(rawColor2, alphaMask);
                Vector256<uint> alpha3 = Avx2.And(rawColor3, alphaMask);

                // Widen every byte to 16 bits so the products below cannot overflow.
                Vector256<ushort> lo0 = Avx2.UnpackLow(rawColor0.AsByte(), Vector256<byte>.Zero).AsUInt16();
                Vector256<ushort> lo1 = Avx2.UnpackLow(rawColor1.AsByte(), Vector256<byte>.Zero).AsUInt16();
                Vector256<ushort> lo2 = Avx2.UnpackLow(rawColor2.AsByte(), Vector256<byte>.Zero).AsUInt16();
                Vector256<ushort> lo3 = Avx2.UnpackLow(rawColor3.AsByte(), Vector256<byte>.Zero).AsUInt16();
                Vector256<ushort> hi0 = Avx2.UnpackHigh(rawColor0.AsByte(), Vector256<byte>.Zero).AsUInt16();
                Vector256<ushort> hi1 = Avx2.UnpackHigh(rawColor1.AsByte(), Vector256<byte>.Zero).AsUInt16();
                Vector256<ushort> hi2 = Avx2.UnpackHigh(rawColor2.AsByte(), Vector256<byte>.Zero).AsUInt16();
                Vector256<ushort> hi3 = Avx2.UnpackHigh(rawColor3.AsByte(), Vector256<byte>.Zero).AsUInt16();

                Vector256<ushort> alphaLo0 = Avx2.Shuffle(lo0.AsByte(), alphaShuffle).AsUInt16();
                Vector256<ushort> alphaLo1 = Avx2.Shuffle(lo1.AsByte(), alphaShuffle).AsUInt16();
                Vector256<ushort> alphaLo2 = Avx2.Shuffle(lo2.AsByte(), alphaShuffle).AsUInt16();
                Vector256<ushort> alphaLo3 = Avx2.Shuffle(lo3.AsByte(), alphaShuffle).AsUInt16();
                Vector256<ushort> alphaHi0 = Avx2.Shuffle(hi0.AsByte(), alphaShuffle).AsUInt16();
                Vector256<ushort> alphaHi1 = Avx2.Shuffle(hi1.AsByte(), alphaShuffle).AsUInt16();
                Vector256<ushort> alphaHi2 = Avx2.Shuffle(hi2.AsByte(), alphaShuffle).AsUInt16();
                Vector256<ushort> alphaHi3 = Avx2.Shuffle(hi3.AsByte(), alphaShuffle).AsUInt16();

                // channel * alpha
                Vector256<ushort> prodLo0 = Avx2.MultiplyLow(lo0, alphaLo0);
                Vector256<ushort> prodLo1 = Avx2.MultiplyLow(lo1, alphaLo1);
                Vector256<ushort> prodLo2 = Avx2.MultiplyLow(lo2, alphaLo2);
                Vector256<ushort> prodLo3 = Avx2.MultiplyLow(lo3, alphaLo3);
                Vector256<ushort> prodHi0 = Avx2.MultiplyLow(hi0, alphaHi0);
                Vector256<ushort> prodHi1 = Avx2.MultiplyLow(hi1, alphaHi1);
                Vector256<ushort> prodHi2 = Avx2.MultiplyLow(hi2, alphaHi2);
                Vector256<ushort> prodHi3 = Avx2.MultiplyLow(hi3, alphaHi3);

                // + 255
                Vector256<ushort> sumLo0 = Avx2.Add(prodLo0, addend);
                Vector256<ushort> sumLo1 = Avx2.Add(prodLo1, addend);
                Vector256<ushort> sumLo2 = Avx2.Add(prodLo2, addend);
                Vector256<ushort> sumLo3 = Avx2.Add(prodLo3, addend);
                Vector256<ushort> sumHi0 = Avx2.Add(prodHi0, addend);
                Vector256<ushort> sumHi1 = Avx2.Add(prodHi1, addend);
                Vector256<ushort> sumHi2 = Avx2.Add(prodHi2, addend);
                Vector256<ushort> sumHi3 = Avx2.Add(prodHi3, addend);

                // >> 8
                Vector256<ushort> shiftLo0 = Avx2.ShiftRightLogical(sumLo0, 8);
                Vector256<ushort> shiftLo1 = Avx2.ShiftRightLogical(sumLo1, 8);
                Vector256<ushort> shiftLo2 = Avx2.ShiftRightLogical(sumLo2, 8);
                Vector256<ushort> shiftLo3 = Avx2.ShiftRightLogical(sumLo3, 8);
                Vector256<ushort> shiftHi0 = Avx2.ShiftRightLogical(sumHi0, 8);
                Vector256<ushort> shiftHi1 = Avx2.ShiftRightLogical(sumHi1, 8);
                Vector256<ushort> shiftHi2 = Avx2.ShiftRightLogical(sumHi2, 8);
                Vector256<ushort> shiftHi3 = Avx2.ShiftRightLogical(sumHi3, 8);

                // Narrow back to one byte per channel.
                Vector256<uint> packed0 = Avx2.PackUnsignedSaturate(shiftLo0.AsInt16(), shiftHi0.AsInt16()).AsUInt32();
                Vector256<uint> packed1 = Avx2.PackUnsignedSaturate(shiftLo1.AsInt16(), shiftHi1.AsInt16()).AsUInt32();
                Vector256<uint> packed2 = Avx2.PackUnsignedSaturate(shiftLo2.AsInt16(), shiftHi2.AsInt16()).AsUInt32();
                Vector256<uint> packed3 = Avx2.PackUnsignedSaturate(shiftLo3.AsInt16(), shiftHi3.AsInt16()).AsUInt32();

                // Drop the scaled alpha and put the original alpha back.
                packed0 = Avx2.And(packed0, colorMask);
                packed1 = Avx2.And(packed1, colorMask);
                packed2 = Avx2.And(packed2, colorMask);
                packed3 = Avx2.And(packed3, colorMask);
                packed0 = Avx2.Or(packed0, alpha0);
                packed1 = Avx2.Or(packed1, alpha1);
                packed2 = Avx2.Or(packed2, alpha2);
                packed3 = Avx2.Or(packed3, alpha3);

                Avx2.Store(block + 0x00, packed0);
                Avx2.Store(block + 0x08, packed1);
                Avx2.Store(block + 0x10, packed2);
                Avx2.Store(block + 0x18, packed3);
            }
        }

        // Handle the pixels that remain when the pixel count is not a multiple of 32.
        if (offset < data.Length)
        {
            ProcessTextureScalar(data.SliceUnsafe(offset));
        }
    }
示例#9
0
        /// <summary>
        /// Returns the index of the first element of <paramref name="span"/> equal to <paramref name="item"/>,
        /// comparing eight elements at a time when 'Avx2' is available.
        /// </summary>
        /// <param name="span">Elements to search.</param>
        /// <param name="item">Value to look for.</param>
        /// <returns>Zero-based index of the first match, or -1 when there is none.</returns>
        /// <remarks>
        /// Without 'Avx2' support this behaves as a simple loop. Both paths report the first occurrence, so the
        /// result does not depend on the hardware or on where duplicates happen to fall.
        /// </remarks>
        public static unsafe int GetIndexIntrinsics(ReadOnlySpan<int> span, int item)
        {
            // Get a fixed pointer so the garbage-collector doesn't move the collection
            fixed (int* startPointer = span)
            {
                int* endPointer = startPointer + span.Length;
                int* pointer    = startPointer;

                // Query if the cpu actually supports 'Avx2' instructions
                if (Avx2.IsSupported)
                {
                    // 256 bit / 32 bit = 8 elements per vector
                    int lanes = Vector256<int>.Count;

                    // A vector holding 'item' in every lane
                    Vector256<int> itemVector = Vector256.Create(item);

                    // Keep going as long as a whole vector remains; '>=' makes sure a final block of
                    // exactly 8 elements is still handled here instead of by the scalar loop.
                    for (; endPointer - pointer >= lanes; pointer += lanes)
                    {
                        Vector256<int> elements = Avx.LoadVector256(pointer);

                        // Each lane becomes 0xFFFFFFFF where it equals 'item' and 0 otherwise
                        Vector256<int> elementEquals = Avx2.CompareEqual(elements, itemVector);

                        // MoveMask takes the top bit of each of the 32 bytes, so every matching
                        // 32-bit lane contributes a group of 4 set bits (lane 0 -> bits 0..3, ...).
                        int mask = Avx2.MoveMask(elementEquals.AsByte());

                        if (mask != 0)
                        {
                            // The lowest set bit belongs to the first matching lane; 4 bits per lane.
                            int lane = System.Numerics.BitOperations.TrailingZeroCount(mask) >> 2;
                            return (int)(pointer - startPointer) + lane;
                        }
                    }
                }

                // Handle the remaining items with a simple loop
                for (; pointer < endPointer; pointer++)
                {
                    if (*pointer == item)
                    {
                        return (int)(pointer - startPointer);
                    }
                }
            }

            return -1;
        }
示例#10
0
 /// <summary>
 /// Permutes the bytes of <paramref name="value"/> with the <c>Rot16</c> shuffle control and returns the result
 /// as 32-bit lanes.
 /// </summary>
 /// <remarks>
 /// NOTE(review): going by the name, <c>Rot16</c> presumably encodes a 16-bit left rotation of every 32-bit
 /// lane — the constant is defined elsewhere; confirm.
 /// </remarks>
 public static Vector256<uint> RotateLeftUInt32_16(this Vector256<uint> value)
     => Avx2.Shuffle(value.AsByte(), Rot16).AsUInt32();
示例#11
0
 /// <summary>
 /// Permutes the bytes of <paramref name="value"/> with the <c>Reverse32_256</c> shuffle control and returns the
 /// shuffled bytes.
 /// </summary>
 /// <remarks>
 /// NOTE(review): going by the name, <c>Reverse32_256</c> presumably reverses the byte order inside every
 /// 32-bit lane — the constant is defined elsewhere; confirm.
 /// </remarks>
 public static Vector256<byte> ReverseEndianness32(this Vector256<uint> value)
     => Avx2.Shuffle(value.AsByte(), Reverse32_256);
示例#12
0
 /// <summary>
 /// Permutes the bytes of <paramref name="value"/> with the <c>VRot24</c> shuffle control and reinterprets the
 /// result as <c>Vector256&lt;T&gt;</c>.
 /// </summary>
 /// <typeparam name="T">Lane type of the input and of the returned vector.</typeparam>
 /// <remarks>
 /// NOTE(review): going by the name, <c>VRot24</c> presumably encodes a 24-bit left rotation of every 32-bit
 /// lane — the constant is defined elsewhere; confirm.
 /// </remarks>
 public static Vector256<T> RotateLeftUInt32_24<T>(this Vector256<T> value) where T : struct
     => Avx2.Shuffle(value.AsByte(), VRot24).As<byte, T>();
示例#13
0
 /// <summary>
 /// Permutes the bytes of <paramref name="value"/> with the <c>VReverse32</c> shuffle control and reinterprets
 /// the result as <c>Vector256&lt;T&gt;</c>.
 /// </summary>
 /// <typeparam name="T">Lane type of the input and of the returned vector.</typeparam>
 /// <remarks>
 /// NOTE(review): going by the name, <c>VReverse32</c> presumably reverses the byte order inside every 32-bit
 /// group — the constant is defined elsewhere; confirm.
 /// </remarks>
 public static Vector256<T> ReverseEndianness32<T>(this Vector256<T> value) where T : struct
     => Avx2.Shuffle(value.AsByte(), VReverse32).As<byte, T>();
示例#14
0
 /// <summary>
 /// Permutes the bytes of <paramref name="a"/> with the <c>VReverse128</c> shuffle control and reinterprets the
 /// result as <c>Vector256&lt;T&gt;</c>.
 /// </summary>
 /// <typeparam name="T">Lane type of the input and of the returned vector.</typeparam>
 /// <remarks>
 /// NOTE(review): going by the name, <c>VReverse128</c> presumably reverses the byte order inside every
 /// 128-bit half — the constant is defined elsewhere; confirm.
 /// </remarks>
 public static Vector256<T> ReverseEndianness128<T>(this Vector256<T> a) where T : struct
     => Avx2.Shuffle(a.AsByte(), VReverse128).As<byte, T>();