Example #1
0
 /// <summary>
 /// Pins <paramref name="source"/> and <paramref name="destination"/> and forwards
 /// their raw pointers to <c>ShuffleGeneric.shuffle_avx2</c> for the byte range
 /// [0, source.Length).
 /// </summary>
 /// <param name="bytesOfType">Size in bytes of one element; passed through unchanged.</param>
 /// <param name="source">Input bytes. Its length is the end offset handed to the callee.</param>
 /// <param name="destination">Output buffer. Must be at least as long as <paramref name="source"/>.</param>
 /// <exception cref="ArgumentException">
 /// <paramref name="destination"/> is shorter than <paramref name="source"/>.
 /// </exception>
 public static unsafe void Shuffle(int bytesOfType, Span <byte> source, Span <byte> destination)
 {
     // The callee only sees raw pointers and source.Length, so it has no way to
     // bounds-check the destination. It is presumably writing source.Length bytes
     // (NOTE(review): confirm against ShuffleGeneric), so reject an undersized
     // buffer here instead of letting unchecked pointer code run past its end.
     if (destination.Length < source.Length)
     {
         throw new ArgumentException("Destination is shorter than source.", nameof(destination));
     }

     fixed(byte *src = source, dest = destination)
     {
         ShuffleGeneric.shuffle_avx2(bytesOfType, 0, source.Length, src, dest);
     }
 }
        /// <summary>
        /// Selects an Unshuffle implementation and forwards all three arguments to it.
        /// In builds where NETCOREAPP3_0 is defined, the AVX2 implementation is tried
        /// first and the SSE2 implementation second, based on their IsSupported flags.
        /// In every other case the generic implementation is called.
        /// </summary>
        /// <param name="bytesOfType">Size in bytes of one element; passed through unchanged.</param>
        /// <param name="source">Input bytes; passed through unchanged.</param>
        /// <param name="destination">Output buffer; passed through unchanged.</param>
        public static unsafe void Unshuffle(int bytesOfType, Span <byte> source, Span <byte> destination)
        {
#if NETCOREAPP3_0
            // Widest instruction set first.
            if (Avx2.IsSupported)
            {
                ShuffleAvx2.Unshuffle(bytesOfType, source, destination);
                return;
            }

            if (Sse2.IsSupported)
            {
                ShuffleSse2.Unshuffle(bytesOfType, source, destination);
                return;
            }
#endif
            // Reached when no accelerated path was compiled in or reported as supported.
            ShuffleGeneric.Unshuffle(bytesOfType, source, destination);
        }
        /* Unshuffle one block from _src into _dest. Type sizes 2, 4, 8 and 16 and
         * sizes above 16 go through the AVX2 routines; everything else, and any
         * leftover tail, goes through the generic routine. */
        private static unsafe void unshuffle_avx2(int bytesoftype, int blocksize,
                                                  byte *_src, byte *_dest)
        {
            /* Bytes covered by one vectorized step: one 256-bit vector per byte of the type. */
            int chunkBytes = bytesoftype * sizeof(Vector256 <byte>);

            /* A block shorter than a single chunk is handled entirely by the generic routine. */
            if (blocksize < chunkBytes)
            {
                ShuffleGeneric.unshuffle_avx2(bytesoftype, 0, blocksize, _src, _dest);
                return;
            }

            /* Split the block into a prefix that is a whole number of chunks
             * (eligible for the vectorized routines) and whatever is left over. */
            int leftoverBytes   = blocksize % chunkBytes;
            int alignedBytes    = blocksize - leftoverBytes;
            int alignedElements = alignedBytes / bytesoftype;
            int elementCount    = blocksize / bytesoftype;

            /* Dispatch on the type size. */
            if (bytesoftype == 2)
            {
                unshuffle2_avx2(_dest, _src, alignedElements, elementCount);
            }
            else if (bytesoftype == 4)
            {
                unshuffle4_avx2(_dest, _src, alignedElements, elementCount);
            }
            else if (bytesoftype == 8)
            {
                unshuffle8_avx2(_dest, _src, alignedElements, elementCount);
            }
            else if (bytesoftype == 16)
            {
                unshuffle16_avx2(_dest, _src, alignedElements, elementCount);
            }
            else if (bytesoftype > sizeof(Vector128 <byte>))
            {
                /* Types wider than one 128-bit vector use the tiled routine. */
                unshuffle16_tiled_avx2(_dest, _src, alignedElements, elementCount, bytesoftype);
            }
            else
            {
                /* No specialised routine for this size: the generic call is given
                 * the whole range [0, blocksize), so nothing remains afterwards. */
                ShuffleGeneric.unshuffle_avx2(bytesoftype, 0, blocksize, _src, _dest);
                return;
            }

            /* Hand any bytes beyond the chunk-aligned prefix to the generic routine. */
            if (alignedBytes < blocksize)
            {
                ShuffleGeneric.unshuffle_avx2(bytesoftype, alignedBytes, blocksize, _src, _dest);
            }
        }