Example #1
0
 /* Byte-shuffles `source` into `destination`, treating the input as a
  * sequence of elements that are `bytesOfType` bytes wide.
  *
  * Both spans are pinned and handed to ShuffleGeneric.shuffle_avx2 as raw
  * pointers, with the range [0, source.Length) as the region to process.
  * Only the source length is forwarded, so the callee has no way to know
  * how large the destination is; it is therefore validated here.
  *
  * Throws ArgumentException when `destination` is shorter than `source`. */
 public static unsafe void Shuffle(int bytesOfType, Span <byte> source, Span <byte> destination)
 {
     /* Reject an undersized destination up front: past this point the
      * buffers are accessed through unchecked pointers. */
     if (destination.Length < source.Length)
     {
         throw new ArgumentException(
                   "Destination span must be at least as long as the source span.",
                   nameof(destination));
     }

     fixed(byte *src = source, dest = destination)
     {
         ShuffleGeneric.shuffle_avx2(bytesOfType, 0, source.Length, src, dest);
     }
 }
        /* Shuffle one block of `blocksize` bytes from `_src` into `_dest`,
         * where each element is `bytesoftype` bytes wide.
         *
         * Element sizes 2, 4, 8 and 16 go to their dedicated AVX2 kernels,
         * sizes above 16 go to the tiled AVX2 kernel, and every other size
         * is delegated to the generic implementation. */
        private static unsafe void shuffle_avx2(int bytesoftype, int blocksize,
                                                byte *_src, byte *_dest)
        {
            /* Granularity of the vectorized path: one full 256-bit vector
             * for each byte of the element type. */
            int chunk_bytes = bytesoftype * sizeof(Vector256 <byte>);

            /* A block smaller than a single chunk cannot be vectorized at
             * all; let the generic implementation process all of it. */
            if (blocksize < chunk_bytes)
            {
                ShuffleGeneric.shuffle_avx2(bytesoftype, 0, blocksize, _src, _dest);
                return;
            }

            /* Round the block size down to a whole number of chunks. That
             * prefix is given to the vectorized kernels; whatever is left
             * over is finished by the generic implementation below. */
            int tail_bytes      = blocksize % chunk_bytes;
            int vector_bytes    = blocksize - tail_bytes;
            int vector_elements = vector_bytes / bytesoftype;
            int all_elements    = blocksize / bytesoftype;

            /* Select the kernel that matches the element width. */
            if (bytesoftype == 2)
            {
                shuffle2_avx2(_dest, _src, vector_elements, all_elements);
            }
            else if (bytesoftype == 4)
            {
                shuffle4_avx2(_dest, _src, vector_elements, all_elements);
            }
            else if (bytesoftype == 8)
            {
                shuffle8_avx2(_dest, _src, vector_elements, all_elements);
            }
            else if (bytesoftype == 16)
            {
                shuffle16_avx2(_dest, _src, vector_elements, all_elements);
            }
            else if (bytesoftype > sizeof(Vector128 <byte>))
            {
                /* Types larger than 16 bytes use the AVX2 tiled shuffle. */
                shuffle16_tiled_avx2(_dest, _src, vector_elements, all_elements, bytesoftype);
            }
            else
            {
                /* No optimized kernel for this width. The generic call spans
                 * the entire block, so there is no remainder left to handle. */
                ShuffleGeneric.shuffle_avx2(bytesoftype, 0, blocksize, _src, _dest);
                return;
            }

            /* Finish any trailing bytes the vectorized kernel did not reach. */
            if (blocksize > vector_bytes)
            {
                ShuffleGeneric.shuffle_avx2(bytesoftype, vector_bytes, blocksize, _src, _dest);
            }
        }