public static unsafe void Shuffle(int bytesOfType, Span<byte> source, Span<byte> destination)
{
    fixed (byte* src = source, dest = destination)
    {
        ShuffleGeneric.shuffle_avx2(bytesOfType, 0, source.Length, src, dest);
    }
}
public static unsafe void Unshuffle(int bytesOfType, Span<byte> source, Span<byte> destination)
{
#if NETCOREAPP3_0
    if (Avx2.IsSupported)
    {
        ShuffleAvx2.Unshuffle(bytesOfType, source, destination);
    }
    else if (Sse2.IsSupported)
    {
        ShuffleSse2.Unshuffle(bytesOfType, source, destination);
    }
    else
#endif
    ShuffleGeneric.Unshuffle(bytesOfType, source, destination);
}
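/* A minimal usage sketch (illustration only, not part of the original source):
 * Shuffle groups the i-th byte of every element together, and Unshuffle restores
 * the original interleaved layout, so a round trip must reproduce the input.
 * The method name below is hypothetical and it assumes it lives in the same
 * class as the Shuffle/Unshuffle methods above; it also assumes `using System;`
 * is in scope for the SequenceEqual extension. */
private static void ShuffleRoundTripExample()
{
    Span<byte> input = stackalloc byte[64];
    for (int i = 0; i < input.Length; i++)
        input[i] = (byte)i;

    Span<byte> shuffled = stackalloc byte[input.Length];
    Span<byte> restored = stackalloc byte[input.Length];

    /* Treat the buffer as sixteen 4-byte elements. */
    Shuffle(4, input, shuffled);
    Unshuffle(4, shuffled, restored);

    System.Diagnostics.Debug.Assert(input.SequenceEqual(restored));
}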
/* Unshuffle a block. This can never fail. */
private static unsafe void unshuffle_avx2(int bytesoftype, int blocksize, byte* _src, byte* _dest)
{
    int vectorized_chunk_size = bytesoftype * sizeof(Vector256<byte>);

    /* If the block size is too small to be vectorized,
     * use the generic implementation. */
    if (blocksize < vectorized_chunk_size)
    {
        ShuffleGeneric.unshuffle_avx2(bytesoftype, 0, blocksize, _src, _dest);
        return;
    }

    /* If the blocksize is not a multiple of both the typesize and
     * the vector size, round the blocksize down to the next value
     * which is a multiple of both. The vectorized unshuffle can be
     * used for that portion of the data, and the naive implementation
     * can be used for the remaining portion. */
    int vectorizable_bytes = blocksize - (blocksize % vectorized_chunk_size);
    int vectorizable_elements = vectorizable_bytes / bytesoftype;
    int total_elements = blocksize / bytesoftype;

    /* Optimized unshuffle implementations */
    switch (bytesoftype)
    {
        case 2:
            unshuffle2_avx2(_dest, _src, vectorizable_elements, total_elements);
            break;
        case 4:
            unshuffle4_avx2(_dest, _src, vectorizable_elements, total_elements);
            break;
        case 8:
            unshuffle8_avx2(_dest, _src, vectorizable_elements, total_elements);
            break;
        case 16:
            unshuffle16_avx2(_dest, _src, vectorizable_elements, total_elements);
            break;
        default:
            /* For types larger than 16 bytes, use the AVX2 tiled unshuffle. */
            if (bytesoftype > (int)sizeof(Vector128<byte>))
            {
                unshuffle16_tiled_avx2(_dest, _src, vectorizable_elements, total_elements, bytesoftype);
            }
            else
            {
                /* Non-optimized unshuffle */
                ShuffleGeneric.unshuffle_avx2(bytesoftype, 0, blocksize, _src, _dest);
                /* The non-optimized function covers the whole buffer,
                 * so we're done processing here. */
                return;
            }
            break;
    }

    /* If the buffer had any bytes at the end which couldn't be handled
     * by the vectorized implementations, use the non-optimized version
     * to finish them up. */
    if (vectorizable_bytes < blocksize)
    {
        ShuffleGeneric.unshuffle_avx2(bytesoftype, vectorizable_bytes, blocksize, _src, _dest);
    }
}
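/* Reference sketch (assumption, for illustration; not the library's ShuffleGeneric
 * code): all of the vectorized kernels above invert the same byte layout that
 * shuffling produces, i.e. a block of `total_elements` elements of size
 * `bytesoftype` is stored as all first bytes, then all second bytes, and so on.
 * A plain scalar unshuffle of that layout looks like the loop below. The method
 * name is hypothetical, and `offset` is assumed to play the same role as the
 * already-processed byte count passed to the generic fallback above. */
private static unsafe void unshuffle_scalar_sketch(int bytesoftype, int offset, int blocksize, byte* src, byte* dest)
{
    int total_elements = blocksize / bytesoftype;

    /* Start at the first element that the vectorized code did not handle. */
    for (int i = offset / bytesoftype; i < total_elements; i++)
    {
        for (int j = 0; j < bytesoftype; j++)
        {
            /* Byte j of element i lives in the j-th "byte plane" of the shuffled block. */
            dest[i * bytesoftype + j] = src[j * total_elements + i];
        }
    }
}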