/// <summary>
/// Converts one block: the AVX2 routines handle the largest leading part that is a
/// whole number of vectorized chunks, and the generic converter handles the rest.
/// </summary>
/// <param name="bytesOfType">Size in bytes of a single element.</param>
/// <param name="blocksize">Number of bytes in the block.</param>
/// <param name="_src">Pointer to the input bytes.</param>
/// <param name="_dest">Pointer to the output bytes.</param>
private static unsafe void Convert(int bytesOfType, int blocksize, byte* _src, byte* _dest)
{
    // One vectorized chunk spans bytesOfType whole Vector256<byte> registers.
    // NOTE(review): bytesOfType == 0 makes this 0, in which case the modulo further
    // down throws DivideByZeroException -- presumably callers never pass 0; confirm.
    int chunkBytes = bytesOfType * sizeof(Vector256<byte>);

    // A block shorter than one chunk goes entirely through the generic converter.
    if (blocksize < chunkBytes)
    {
        EndiannessConverterGeneric.Convert(bytesOfType, 0, blocksize, _src, _dest);
        return;
    }

    // Round the block size down to a multiple of the chunk size. Because the chunk
    // size is itself a multiple of the type size, the result is a multiple of both.
    int leftoverBytes = blocksize % chunkBytes;
    int vectorizedBytes = blocksize - leftoverBytes;

    // Pick the AVX2 routine that matches the element size.
    if (bytesOfType == 2)
    {
        EndiannessConverterAvx2.Convert2(_dest, _src, vectorizedBytes);
    }
    else if (bytesOfType == 4)
    {
        EndiannessConverterAvx2.Convert4(_dest, _src, vectorizedBytes);
    }
    else if (bytesOfType == 8)
    {
        EndiannessConverterAvx2.Convert8(_dest, _src, vectorizedBytes);
    }
    else if (bytesOfType == 16)
    {
        EndiannessConverterAvx2.Convert16(_dest, _src, vectorizedBytes);
    }
    else
    {
        // No AVX2 routine exists here for any other element size, so nothing has
        // been converted yet and the generic converter must take the whole block.
        vectorizedBytes = 0;
    }

    // Everything was covered by the vectorized routine: done.
    if (vectorizedBytes >= blocksize)
    {
        return;
    }

    // Let the generic converter finish the bytes the vectorized routine did not handle.
    EndiannessConverterGeneric.Convert(bytesOfType, vectorizedBytes, blocksize, _src, _dest);
}
/// <summary>
/// Converts the bytes of <paramref name="source"/> into <paramref name="destination"/>
/// by pinning both spans and delegating to the pointer-based generic converter.
/// </summary>
/// <param name="bytesOfType">Size in bytes of a single element.</param>
/// <param name="source">Input bytes; its length is the number of bytes handed to the converter.</param>
/// <param name="destination">Output buffer; must be at least as long as <paramref name="source"/>.</param>
/// <exception cref="ArgumentException">
/// <paramref name="destination"/> is shorter than <paramref name="source"/>.
/// </exception>
public static unsafe void Convert(int bytesOfType, Span<byte> source, Span<byte> destination)
{
    // The pointer-based overload is only told source.Length and has no way to see
    // how long the destination is, so a too-short destination must be rejected here
    // rather than risk writing past its end through the raw pointer.
    if (destination.Length < source.Length)
    {
        throw new ArgumentException(
            "The destination buffer must be at least as long as the source buffer.",
            nameof(destination));
    }

    // Why source and destination are independent buffers:
    // only the generic algorithm actually requires a dedicated destination buffer.
    // If that buffer were created in this method, the hardware accelerated methods
    // would not know about it. If it were created in the pointer-based overload
    // instead, the newly created pointer would be lost. Returning the new buffer as
    // a plain return value would be possible, but would require the hardware
    // accelerated implementations to copy from source to the new destination when
    // they hand over control to the generic algorithm.
    // In conclusion, it is easier to work with independent source and destination buffers.
    fixed (byte* src = source, dest = destination)
    {
        EndiannessConverterGeneric.Convert(bytesOfType, 0, source.Length, src, dest);
    }
}
/// <summary>
/// Converts <paramref name="source"/> into <paramref name="destination"/>, using the
/// AVX2 converter when it is compiled in and supported by the CPU, and the generic
/// converter otherwise.
/// </summary>
/// <param name="bytesOfType">Size in bytes of a single element.</param>
/// <param name="source">Input bytes.</param>
/// <param name="destination">Output buffer.</param>
public static unsafe void Convert(int bytesOfType, Span<byte> source, Span<byte> destination)
{
    // NOTE(review): NET5_0 is only defined when targeting net5.0 itself, so builds for
    // later target frameworks always take the generic path -- confirm whether
    // NET5_0_OR_GREATER is intended (and whether the AVX2 class uses the same guard).
#if NET5_0
    // Only AVX2 is accelerated; no SSE2 branch is wired in here.
    if (Avx2.IsSupported)
    {
        EndiannessConverterAvx2.Convert(bytesOfType, source, destination);
        return;
    }
#endif

    // Fallback for targets or CPUs without the accelerated path.
    EndiannessConverterGeneric.Convert(bytesOfType, source, destination);
}