public static void GetIndexOfFirstNonAsciiByte_Boundaries()
{
    // Exercises the vectorized and draining logic of the byte scanner for every
    // buffer length from five vectors' worth of bytes down to zero. The intent is
    // to cover both the SSE2 and the non-SSE2 enlightened code paths and to confirm
    // that the implementation never reads beyond the slice it was handed.

    using (BoundedMemory<byte> buffer = BoundedMemory.Allocate<byte>(5 * Vector<byte>.Count))
    {
        Span<byte> data = buffer.Span;

        // Phase 1: all-ASCII input. The buffer arrives pre-populated with random
        // data, so strip the high bit from every byte first.
        for (int pos = 0; pos < data.Length; pos++)
        {
            data[pos] = (byte)(data[pos] & 0x7F);
        }

        // With no non-ASCII byte present, the reported index must equal the
        // length of the slice, for every length including the empty slice.
        int length = data.Length;
        while (length >= 0)
        {
            Assert.Equal(length, CallGetIndexOfFirstNonAsciiByte(data.Slice(0, length)));
            length--;
        }

        // Phase 2: non-ASCII input. Shrinking the slice from the end, poison its
        // final byte so that the only non-ASCII byte inside the slice is the last one.
        for (int sliceLength = data.Length; sliceLength >= 1; sliceLength--)
        {
            int lastIndex = sliceLength - 1;
            data[lastIndex] = 0x80; // set non-ASCII
            Assert.Equal(lastIndex, CallGetIndexOfFirstNonAsciiByte(data.Slice(0, sliceLength)));
        }
    }
}
public static void GetIndexOfFirstNonAsciiByte_Vector128InnerLoop()
{
    // The inner loop reads two vectors at a time. Once it breaks out, the final
    // ASCII drain has to attribute the non-ASCII byte to the correct one of those
    // two vectors. This test checks that attribution.

    using (BoundedMemory<byte> buffer = BoundedMemory.Allocate<byte>(1024))
    {
        Span<byte> data = buffer.Span;

        // The buffer arrives pre-populated with random data; force it all to ASCII.
        for (int pos = 0; pos < data.Length; pos++)
        {
            data[pos] = (byte)(data[pos] & 0x7F);
        }

        // Two vectors have offsets 0 .. 31. Iterating from the highest step down
        // means each newly written marker precedes all earlier ones, so the buffer
        // never needs to be re-cleared between iterations.
        int step = 2 * SizeOfVector128 - 1;
        while (step >= 0)
        {
            // 13 is relatively prime to 32, so it ensures all possible positions are hit.
            int markerIndex = 100 + step * 13;

            data[markerIndex] = 0x80;
            Assert.Equal(markerIndex, CallGetIndexOfFirstNonAsciiByte(data));

            step--;
        }
    }
}
public static void GetIndexOfFirstNonAsciiChar_Vector128InnerLoop()
{
    // The inner loop reads two vectors at a time. Once it breaks out, the final
    // ASCII drain has to attribute the non-ASCII char to the correct one of those
    // two vectors. This test checks that attribution.
    //
    // U+0123 is used as the marker rather than U+0080: if the implementation
    // uses pminuw / pmovmskb incorrectly, U+0123 would incorrectly show up as
    // ASCII, and a U+0080 marker would let that bug slip through as a false negative.

    using (BoundedMemory<char> buffer = BoundedMemory.Allocate<char>(1024))
    {
        Span<char> text = buffer.Span;

        // The buffer arrives pre-populated with random data; force it all to ASCII.
        for (int pos = 0; pos < text.Length; pos++)
        {
            text[pos] = (char)(text[pos] & '\u007F');
        }

        // Two vectors have offsets 0 .. 31. Iterating from the highest step down
        // means each newly written marker precedes all earlier ones, so the buffer
        // never needs to be re-cleared between iterations.
        int step = 2 * SizeOfVector128 - 1;
        while (step >= 0)
        {
            // 13 is relatively prime to 32, so it ensures all possible positions are hit.
            int markerIndex = 100 + step * 13;

            text[markerIndex] = '\u0123';
            Assert.Equal(markerIndex, CallGetIndexOfFirstNonAsciiChar(text));

            step--;
        }
    }
}
// Shared driver for the UTF-16 -> UTF-8 transcoding tests: runs Utf8.FromUtf16 over
// the supplied input with a destination of the requested size, then compares the
// returned status, the number of chars consumed, and the bytes produced against
// the expected values.
private static void ToBytes_Test_Core(ReadOnlySpan<char> utf16Input, int destinationSize, bool replaceInvalidSequences, bool isFinalChunk, OperationStatus expectedOperationStatus, int expectedNumCharsRead, ReadOnlySpan<byte> expectedUtf8Transcoding)
{
    // Arrange

    using (BoundedMemory<char> boundedSource = BoundedMemory.AllocateFromExistingData(utf16Input))
    {
        using (BoundedMemory<byte> boundedDestination = BoundedMemory.Allocate<byte>(destinationSize))
        {
            // The transcoder is only supposed to read from the source buffer.
            boundedSource.MakeReadonly();

            Span<char> source = boundedSource.Span;
            Span<byte> destination = boundedDestination.Span;

            // Act

            OperationStatus status = Utf8.FromUtf16(
                source,
                destination,
                out int charsRead,
                out int bytesWritten,
                replaceInvalidSequences,
                isFinalChunk);

            // Assert

            Assert.Equal(expectedOperationStatus, status);
            Assert.Equal(expectedNumCharsRead, charsRead);
            Assert.Equal(expectedUtf8Transcoding.Length, bytesWritten);

            // Only the bytes reported as written are compared with the expected output.
            Assert.Equal(expectedUtf8Transcoding.ToArray(), destination.Slice(0, bytesWritten).ToArray());
        }
    }
}
public static void GetIndexOfFirstNonAsciiChar_Boundaries()
{
    // Exercises the vectorized and draining logic of the char scanner for every
    // buffer length from five vectors' worth of data down to zero. The intent is
    // to cover both the SSE2 and the non-SSE2 enlightened code paths and to confirm
    // that the implementation never reads beyond the slice it was handed.
    //
    // The allocation size is divided by sizeof(char) because we specify an element
    // count here, while the underlying implementation reinterpret-casts to bytes.
    //
    // U+0123 is used as the marker rather than U+0080: if the implementation
    // uses pminuw / pmovmskb incorrectly, U+0123 would incorrectly show up as
    // ASCII, and a U+0080 marker would let that bug slip through as a false negative.

    using (BoundedMemory<char> buffer = BoundedMemory.Allocate<char>(5 * Vector<byte>.Count / sizeof(char)))
    {
        Span<char> text = buffer.Span;

        // Phase 1: all-ASCII input. The buffer arrives pre-populated with random
        // data, so mask every char down to the ASCII range first.
        for (int pos = 0; pos < text.Length; pos++)
        {
            text[pos] = (char)(text[pos] & '\u007F');
        }

        // With no non-ASCII char present, the reported index must equal the
        // length of the slice, for every length including the empty slice.
        int length = text.Length;
        while (length >= 0)
        {
            Assert.Equal(length, CallGetIndexOfFirstNonAsciiChar(text.Slice(0, length)));
            length--;
        }

        // Phase 2: non-ASCII input. Shrinking the slice from the end, poison its
        // final char so that the only non-ASCII char inside the slice is the last one.
        for (int sliceLength = text.Length; sliceLength >= 1; sliceLength--)
        {
            int lastIndex = sliceLength - 1;
            text[lastIndex] = '\u0123'; // set non-ASCII
            Assert.Equal(lastIndex, CallGetIndexOfFirstNonAsciiChar(text.Slice(0, sliceLength)));
        }
    }
}