/// <summary>
/// Runs a candidate 4-byte sequence through the validation test core in two layouts:
/// once at the very start of the buffer and once immediately after a known-good
/// sequence decoded from <c>GRINNING_FACE</c>.
/// </summary>
/// <param name="invalidSequence">The bytes under test; asserted to be exactly 4 bytes long.</param>
private static void AssertIsInvalidFourByteSequence(byte[] invalidSequence)
{
    Assert.Equal(4, invalidSequence.Length);

    byte[] validBytes = Utf8Tests.DecodeHex(GRINNING_FACE);

    // Both layouts end the same way: the sequence under test followed by known-good bytes.
    byte[] invalidThenValid = invalidSequence.Concat(validBytes).ToArray();

    // Sequence under test comes first: expected index 0, no runes, no surrogate pairs.
    byte[] invalidAtStart = invalidSequence.Concat(invalidThenValid).ToArray();
    GetIndexOfFirstInvalidUtf8Sequence_Test_Core(invalidAtStart, 0, 0, 0);

    // One known-good sequence comes first: expected index 4, one rune, one surrogate pair.
    byte[] invalidAfterValid = validBytes.Concat(invalidThenValid).ToArray();
    GetIndexOfFirstInvalidUtf8Sequence_Test_Core(invalidAfterValid, 4, 1, 1);
}
/// <summary>
/// Hex-string front end for the byte-array overload. Decodes <paramref name="inputHex"/> and
/// runs the check three times: on the decoded bytes as-is, then with 128 and with 135
/// ASCII <c>'x'</c> bytes prepended.
/// </summary>
/// <param name="inputHex">Hex text handed to <c>Utf8Tests.DecodeHex</c> to produce the input bytes.</param>
/// <param name="expectedRetVal">Expected return value for the unpadded input; a negative value is forwarded unchanged to every run.</param>
/// <param name="expectedRuneCount">Expected rune count for the unpadded input; grows by the number of prepended ASCII bytes.</param>
/// <param name="expectedSurrogatePairCount">Expected surrogate pair count; forwarded unchanged to every run.</param>
private static void GetIndexOfFirstInvalidUtf8Sequence_Test_Core(string inputHex, int expectedRetVal, int expectedRuneCount, int expectedSurrogatePairCount)
{
    byte[] decodedInput = Utf8Tests.DecodeHex(inputHex);

    // Baseline run: the decoded bytes with nothing in front of them.
    GetIndexOfFirstInvalidUtf8Sequence_Test_Core(decodedInput, expectedRetVal, expectedRuneCount, expectedSurrogatePairCount);

    // 128 leading ASCII bytes exercise the vectorized code paths; 135 (= 128 + 7) adds a
    // few more bytes to test that offsets are properly handled.
    foreach (int asciiPrefixLength in new[] { 128, 135 })
    {
        byte[] paddedInput = Enumerable.Repeat((byte)'x', asciiPrefixLength).Concat(decodedInput).ToArray();

        // Only a non-negative expected return value moves with the padding.
        int shiftedRetVal = expectedRetVal;
        if (shiftedRetVal >= 0)
        {
            shiftedRetVal += asciiPrefixLength;
        }

        GetIndexOfFirstInvalidUtf8Sequence_Test_Core(paddedInput, shiftedRetVal, expectedRuneCount + asciiPrefixLength, expectedSurrogatePairCount);
    }
}
/// <summary>
/// Runs a candidate 3-byte sequence through the validation test core at several positions:
/// at the very start of the buffer, and after one, two and three copies of a known-good
/// sequence decoded from <c>EURO_SYMBOL</c>.
/// </summary>
/// <param name="invalidSequence">The bytes under test; asserted to be exactly 3 bytes long.</param>
private static void AssertIsInvalidThreeByteSequence(byte[] invalidSequence)
{
    Assert.Equal(3, invalidSequence.Length);

    byte[] validBytes = Utf8Tests.DecodeHex(EURO_SYMBOL);

    // Sequence under test at start of first DWORD: expected index 0 and nothing counted.
    byte[] invalidAtStart = invalidSequence.Concat(invalidSequence).Concat(validBytes).ToArray();
    GetIndexOfFirstInvalidUtf8Sequence_Test_Core(invalidAtStart, 0, 0, 0);

    // Run the same check with extra data at the beginning so that we're inside one of
    // the 3-byte processing "hot loop" code paths. Position of the sequence under test:
    //   1 leading good sequence  -> straddling first and second DWORDs (expected index 3)
    //   2 leading good sequences -> straddling second and third DWORDs (expected index 6)
    //   3 leading good sequences -> at end of third DWORD              (expected index 9)
    byte[] validPrefix = new byte[0];
    for (int validCount = 1; validCount <= 3; validCount++)
    {
        validPrefix = validPrefix.Concat(validBytes).ToArray();

        byte[] candidate = validPrefix.Concat(invalidSequence).Concat(validBytes).ToArray();
        GetIndexOfFirstInvalidUtf8Sequence_Test_Core(candidate, validCount * 3, validCount, 0);
    }
}
/// <summary>
/// Runs a candidate 2-byte sequence through the validation test core at several positions:
/// at the very start of the buffer, and after one, two and three copies of a known-good
/// sequence decoded from <c>E_ACUTE</c>.
/// </summary>
/// <param name="invalidSequence">The bytes under test; asserted to be exactly 2 bytes long.</param>
private static void AssertIsInvalidTwoByteSequence(byte[] invalidSequence)
{
    Assert.Equal(2, invalidSequence.Length);

    byte[] validBytes = Utf8Tests.DecodeHex(E_ACUTE);

    // Shared building blocks; none of these is itself handed to the test core.
    byte[] invalidThenValid = invalidSequence.Concat(validBytes).ToArray();
    byte[] twoValid = validBytes.Concat(validBytes).ToArray();
    byte[] threeValid = twoValid.Concat(validBytes).ToArray();

    // Sequence under test at start of first DWORD.
    byte[] atStartOfFirstDword = invalidSequence.Concat(invalidThenValid).ToArray();
    GetIndexOfFirstInvalidUtf8Sequence_Test_Core(atStartOfFirstDword, 0, 0, 0);

    // Sequence under test at end of first DWORD.
    byte[] atEndOfFirstDword = validBytes.Concat(invalidThenValid).ToArray();
    GetIndexOfFirstInvalidUtf8Sequence_Test_Core(atEndOfFirstDword, 2, 1, 0);

    // Run the same checks with extra data at the beginning so that we're inside one of
    // the 2-byte processing "hot loop" code paths.

    // Sequence under test at start of next DWORD.
    byte[] atStartOfNextDword = twoValid.Concat(invalidThenValid).ToArray();
    GetIndexOfFirstInvalidUtf8Sequence_Test_Core(atStartOfNextDword, 4, 2, 0);

    // Sequence under test at end of next DWORD.
    byte[] atEndOfNextDword = threeValid.Concat(invalidThenValid).ToArray();
    GetIndexOfFirstInvalidUtf8Sequence_Test_Core(atEndOfNextDword, 6, 3, 0);
}