예제 #1
0
        public static void GetIndexOfFirstNonAsciiChar_Vector128InnerLoop()
        {
            // Checks that, once the inner loop (which reads two vectors at a time) has
            // exited, the final ASCII drain reports the non-ASCII char from the right
            // one of those two vectors.
            //
            // U+0123 is planted rather than U+0080: an implementation that misuses
            // pminuw / pmovmskb would see U+0123 as ASCII, so this value exposes such
            // a bug instead of letting the test pass by accident.

            const int BaseOffset = 100;
            const int Stride = 13; // relatively prime to 32, so every position is eventually hit

            using BoundedMemory<char> mem = BoundedMemory.Allocate<char>(1024);
            Span<char> chars = mem.Span;

            // The buffer arrives pre-populated with random data; mask every char down to ASCII.
            foreach (ref char ch in chars)
            {
                ch &= '\u007F';
            }

            // Two vectors have offsets 0 .. 31. Walking backward means each newly planted
            // non-ASCII char precedes all the previous ones, so nothing has to be re-cleared.
            for (int slot = 2 * SizeOfVector128 - 1; slot >= 0; slot--)
            {
                int index = BaseOffset + slot * Stride;

                chars[index] = '\u0123';
                Assert.Equal(index, CallGetIndexOfFirstNonAsciiChar(chars));
            }
        }
예제 #2
0
        public static void GetIndexOfFirstNonAsciiByte_Boundaries()
        {
            // Drives the vectorized and draining logic of both the SSE2 and the non-SSE2
            // enlightened code paths, and relies on BoundedMemory to catch any read beyond
            // the boundaries we were given.
            //
            // A buffer of 5 * Vector<byte>.Count bytes is intended to be large enough to
            // exercise every code path across both implementations.

            using BoundedMemory<byte> mem = BoundedMemory.Allocate<byte>(5 * Vector<byte>.Count);
            Span<byte> bytes = mem.Span;

            // The buffer arrives pre-populated with random data; mask every byte down to ASCII.
            foreach (ref byte b in bytes)
            {
                b &= 0x7F;
            }

            // Pass 1: all-ASCII prefixes of every length, longest first.
            // An all-ASCII input should report its own length.
            for (int length = bytes.Length; length >= 0; length--)
            {
                Assert.Equal(length, CallGetIndexOfFirstNonAsciiByte(bytes.Slice(0, length)));
            }

            // Pass 2: make the final byte of each prefix non-ASCII, longest prefix first.
            for (int last = bytes.Length - 1; last >= 0; last--)
            {
                bytes[last] = 0x80;
                Assert.Equal(last, CallGetIndexOfFirstNonAsciiByte(bytes.Slice(0, last + 1)));
            }
        }
        public void IsSuffix(CompareInfo compareInfo, string source, string value, CompareOptions options, bool expected, int expectedMatchLength)
        {
            bool isNone = options == CompareOptions.None;
            bool isIgnoreCase = options == CompareOptions.IgnoreCase;

            // The overload without options is only comparable when no options were requested.
            if (isNone)
            {
                Assert.Equal(expected, compareInfo.IsSuffix(source, value));
            }
            Assert.Equal(expected, compareInfo.IsSuffix(source, value, options));

            // For the invariant culture, string.EndsWith and the span-based EndsWith
            // must agree with CompareInfo for the two options that map to a StringComparison.
            if (compareInfo == s_invariantCompare && (isNone || isIgnoreCase))
            {
                StringComparison stringComparison;
                if (isIgnoreCase)
                {
                    stringComparison = StringComparison.InvariantCultureIgnoreCase;
                }
                else
                {
                    stringComparison = StringComparison.InvariantCulture;
                }

                Assert.Equal(expected, source.EndsWith(value, stringComparison));
                Assert.Equal(expected, source.AsSpan().EndsWith(value.AsSpan(), stringComparison));
            }

            // Span-based overloads: run them over read-only BoundedMemory copies so that
            // buffer overruns are detected.
            using BoundedMemory<char> sourceBoundedMemory = BoundedMemory.AllocateFromExistingData<char>(source);
            sourceBoundedMemory.MakeReadonly();

            using BoundedMemory<char> valueBoundedMemory = BoundedMemory.AllocateFromExistingData<char>(value);
            valueBoundedMemory.MakeReadonly();

            Assert.Equal(expected, compareInfo.IsSuffix(sourceBoundedMemory.Span, valueBoundedMemory.Span, options));

            bool actualWithMatchLength = compareInfo.IsSuffix(sourceBoundedMemory.Span, valueBoundedMemory.Span, options, out int actualMatchLength);
            Assert.Equal(expected, actualWithMatchLength);
            Assert.Equal(expectedMatchLength, actualMatchLength);
        }
예제 #4
0
        public static void GetIndexOfFirstNonAsciiByte_Vector128InnerLoop()
        {
            // Checks that, once the inner loop (which reads two vectors at a time) has
            // exited, the final ASCII drain reports the non-ASCII byte from the right
            // one of those two vectors.

            const int BaseOffset = 100;
            const int Stride = 13; // relatively prime to 32, so every position is eventually hit

            using BoundedMemory<byte> mem = BoundedMemory.Allocate<byte>(1024);
            Span<byte> bytes = mem.Span;

            // The buffer arrives pre-populated with random data; mask every byte down to ASCII.
            foreach (ref byte b in bytes)
            {
                b &= 0x7F;
            }

            // Two vectors have offsets 0 .. 31. Walking backward means each newly planted
            // non-ASCII byte precedes all the previous ones, so nothing has to be re-cleared.
            for (int slot = 2 * SizeOfVector128 - 1; slot >= 0; slot--)
            {
                int index = BaseOffset + slot * Stride;

                bytes[index] = 0x80;
                Assert.Equal(index, CallGetIndexOfFirstNonAsciiByte(bytes));
            }
        }
예제 #5
0
        public static void Ctor_BytePointer_ValidData_ReturnsOriginalContents()
        {
            // "Hello" followed by the null terminator the pointer-based constructor scans for.
            byte[] inputData = { (byte)'H', (byte)'e', (byte)'l', (byte)'l', (byte)'o', (byte)'\0' };

            using BoundedMemory<byte> boundedMemory = BoundedMemory.AllocateFromExistingData(inputData);

            var expected = u8("Hello");
            byte* pData = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(boundedMemory.Span));

            Assert.Equal(expected, new Utf8String(pData));
        }
예제 #6
0
        public static void Ctor_CharPointer_ValidData_ReturnsOriginalContents()
        {
            // ToCharArray does not append a terminator, so the literal carries an explicit '\0'.
            char[] inputData = "Hello\0".ToCharArray();

            using BoundedMemory<char> boundedMemory = BoundedMemory.AllocateFromExistingData(inputData);

            var expected = u8("Hello");
            char* pData = (char*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(boundedMemory.Span));

            Assert.Equal(expected, new Utf8String(pData));
        }
예제 #7
0
        public static void Ctor_CharPointer_Empty_ReturnsEmpty()
        {
            // The input is nothing but a null terminator, i.e. an empty string.
            char[] inputData = { '\0' };

            using BoundedMemory<char> boundedMemory = BoundedMemory.AllocateFromExistingData(inputData);

            char* pData = (char*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(boundedMemory.Span));

            AssertSameAsEmpty(new Utf8String(pData));
        }
예제 #8
0
        public static void Ctor_BytePointer_InvalidData_Throws()
        {
            // "He", an 0xFF byte (never valid in UTF-8), "lo", then the null terminator.
            byte[] inputData = { (byte)'H', (byte)'e', (byte)0xFF, (byte)'l', (byte)'o', (byte)'\0' };

            using BoundedMemory<byte> boundedMemory = BoundedMemory.AllocateFromExistingData(inputData);

            // The pointer is obtained inside the delegate so that everything runs under Assert.Throws.
            Func<object> construct = () => new Utf8String((byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(boundedMemory.Span)));

            Assert.Throws<ArgumentException>(construct);
        }
예제 #9
0
        public static void Ctor_BytePointer_InvalidData_FixesUpData()
        {
            // "He", an 0xFF byte (never valid in UTF-8), "lo", then the null terminator.
            byte[] inputData = { (byte)'H', (byte)'e', (byte)0xFF, (byte)'l', (byte)'o', (byte)'\0' };

            using BoundedMemory<byte> boundedMemory = BoundedMemory.AllocateFromExistingData(inputData);

            // The invalid byte is expected to come back as U+FFFD.
            var expected = u8("He\uFFFDlo");
            byte* pData = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(boundedMemory.Span));

            Assert.Equal(expected, new Utf8String(pData));
        }
예제 #10
0
        public static void Ctor_CharPointer_InvalidData_FixesUpData()
        {
            // U+D800 is a high surrogate with no low surrogate after it, i.e. ill-formed UTF-16.
            char[] inputData = { 'H', 'e', '\uD800', 'l', 'o', '\0' };

            using BoundedMemory<char> boundedMemory = BoundedMemory.AllocateFromExistingData(inputData);

            // The standalone surrogate is expected to come back as U+FFFD.
            var expected = u8("He\uFFFDlo");
            char* pData = (char*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(boundedMemory.Span));

            Assert.Equal(expected, new Utf8String(pData));
        }
예제 #11
0
        public static void Ctor_CharPointer_ValidData_ReturnsOriginalContents()
        {
            // "Hello" followed by the null terminator the pointer-based constructor scans for.
            char[] inputData = { 'H', 'e', 'l', 'l', 'o', '\0' };

            using BoundedMemory<char> boundedMemory = BoundedMemory.AllocateFromExistingData(inputData);

            var expected = u8("Hello");
            char* pData = (char*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(boundedMemory.Span));

            Assert.Equal(expected, new Utf8String(pData));
        }
예제 #12
0
        public static void Ctor_BytePointer_Empty_ReturnsEmpty()
        {
            // The input is nothing but a null terminator, i.e. an empty string.
            byte[] inputData = { 0 };

            using BoundedMemory<byte> boundedMemory = BoundedMemory.AllocateFromExistingData(inputData);

            byte* pData = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(boundedMemory.Span));

            AssertSameAsEmpty(new Utf8String(pData));
        }
예제 #13
0
        public static void Ctor_CharPointer_InvalidData_Throws()
        {
            // U+D800 is a high surrogate with no low surrogate after it, i.e. ill-formed UTF-16.
            // ToCharArray does not append a terminator, so the literal carries an explicit '\0'.
            char[] inputData = "He\ud800llo\0".ToCharArray();

            using BoundedMemory<char> boundedMemory = BoundedMemory.AllocateFromExistingData(inputData);

            // The pointer is obtained inside the delegate so that everything runs under Assert.Throws.
            Func<object> construct = () => new Utf8String((char*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(boundedMemory.Span)));

            Assert.Throws<ArgumentException>(construct);
        }
예제 #14
0
        public static void NarrowUtf16ToLatin1_EmptyInput_NonNullReference()
        {
            // Both buffers are zero-length; the pointers handed to the workhorse come from
            // BoundedMemory spans (presumably non-null, per the test name - confirm against BoundedMemory).
            using BoundedMemory<char> utf16Mem = BoundedMemory.Allocate<char>(0);
            using BoundedMemory<byte> latin1Mem = BoundedMemory.Allocate<byte>(0);

            fixed (char* pUtf16 = &MemoryMarshal.GetReference(utf16Mem.Span))
            fixed (byte* pLatin1 = &MemoryMarshal.GetReference(latin1Mem.Span))
            {
                // With an element count of zero, zero elements should be reported as narrowed.
                var elementsNarrowed = _fnNarrowUtf16ToLatin1.Delegate(pUtf16, pLatin1, UIntPtr.Zero);

                Assert.Equal(UIntPtr.Zero, elementsNarrowed);
            }
        }
예제 #15
0
        public static void WidenLatin1ToUtf16()
        {
            // Verifies that widening Latin-1 bytes to UTF-16 chars reproduces every source
            // value, across a range of start offsets and across sliding fixed-size windows.

            using BoundedMemory<byte> latin1Mem = BoundedMemory.Allocate<byte>(128);
            using BoundedMemory<char> utf16Mem = BoundedMemory.Allocate<char>(128);

            // Fill the source with [deterministic] pseudo-random bytes, then make readonly.

            new Random(0x12345).NextBytes(latin1Mem.Span);
            latin1Mem.MakeReadonly();

            // We'll write to the UTF-16 span.
            // We test with a variety of span lengths to test alignment and fallthrough code paths.

            ReadOnlySpan<byte> latin1Span = latin1Mem.Span;
            Span<char> utf16Span = utf16Mem.Span;

            for (int i = 0; i < latin1Span.Length; i++)
            {
                utf16Span.Clear(); // remove any data from previous iteration

                // First, transcode the data from Latin-1 to UTF-16.

                CallWidenLatin1ToUtf16(latin1Span.Slice(i), utf16Span.Slice(i));

                // Then, validate that the data was transcoded properly.
                // Index with 'j' so that every transcoded element is checked; indexing
                // with 'i' would re-check only the first element of the slice.

                for (int j = i; j < latin1Span.Length; j++)
                {
                    Assert.Equal((ushort)latin1Span[j], (ushort)utf16Span[j]);
                }
            }

            // Now run the test with a bunch of sliding 48-byte windows.
            // This tests that we handle correctly the scenario where neither the beginning nor the
            // end of the buffer is properly vector-aligned.

            const int WindowSize = 48;

            // '<=' so that the final window, which ends exactly at the end of the buffer, is also covered.
            for (int i = 0; i <= latin1Span.Length - WindowSize; i++)
            {
                utf16Span.Clear(); // remove any data from previous iteration

                // First, transcode the data from Latin-1 to UTF-16.

                CallWidenLatin1ToUtf16(latin1Span.Slice(i, WindowSize), utf16Span.Slice(i, WindowSize));

                // Then, validate that the data was transcoded properly.

                for (int j = 0; j < WindowSize; j++)
                {
                    Assert.Equal((ushort)latin1Span[i + j], (ushort)utf16Span[i + j]);
                }
            }
        }
예제 #16
0
        public static void WidenLatin1ToUtf16_EmptyInput_NonNullReference()
        {
            // Both buffers are zero-length; the pointers handed to the workhorse come from
            // BoundedMemory spans (presumably non-null, per the test name - confirm against BoundedMemory).
            using BoundedMemory<byte> latin1Mem = BoundedMemory.Allocate<byte>(0);
            using BoundedMemory<char> utf16Mem = BoundedMemory.Allocate<char>(0);

            fixed (byte* pSource = &MemoryMarshal.GetReference(latin1Mem.Span))
            fixed (char* pDestination = &MemoryMarshal.GetReference(utf16Mem.Span))
            {
                // There is no result to assert on; the call merely has to complete without an access violation.
                _fnWidenLatin1ToUtf16.Delegate(pSource, pDestination, UIntPtr.Zero);
            }
        }
            // Runs the span-based CompareInfo.Compare overload in both argument orders and
            // checks the sign of each result: 'expected' for (string1, string2) and
            // '-expected' for (string2, string1).
            static void RunSpanCompareTest(CompareInfo compareInfo, ReadOnlySpan<char> string1, ReadOnlySpan<char> string2, CompareOptions options, int expected)
            {
                // Copy both inputs into read-only BoundedMemory so that buffer overruns are detected.
                using BoundedMemory<char> string1BoundedMemory = BoundedMemory.AllocateFromExistingData(string1);
                string1BoundedMemory.MakeReadonly();

                using BoundedMemory<char> string2BoundedMemory = BoundedMemory.AllocateFromExistingData(string2);
                string2BoundedMemory.MakeReadonly();

                // Compare the bounded copies rather than the caller's spans; comparing the
                // original spans would leave the BoundedMemory instances unused.
                ReadOnlySpan<char> boundedString1 = string1BoundedMemory.Span;
                ReadOnlySpan<char> boundedString2 = string2BoundedMemory.Span;

                Assert.Equal(expected, Math.Sign(compareInfo.Compare(boundedString1, boundedString2, options)));
                Assert.Equal(-expected, Math.Sign(compareInfo.Compare(boundedString2, boundedString1, options)));
            }
예제 #18
0
            // Runs the span-based CompareInfo.IndexOf overloads over read-only BoundedMemory
            // copies of the inputs and checks that each returns 'expected'.
            static void RunSpanIndexOfTest(CompareInfo compareInfo, ReadOnlySpan<char> source, ReadOnlySpan<char> value, CompareOptions options, int expected)
            {
                using BoundedMemory<char> sourceBoundedMemory = BoundedMemory.AllocateFromExistingData(source);
                sourceBoundedMemory.MakeReadonly();

                using BoundedMemory<char> valueBoundedMemory = BoundedMemory.AllocateFromExistingData(value);
                valueBoundedMemory.MakeReadonly();

                ReadOnlySpan<char> boundedSource = sourceBoundedMemory.Span;
                ReadOnlySpan<char> boundedValue = valueBoundedMemory.Span;

                Assert.Equal(expected, compareInfo.IndexOf(boundedSource, boundedValue, options));

                // The Rune-based overload only applies when the helper can build a Rune from 'value'.
                if (!TryCreateRuneFrom(value, out Rune rune))
                {
                    return;
                }

                Assert.Equal(expected, compareInfo.IndexOf(boundedSource, rune, options));
            }
예제 #19
0
        public static void NarrowUtf16ToLatin1_SomeNonLatin1Input()
        {
            using BoundedMemory<char> utf16Mem = BoundedMemory.Allocate<char>(128);
            using BoundedMemory<byte> latin1Mem = BoundedMemory.Allocate<byte>(128);

            // Source: chars in U+0000..U+00FF drawn from a fixed-seed (hence deterministic) Random.

            Random rnd = new Random(0x54321);
            Span<char> utf16Span = utf16Mem.Span;

            foreach (ref char ch in utf16Span)
            {
                ch = (char)(byte)rnd.Next();
            }

            // Destination: the Latin-1 span.

            Span<byte> latin1Span = latin1Mem.Span;

            // Plant the non-Latin-1 char at successively earlier positions, last position first.
            for (int badIndex = utf16Span.Length - 1; badIndex >= 0; badIndex--)
            {
                // Fill the destination with garbage and remember the part that must
                // survive untouched (everything from the bad index onward).
                RandomNumberGenerator.Fill(latin1Span);
                byte[] expectedTrailingData = latin1Span.Slice(badIndex).ToArray();

                // Make the char at this position non-Latin-1; the workhorse should stop right there.
                utf16Span[badIndex] = '\u0123';
                Assert.Equal(badIndex, CallNarrowUtf16ToLatin1(utf16Span, latin1Span));

                // Everything before the bad index must have been narrowed faithfully.
                for (int k = 0; k < badIndex; k++)
                {
                    Assert.Equal((ushort)utf16Span[k], (ushort)latin1Span[k]);
                }

                // Everything from the bad index onward must still hold the original garbage.
                Assert.Equal(expectedTrailingData, latin1Span.Slice(badIndex).ToArray());
            }
        }
예제 #20
0
        public static void NarrowUtf16ToAscii_SomeNonAsciiInput()
        {
            using BoundedMemory<char> utf16Mem = BoundedMemory.Allocate<char>(128);
            using BoundedMemory<byte> asciiMem = BoundedMemory.Allocate<byte>(128);

            // Source: the chars 00 .. 7F in ascending order.

            Span<char> utf16Span = utf16Mem.Span;

            for (int value = 0; value < utf16Span.Length; value++)
            {
                utf16Span[value] = (char)value;
            }

            // Destination: the ASCII span.

            Span<byte> asciiSpan = asciiMem.Span;

            // Plant the non-ASCII char at successively earlier positions, last position first.
            for (int badIndex = utf16Span.Length - 1; badIndex >= 0; badIndex--)
            {
                // Fill the destination with garbage and remember the part that must
                // survive untouched (everything from the bad index onward).
                RandomNumberGenerator.Fill(asciiSpan);
                byte[] expectedTrailingData = asciiSpan.Slice(badIndex).ToArray();

                // Make the char at this position non-ASCII; the workhorse should stop right there.
                // U+0123 is used instead of U+0080 since it catches inappropriate pmovmskb usage.
                utf16Span[badIndex] = '\u0123';
                Assert.Equal(badIndex, CallNarrowUtf16ToAscii(utf16Span, asciiSpan));

                // Everything before the bad index must have been narrowed faithfully.
                for (int k = 0; k < badIndex; k++)
                {
                    Assert.Equal((ushort)utf16Span[k], (ushort)asciiSpan[k]);
                }

                // Everything from the bad index onward must still hold the original garbage.
                Assert.Equal(expectedTrailingData, asciiSpan.Slice(badIndex).ToArray());
            }
        }
예제 #21
0
        public static void WidenAsciiToUtf16_SomeNonAsciiInput()
        {
            using BoundedMemory<byte> asciiMem = BoundedMemory.Allocate<byte>(128);
            using BoundedMemory<char> utf16Mem = BoundedMemory.Allocate<char>(128);

            // Source: the bytes 00 .. 7F in ascending order. The source stays writable
            // because the loop below flips one byte to non-ASCII on every iteration.

            Span<byte> asciiSpan = asciiMem.Span;

            for (int value = 0; value < asciiSpan.Length; value++)
            {
                asciiSpan[value] = (byte)value;
            }

            // Destination: the UTF-16 span.

            Span<char> utf16Span = utf16Mem.Span;

            // Plant the non-ASCII byte at successively earlier positions, last position first.
            for (int badIndex = asciiSpan.Length - 1; badIndex >= 0; badIndex--)
            {
                // Fill the destination with garbage and remember the part that must
                // survive untouched (everything from the bad index onward).
                RandomNumberGenerator.Fill(MemoryMarshal.Cast<char, byte>(utf16Span));
                char[] expectedTrailingData = utf16Span.Slice(badIndex).ToArray();

                // Set the high bit of the byte at this position; the workhorse should stop right there.
                asciiSpan[badIndex] |= (byte)0x80;
                Assert.Equal(badIndex, CallWidenAsciiToUtf16(asciiSpan, utf16Span));

                // Everything before the bad index must have been widened faithfully.
                for (int k = 0; k < badIndex; k++)
                {
                    Assert.Equal((ushort)asciiSpan[k], (ushort)utf16Span[k]);
                }

                // Everything from the bad index onward must still hold the original garbage.
                Assert.Equal(expectedTrailingData, utf16Span.Slice(badIndex).ToArray());
            }
        }
        // Shared driver: runs the UTF-8 validation routine over a read-only BoundedMemory
        // copy of 'input' and checks the index of the first invalid byte (-1 when the
        // whole input was consumed), the rune count, and the surrogate pair count.
        private static unsafe void GetIndexOfFirstInvalidUtf8Sequence_Test_Core(byte[] input, int expectedRetVal, int expectedRuneCount, int expectedSurrogatePairCount)
        {
            // Arrange

            using BoundedMemory<byte> boundedMemory = BoundedMemory.AllocateFromExistingData(input);
            boundedMemory.MakeReadonly();

            // Act

            int actualRetVal, actualRuneCount, actualSurrogatePairCount;

            fixed (byte* pStart = &MemoryMarshal.GetReference(boundedMemory.Span))
            {
                byte* pInvalid = _getPointerToFirstInvalidByteFn.Value(pStart, input.Length, out int utf16Adjustment, out int scalarAdjustment);

                long bytesConsumed = pInvalid - pStart;

                // The unsigned comparison rejects negative differences as well as overshoots.
                Assert.True((ulong)bytesConsumed <= (uint)input.Length, "ptrDiff was outside expected range.");

                Assert.True(utf16Adjustment <= 0, "UTF-16 code unit count adjustment must be 0 or negative.");
                Assert.True(scalarAdjustment <= 0, "Scalar count adjustment must be 0 or negative.");

                // Consuming the whole input means no invalid byte was found.
                if (bytesConsumed == input.Length)
                {
                    actualRetVal = -1;
                }
                else
                {
                    actualRetVal = (int)bytesConsumed;
                }

                // bytes consumed + first adjustment  = total UTF-16 code unit count
                // UTF-16 count   + second adjustment = total scalar (rune) count

                int utf16CodeUnitCount = (int)bytesConsumed + utf16Adjustment;

                actualRuneCount = utf16CodeUnitCount + scalarAdjustment;

                // Each surrogate pair contributes two UTF-16 code units but one scalar,
                // so the difference between the two counts is the number of pairs.

                actualSurrogatePairCount = utf16CodeUnitCount - actualRuneCount;
            }

            // Assert

            Assert.Equal(expectedRetVal, actualRetVal);
            Assert.Equal(expectedRuneCount, actualRuneCount);
            Assert.Equal(expectedSurrogatePairCount, actualSurrogatePairCount);
        }
예제 #23
0
        public void EncodeUtf16_OperationStatus_SurrogateHandlingEdgeCases(char[] input, int destBufferSize, bool isFinalBlock, string expectedOutput, int expectedCharsConsumed, OperationStatus expectedResult)
        {
            // Arrange: the predicate accepts every scalar it is asked about, so all
            // well-formed scalars are allowed through.

            ConfigurableScalarTextEncoder encoder = new ConfigurableScalarTextEncoder(_ => true);

            using BoundedMemory<char> boundedInput = BoundedMemory.AllocateFromExistingData(input);
            using BoundedMemory<char> boundedOutput = BoundedMemory.Allocate<char>(destBufferSize);

            // Act

            OperationStatus actualResult = encoder.Encode(
                boundedInput.Span,
                boundedOutput.Span,
                out int actualCharsConsumed,
                out int actualCharsWritten,
                isFinalBlock);

            // Assert

            Assert.Equal(expectedResult, actualResult);
            Assert.Equal(expectedCharsConsumed, actualCharsConsumed);

            string actualOutput = boundedOutput.Span.Slice(0, actualCharsWritten).ToString();
            Assert.Equal(expectedOutput, actualOutput);
        }
예제 #24
0
        public void EncodeUtf16_OperationStatus_AlphaNumericOnly(string input, int destBufferSize, string expectedOutput, int expectedCharsConsumed, OperationStatus expectedResult)
        {
            // Arrange: only [A-Za-z] passes unescaped. OR-ing in 0x20 folds an uppercase
            // ASCII letter onto its lowercase counterpart before the range check.

            ConfigurableScalarTextEncoder encoder = new ConfigurableScalarTextEncoder(
                candidate => UnicodeUtility.IsInRangeInclusive((uint)candidate | 0x20, 'a', 'z'));

            using BoundedMemory<char> boundedInput = BoundedMemory.AllocateFromExistingData<char>(input.AsSpan());
            using BoundedMemory<char> boundedOutput = BoundedMemory.Allocate<char>(destBufferSize);

            // Act

            OperationStatus actualResult = encoder.Encode(
                boundedInput.Span,
                boundedOutput.Span,
                out int actualCharsConsumed,
                out int actualCharsWritten);

            // Assert

            Assert.Equal(expectedResult, actualResult);
            Assert.Equal(expectedCharsConsumed, actualCharsConsumed);

            string actualOutput = boundedOutput.Span.Slice(0, actualCharsWritten).ToString();
            Assert.Equal(expectedOutput, actualOutput);
        }
예제 #25
0
            // Checks the span-based sort key APIs of 'compareInfo' for 'source' under 'options':
            // GetSortKeyLength must report expectedSortKey.Length, GetSortKey must reject a
            // destination that is too small, and GetSortKey must produce expectedSortKey when
            // the destination is exactly the right size.
            unsafe static void RunSpanSortKeyTest(CompareInfo compareInfo, ReadOnlySpan <char> source, CompareOptions options, byte[] expectedSortKey)
            {
                // Work on a read-only BoundedMemory copy of the source.
                using BoundedMemory <char> sourceBoundedMemory = BoundedMemory.AllocateFromExistingData(source);
                sourceBoundedMemory.MakeReadonly();

                Assert.Equal(expectedSortKey.Length, compareInfo.GetSortKeyLength(sourceBoundedMemory.Span, options));

                // The destination buffer has exactly the expected sort key length.
                using BoundedMemory <byte> sortKeyBoundedMemory = BoundedMemory.Allocate <byte>(expectedSortKey.Length);

                // First try with a destination which is too small - should result in an error.
                // Slice(1) drops the first byte, leaving the destination one byte short.

                Assert.Throws <ArgumentException>("destination", () => compareInfo.GetSortKey(sourceBoundedMemory.Span, sortKeyBoundedMemory.Span.Slice(1), options));

                // Next, try with a destination which is perfectly sized - should succeed

                Span <byte> sortKeyBoundedSpan = sortKeyBoundedMemory.Span;

                // Zero the destination before the call that is expected to fill it.
                sortKeyBoundedSpan.Clear();

                // The return value is compared against the expected length, and the bytes
                // written against the expected sort key.
                Assert.Equal(expectedSortKey.Length, compareInfo.GetSortKey(sourceBoundedMemory.Span, sortKeyBoundedSpan, options));
                Assert.Equal(expectedSortKey, sortKeyBoundedSpan[0..expectedSortKey.Length].ToArray());
        // Shared driver: transcodes 'utf16Input' with Utf8.FromUtf16 into a destination of
        // 'destinationSize' bytes and checks the returned status, the number of chars
        // read, and the bytes written.
        private static void ToBytes_Test_Core(ReadOnlySpan<char> utf16Input, int destinationSize, bool replaceInvalidSequences, bool isFinalChunk, OperationStatus expectedOperationStatus, int expectedNumCharsRead, ReadOnlySpan<byte> expectedUtf8Transcoding)
        {
            // Arrange

            using BoundedMemory<char> boundedSource = BoundedMemory.AllocateFromExistingData(utf16Input);
            using BoundedMemory<byte> boundedDestination = BoundedMemory.Allocate<byte>(destinationSize);

            boundedSource.MakeReadonly();

            // Act

            OperationStatus actualOperationStatus = Utf8.FromUtf16(
                boundedSource.Span,
                boundedDestination.Span,
                out int actualNumCharsRead,
                out int actualNumBytesWritten,
                replaceInvalidSequences,
                isFinalChunk);

            // Assert

            Assert.Equal(expectedOperationStatus, actualOperationStatus);
            Assert.Equal(expectedNumCharsRead, actualNumCharsRead);
            Assert.Equal(expectedUtf8Transcoding.Length, actualNumBytesWritten);

            byte[] actualUtf8Transcoding = boundedDestination.Span.Slice(0, actualNumBytesWritten).ToArray();
            Assert.Equal(expectedUtf8Transcoding.ToArray(), actualUtf8Transcoding);
        }
        // Shared driver: runs the UTF-16 validation routine over a read-only BoundedMemory
        // copy of 'input' and checks the index of the first invalid char (-1 when the
        // whole input was consumed), the rune count, and the UTF-8 byte count.
        private static unsafe void GetIndexOfFirstInvalidUtf16Sequence_Test_Core(char[] input, int expectedRetVal, int expectedRuneCount, long expectedUtf8ByteCount)
        {
            // Arrange

            using BoundedMemory<char> boundedMemory = BoundedMemory.AllocateFromExistingData(input);
            boundedMemory.MakeReadonly();

            // Act

            int actualRetVal;
            long actualUtf8CodeUnitCount;
            int actualRuneCount;

            fixed (char* pInputBuffer = &MemoryMarshal.GetReference(boundedMemory.Span))
            {
                char* pFirstInvalidChar = _getPointerToFirstInvalidCharFn.Value(pInputBuffer, input.Length, out long utf8CodeUnitCountAdjustment, out int scalarCountAdjustment);

                long ptrDiff = pFirstInvalidChar - pInputBuffer;

                // The unsigned comparison rejects negative differences as well as overshoots.
                Assert.True((ulong)ptrDiff <= (uint)input.Length, "ptrDiff was outside expected range.");

                // The message names the UTF-8 adjustment, which is the value being checked here.
                Assert.True(utf8CodeUnitCountAdjustment >= 0, "UTF-8 code unit count adjustment must be non-negative.");
                Assert.True(scalarCountAdjustment <= 0, "Scalar count adjustment must be 0 or negative.");

                // Consuming the whole input means no invalid char was found.
                actualRetVal = (ptrDiff == input.Length) ? -1 : (int)ptrDiff;

                // The last two 'out' parameters are:
                // a) The number to be added to the "chars processed" return value to come up with the total UTF-8 code unit count, and
                // b) The number to be added to the "total UTF-16 code unit count" value to come up with the total scalar count.

                actualUtf8CodeUnitCount = ptrDiff + utf8CodeUnitCountAdjustment;
                actualRuneCount = (int)ptrDiff + scalarCountAdjustment;
            }

            // Assert (expected value first, so that failure messages label the values correctly)

            Assert.Equal(expectedRetVal, actualRetVal);
            Assert.Equal(expectedRuneCount, actualRuneCount);
            Assert.Equal(expectedUtf8ByteCount, actualUtf8CodeUnitCount);
        }
예제 #28
0
        public static void NarrowUtf16ToLatin1_AllLatin1Input()
        {
            // Verifies that narrowing all-Latin-1 UTF-16 input consumes the whole input and
            // reproduces every source value, across a range of start offsets.

            using BoundedMemory<char> utf16Mem = BoundedMemory.Allocate<char>(128);
            using BoundedMemory<byte> latin1Mem = BoundedMemory.Allocate<byte>(128);

            // Fill the source with [deterministic] pseudo-random chars U+0000..U+00FF, then make readonly.

            Random rnd = new Random(0x54321);
            Span<char> utf16Span = utf16Mem.Span;

            for (int i = 0; i < utf16Span.Length; i++)
            {
                utf16Span[i] = (char)(byte)rnd.Next();
            }
            utf16Mem.MakeReadonly();

            // We'll write to the Latin-1 span.
            // We test with a variety of span lengths to test alignment and fallthrough code paths.

            Span<byte> latin1Span = latin1Mem.Span;

            for (int i = 0; i < utf16Span.Length; i++)
            {
                latin1Span.Clear(); // remove any data from previous iteration

                // First, validate that the workhorse saw the incoming data as all-Latin-1.

                Assert.Equal(utf16Span.Length - i, CallNarrowUtf16ToLatin1(utf16Span.Slice(i), latin1Span.Slice(i)));

                // Then, validate that the data was transcoded properly.
                // Index with 'j' so that every transcoded element is checked; indexing
                // with 'i' would re-check only the first element of the slice.

                for (int j = i; j < utf16Span.Length; j++)
                {
                    Assert.Equal((ushort)utf16Span[j], (ushort)latin1Span[j]);
                }
            }
        }
예제 #29
0
        public static void GetIndexOfFirstNonAsciiChar_Boundaries()
        {
            // Drives the vectorized and draining logic of both the SSE2 and the non-SSE2
            // enlightened code paths, and relies on BoundedMemory to catch any read beyond
            // the boundaries we were given.
            //
            // The buffer spans 5 * Vector<byte>.Count bytes, intended to be large enough to
            // exercise every code path across both implementations. The division by
            // sizeof(char) is there because the allocation takes an element count, while
            // the underlying implementation reinterpret-casts to bytes.
            //
            // U+0123 is planted rather than U+0080: an implementation that misuses
            // pminuw / pmovmskb would see U+0123 as ASCII, so this value exposes such
            // a bug instead of letting the test pass by accident.

            using BoundedMemory<char> mem = BoundedMemory.Allocate<char>(5 * Vector<byte>.Count / sizeof(char));
            Span<char> chars = mem.Span;

            // The buffer arrives pre-populated with random data; mask every char down to ASCII.
            foreach (ref char ch in chars)
            {
                ch &= '\u007F';
            }

            // Pass 1: all-ASCII prefixes of every length, longest first.
            // An all-ASCII input should report its own length.
            for (int length = chars.Length; length >= 0; length--)
            {
                Assert.Equal(length, CallGetIndexOfFirstNonAsciiChar(chars.Slice(0, length)));
            }

            // Pass 2: make the final char of each prefix non-ASCII, longest prefix first.
            for (int last = chars.Length - 1; last >= 0; last--)
            {
                chars[last] = '\u0123';
                Assert.Equal(last, CallGetIndexOfFirstNonAsciiChar(chars.Slice(0, last + 1)));
            }
        }
예제 #30
0
        public static void NarrowUtf16ToAscii_AllAsciiInput()
        {
            // Verifies that narrowing all-ASCII UTF-16 input consumes the whole input and
            // reproduces every source value, across a range of start offsets.

            using BoundedMemory<char> utf16Mem = BoundedMemory.Allocate<char>(128);
            using BoundedMemory<byte> asciiMem = BoundedMemory.Allocate<byte>(128);

            // Fill source with 00 .. 7F, then make readonly.

            Span<char> utf16Span = utf16Mem.Span;

            for (int i = 0; i < utf16Span.Length; i++)
            {
                utf16Span[i] = (char)i;
            }
            utf16Mem.MakeReadonly();

            // We'll write to the ASCII span.
            // We test with a variety of span lengths to test alignment and fallthrough code paths.

            Span<byte> asciiSpan = asciiMem.Span;

            for (int i = 0; i < utf16Span.Length; i++)
            {
                asciiSpan.Clear(); // remove any data from previous iteration

                // First, validate that the workhorse saw the incoming data as all-ASCII.

                Assert.Equal(utf16Span.Length - i, CallNarrowUtf16ToAscii(utf16Span.Slice(i), asciiSpan.Slice(i)));

                // Then, validate that the data was transcoded properly.
                // Index with 'j' so that every transcoded element is checked; indexing
                // with 'i' would re-check only the first element of the slice.

                for (int j = i; j < utf16Span.Length; j++)
                {
                    Assert.Equal((ushort)utf16Span[j], (ushort)asciiSpan[j]);
                }
            }
        }