Exemple #1
0
        public void FindFirstCharToEncodeUtf8_IllFormedData_ReturnsIndexOfIllFormedSubsequence(byte[] utf8Data, int expectedIndex)
        {
            // Arrange

            var encoder = new ConfigurableScalarTextEncoder(_ => true /* allow everything */);

            // Act

            int actualIndex = encoder.FindFirstCharacterToEncodeUtf8(utf8Data);

            // Assert

            Assert.Equal(expectedIndex, actualIndex);
        }
Exemple #2
0
        public void FindFirstCharToEncodeUtf8_EmptyInput_ReturnsNegOne()
        {
            // Arrange

            var encoder = new ConfigurableScalarTextEncoder(_ => false /* disallow everything */);

            // Act

            int idxOfFirstByteToEncode = encoder.FindFirstCharacterToEncodeUtf8(ReadOnlySpan <byte> .Empty);

            // Assert

            Assert.Equal(-1, idxOfFirstByteToEncode);
        }
Exemple #3
0
        public void FindFirstCharToEncodeUtf8_WellFormedData_SomeCharsDisallowed()
        {
            // Arrange

            byte[] inputBytes = Encoding.UTF8.GetBytes("\U00000040\U00000400\U00004000\U00040000"); // code units of different lengths
            var    encoder    = new ConfigurableScalarTextEncoder(codePoint => codePoint != 0x4000 /* disallow U+4000, allow all else */);

            // Act

            int idxOfFirstByteToEncode = encoder.FindFirstCharacterToEncodeUtf8(inputBytes);

            // Assert

            Assert.Equal(3, idxOfFirstByteToEncode);
        }
Exemple #4
0
        public void FindFirstCharToEncodeUtf8_WellFormedData_AllCharsAllowed()
        {
            // Arrange

            byte[] inputBytes = Encoding.UTF8.GetBytes("\U00000040\U00000400\U00004000\U00040000"); // code units of different lengths
            var    encoder    = new ConfigurableScalarTextEncoder(_ => true /* allow everything */);

            // Act

            int idxOfFirstByteToEncode = encoder.FindFirstCharacterToEncodeUtf8(inputBytes);

            // Assert

            Assert.Equal(-1, idxOfFirstByteToEncode);
        }
Exemple #5
0
        public void EncodeUtf8_EmptyInput_AlwaysSucceeds()
        {
            // Arrange

            var encoder = new ConfigurableScalarTextEncoder(_ => false /* disallow everything */);

            // Act & assert

            Assert.Equal(OperationStatus.Done, encoder.EncodeUtf8(ReadOnlySpan <byte> .Empty, Span <byte> .Empty, out int bytesConsumed, out int bytesWritten, isFinalBlock: true));
            Assert.Equal(0, bytesConsumed);
            Assert.Equal(0, bytesWritten);

            Assert.Equal(OperationStatus.Done, encoder.EncodeUtf8(ReadOnlySpan <byte> .Empty, Span <byte> .Empty, out bytesConsumed, out bytesWritten, isFinalBlock: false));
            Assert.Equal(0, bytesConsumed);
            Assert.Equal(0, bytesWritten);
        }
Exemple #6
0
        public void EncodeUtf8_WellFormedInput_DoesNotRequireEncoding_CopiedToDestinationCorrectly(int destinationSize, int expectedBytesCopied)
        {
            // This test considers input which is well-formed and doesn't need to be encoded.
            // If the destination buffer is large enough, the data should be copied in its entirety.
            // If the destination buffer is too small, only complete UTF-8 subsequences should be copied.
            // We should never copy a partial subsequence, as it would cause a future call to EncodeUtf8
            // to misinterpret the data as ill-formed.

            // Arrange

            byte[] fullUtf8Input = new byte[] {
                0xC2, 0x82,
                0x40,
                0xE2, 0x90, 0x91,
                0xF3, 0xA0, 0xA1, 0xA2,
                0x50
            };          // UTF-8 subsequences of varying length

            var encoder = new ConfigurableScalarTextEncoder(_ => true /* allow everything */);

            // Act & assert

            OperationStatus expectedOpStatus = (expectedBytesCopied == fullUtf8Input.Length) ? OperationStatus.Done : OperationStatus.DestinationTooSmall;

            byte[] destination = new byte[destinationSize];
            Assert.Equal(expectedOpStatus, encoder.EncodeUtf8(fullUtf8Input, destination, out int bytesConsumed, out int bytesWritten, isFinalBlock: true));
            Assert.Equal(expectedBytesCopied, bytesConsumed);
            Assert.Equal(expectedBytesCopied, bytesWritten);                                                               // bytes written should match bytes consumed if no encoding needs to take place
            Assert.Equal(fullUtf8Input.AsSpan(0, bytesConsumed).ToArray(), destination.AsSpan(0, bytesWritten).ToArray()); // ensure byte-for-byte copy
            Assert.True(destination.AsSpan(bytesWritten).ToArray().All(el => el == 0));                                    // all remaining bytes should be unchanged

            destination = new byte[destinationSize];
            Assert.Equal(expectedOpStatus, encoder.EncodeUtf8(fullUtf8Input, destination, out bytesConsumed, out bytesWritten, isFinalBlock: false));
            Assert.Equal(expectedBytesCopied, bytesConsumed);
            Assert.Equal(expectedBytesCopied, bytesWritten);                                                               // bytes written should match bytes consumed if no encoding needs to take place
            Assert.Equal(fullUtf8Input.AsSpan(0, bytesConsumed).ToArray(), destination.AsSpan(0, bytesWritten).ToArray()); // ensure byte-for-byte copy
            Assert.True(destination.AsSpan(bytesWritten).ToArray().All(el => el == 0));                                    // all remaining bytes should be unchanged
        }
Exemple #7
0
        public void EncodeUtf8_MixedInputWhichRequiresEncodingOrReplacement()
        {
            // Arrange

            var fullInput = new[]
            {
                new { utf8Bytes = new byte[] { 0x40 }, output = "@" },
                new { utf8Bytes = new byte[] { 0xC3, 0x85 }, output = "[00C5]" },                 // U+00C5 LATIN CAPITAL LETTER A WITH RING ABOVE (encoded since odd scalar value)
                new { utf8Bytes = new byte[] { 0xC3, 0x86 }, output = "\u00C6" },                 // U+00C6 LATIN CAPITAL LETTER AE (on allow list)
                new { utf8Bytes = new byte[] { 0xFF }, output = "[FFFD]" },                       // (invalid UTF-8, replaced with encoded form of U+FFFD)
                new { utf8Bytes = new byte[] { 0xEF, 0xBF, 0xBD }, output = "[FFFD]" },           // U+FFFD REPLACEMENT CHARACTER (encoded since not on allow list)
                new { utf8Bytes = new byte[] { 0xF0, 0x90, 0x82, 0x82 }, output = "\U00010082" }, // U+10082 LINEAR B IDEOGRAM B104 DEER (not encoded since on allow list)
                new { utf8Bytes = new byte[] { 0xF0, 0x90, 0x82, 0x83 }, output = "[10083]" },    // U+10083 LINEAR B IDEOGRAM B105 EQUID (encoded since not on allow list)
            };

            var encoder = new ConfigurableScalarTextEncoder(scalarValue => (scalarValue % 2) == 0 /* allow only even-valued scalars to be represented unescaped */);

            // Act & assert

            List <byte> aggregateInputBytesSoFar = new List <byte>();
            List <byte> expectedOutputBytesSoFar = new List <byte>();

            foreach (var entry in fullInput)
            {
                int aggregateInputByteCountAtStartOfLoop = aggregateInputBytesSoFar.Count;

                byte[] destination;
                int    bytesConsumed, bytesWritten;

                for (int i = 0; i < entry.utf8Bytes.Length - 1; i++)
                {
                    aggregateInputBytesSoFar.Add(entry.utf8Bytes[i]);

                    // If not final block, partial encoding should say "needs more data".
                    // We'll try with various destination lengths just to make sure it doesn't affect result.

                    foreach (int destinationLength in new[] { expectedOutputBytesSoFar.Count, expectedOutputBytesSoFar.Count + 1024 })
                    {
                        destination = new byte[destinationLength];

                        Assert.Equal(OperationStatus.NeedMoreData, encoder.EncodeUtf8(aggregateInputBytesSoFar.ToArray(), destination, out bytesConsumed, out bytesWritten, isFinalBlock: false));
                        Assert.Equal(aggregateInputByteCountAtStartOfLoop, bytesConsumed);
                        Assert.Equal(expectedOutputBytesSoFar.Count, bytesWritten);
                        Assert.Equal(expectedOutputBytesSoFar.ToArray(), new Span <byte>(destination, 0, expectedOutputBytesSoFar.Count).ToArray());
                    }

                    // Now try it with "isFinalBlock = true" to force the U+FFFD conversion

                    destination = new byte[expectedOutputBytesSoFar.Count]; // first with not enough output space to write "[FFFD]"

                    Assert.Equal(OperationStatus.DestinationTooSmall, encoder.EncodeUtf8(aggregateInputBytesSoFar.ToArray(), destination, out bytesConsumed, out bytesWritten, isFinalBlock: true));
                    Assert.Equal(aggregateInputByteCountAtStartOfLoop, bytesConsumed);
                    Assert.Equal(expectedOutputBytesSoFar.Count, bytesWritten);
                    Assert.Equal(expectedOutputBytesSoFar.ToArray(), new Span <byte>(destination, 0, expectedOutputBytesSoFar.Count).ToArray());

                    destination = new byte[expectedOutputBytesSoFar.Count + 1024]; // then with enough output space to write "[FFFD]"

                    Assert.Equal(OperationStatus.Done, encoder.EncodeUtf8(aggregateInputBytesSoFar.ToArray(), destination, out bytesConsumed, out bytesWritten, isFinalBlock: true));
                    Assert.Equal(aggregateInputBytesSoFar.Count, bytesConsumed);
                    Assert.Equal(expectedOutputBytesSoFar.Count + "[FFFD]".Length, bytesWritten);
                    Assert.Equal(expectedOutputBytesSoFar.Concat(Encoding.UTF8.GetBytes("[FFFD]")).ToArray(), new Span <byte>(destination, 0, expectedOutputBytesSoFar.Count + "[FFFD]".Length).ToArray());
                }

                // Consume the remainder of this entry and make sure it escaped properly (if needed).

                aggregateInputBytesSoFar.Add(entry.utf8Bytes.Last());

                // First with not enough space in the destination buffer.

                destination = new byte[expectedOutputBytesSoFar.Count + Encoding.UTF8.GetByteCount(entry.output) - 1];

                Assert.Equal(OperationStatus.DestinationTooSmall, encoder.EncodeUtf8(aggregateInputBytesSoFar.ToArray(), destination, out bytesConsumed, out bytesWritten, isFinalBlock: true));
                Assert.Equal(aggregateInputByteCountAtStartOfLoop, bytesConsumed);
                Assert.Equal(expectedOutputBytesSoFar.Count, bytesWritten);
                Assert.Equal(expectedOutputBytesSoFar.ToArray(), new Span <byte>(destination, 0, expectedOutputBytesSoFar.Count).ToArray());

                // Then with exactly enough space in the destination buffer,
                // and again with more than enough space in the destination buffer.

                expectedOutputBytesSoFar.AddRange(Encoding.UTF8.GetBytes(entry.output));

                foreach (int destinationLength in new[] { expectedOutputBytesSoFar.Count, expectedOutputBytesSoFar.Count + 1024 })
                {
                    destination = new byte[destinationLength];

                    Assert.Equal(OperationStatus.Done, encoder.EncodeUtf8(aggregateInputBytesSoFar.ToArray(), destination, out bytesConsumed, out bytesWritten, isFinalBlock: false));
                    Assert.Equal(aggregateInputBytesSoFar.Count, bytesConsumed);
                    Assert.Equal(expectedOutputBytesSoFar.Count, bytesWritten);
                    Assert.Equal(expectedOutputBytesSoFar.ToArray(), new Span <byte>(destination, 0, expectedOutputBytesSoFar.Count).ToArray());
                }
            }
        }