/// <summary>
/// Checks argument validation of <c>EncodingExtensions.GetBytes(Encoding, ReadOnlySpan&lt;char&gt;, IBufferWriter&lt;byte&gt;)</c>:
/// a null encoding or a null writer must raise <see cref="ArgumentNullException"/>
/// carrying the matching parameter name.
/// </summary>
public static void GetBytes_Encoding_ReadOnlySpan_IBufferWriter_ParamChecks()
{
    Encoding nullEncoding = null;
    IBufferWriter<byte> nullWriter = null;
    IBufferWriter<byte> validWriter = new ArrayBufferWriter<byte>();

    // Null encoding, everything else valid.
    Assert.Throws<ArgumentNullException>(
        "encoding",
        () => EncodingExtensions.GetBytes(nullEncoding, ReadOnlySpan<char>.Empty, validWriter));

    // Null writer, everything else valid.
    Assert.Throws<ArgumentNullException>(
        "writer",
        () => EncodingExtensions.GetBytes(Encoding.UTF8, ReadOnlySpan<char>.Empty, nullWriter));
}
/// <summary>
/// Encodes a <c>ReadOnlySequence&lt;char&gt;</c> into a stack-allocated <c>Span&lt;byte&gt;</c> via
/// <c>EncodingExtensions.GetBytes</c>, first with a single-segment sequence and then with a
/// multi-segment sequence whose surrogate pairs straddle segment boundaries, and compares
/// the output with <c>Encoding.UTF8.GetBytes</c> on the equivalent string.
/// </summary>
public static void GetBytes_Encoding_ReadOnlySequence_Span()
{
    Encoding utf8 = Encoding.UTF8;
    Span<byte> scratch = stackalloc byte[32];

    // Single-segment code path.
    ReadOnlySequence<char> input = new ReadOnlySequence<char>("Hello!".ToCharArray());
    byte[] expectedBytes = utf8.GetBytes("Hello!");
    int written = EncodingExtensions.GetBytes(utf8, input, scratch);
    Assert.Equal(expectedBytes, scratch.Slice(0, written).ToArray());

    // Multi-segment code path.
    // Multi-char subsequences are deliberately split across segments to exercise
    // the flushing mechanisms.
    input = SequenceFactory.Create(
        new char[] { '\u0020' },           // U+0020
        new char[] { '\u0061', '\u0080' }, // U+0061 and U+0080 (continues on next line)
        new char[] { '\ud800' },           // U+10000 (continues on next line)
        new char[] { },                    // empty segment, just to make sure we handle it correctly
        new char[] { '\udc00', '\udbff' }, // (cont.) + U+10FFFF (continues on next line)
        new char[] { '\udfff' },           // (cont.)
        new char[] { '\ud800' });          // leftover data (should be replaced)

    expectedBytes = utf8.GetBytes("\u0020\u0061\u0080\U00010000\U0010FFFF\ufffd");
    written = EncodingExtensions.GetBytes(utf8, input, scratch);
    Assert.Equal(expectedBytes, scratch.Slice(0, written).ToArray());
}
/// <summary>
/// Checks argument validation of <c>EncodingExtensions.GetBytes(Encoding, ReadOnlySequence&lt;char&gt;, IBufferWriter&lt;byte&gt;)</c>:
/// a null encoding or a null writer must raise <see cref="ArgumentNullException"/>
/// carrying the matching parameter name.
/// </summary>
public static void GetBytes_Encoding_ReadOnlySequence_IBufferWriter_ParamChecks()
{
    Encoding nullEncoding = null;
    IBufferWriter<byte> nullWriter = null;

    // An empty (zero-length) input is enough; only the argument checks matter here.
    ReadOnlySequence<char> emptyInput = new ReadOnlySequence<char>(Array.Empty<char>());
    IBufferWriter<byte> validWriter = new ArrayBufferWriter<byte>();

    Assert.Throws<ArgumentNullException>(
        "encoding",
        () => EncodingExtensions.GetBytes(nullEncoding, emptyInput, validWriter));

    Assert.Throws<ArgumentNullException>(
        "writer",
        () => EncodingExtensions.GetBytes(Encoding.UTF8, emptyInput, nullWriter));
}
/// <summary>
/// Encodes a single-segment <c>ReadOnlySequence&lt;char&gt;</c> containing "Hello" into an
/// <c>ArrayBufferWriter&lt;byte&gt;</c> and verifies both the reported byte count and the
/// bytes that were written.
/// </summary>
public static void GetBytes_Encoding_ReadOnlySequence_IBufferWriter_SingleSegment()
{
    ArrayBufferWriter<byte> output = new ArrayBufferWriter<byte>();
    ReadOnlySequence<char> input = new ReadOnlySequence<char>("Hello".ToCharArray());
    byte[] expectedBytes = Encoding.UTF8.GetBytes("Hello");

    long written = EncodingExtensions.GetBytes(Encoding.UTF8, input, output);

    Assert.Equal(5, written);
    Assert.Equal(expectedBytes, output.WrittenSpan.ToArray());
}
/// <summary>
/// Encodes a very large multi-segment sequence (the UTF-16 form of all scalars, repeated)
/// into a validating writer and verifies that the reported byte count matches both the
/// expected total and the writer's own element count.
/// </summary>
[OuterLoop] // this test takes ~10 seconds on modern hardware since it operates over GBs of data
public static void GetBytes_Encoding_ReadOnlySequence_IBufferWriter_LargeMultiSegment()
{
    // Number of times the all-scalars payload is repeated in the input.
    const int repetitions = 1500;

    // ~ 3.2bn chars of UTF-16 input
    ReadOnlySequence<char> input = GetLargeRepeatingReadOnlySequence<char>(AllScalarsAsUtf16, repetitions);
    RepeatingValidatingBufferWriter<byte> validatingWriter = new RepeatingValidatingBufferWriter<byte>(AllScalarsAsUtf8);

    // Widen to long before multiplying so the expected total cannot overflow int.
    long expectedTotal = repetitions * (long)AllScalarsAsUtf8.Length;

    long actualTotal = EncodingExtensions.GetBytes(Encoding.UTF8, input, validatingWriter);

    Assert.Equal(expectedTotal, actualTotal);

    // our writer will validate as data is written to it
    Assert.Equal(expectedTotal, validatingWriter.TotalElementsWritten);
}
/// <summary>
/// Encodes a <c>ReadOnlySpan&lt;char&gt;</c> into an <c>ArrayBufferWriter&lt;byte&gt;</c>, first with a
/// small input and then with a 5,000,000-char input followed by a lone lead surrogate,
/// verifying the reported byte counts and the encoded output.
/// </summary>
public static void GetBytes_Encoding_ReadOnlySpan_IBufferWriter()
{
    // First, a small input that goes through the one-shot code path.
    ArrayBufferWriter<byte> smallOutput = new ArrayBufferWriter<byte>();
    ReadOnlySpan<char> smallInput = "Hello";

    long written = EncodingExtensions.GetBytes(Encoding.UTF8, smallInput, smallOutput);

    Assert.Equal(5, written);
    Assert.Equal(Encoding.UTF8.GetBytes("Hello"), smallOutput.WrittenSpan.ToArray());

    // Then, a large input that goes through the chunked path.
    // The input alternates between 1-char and 2-char sequences so that the internal
    // chunking logic splits it in several locations. This helps us test that the
    // 'flush' parameter is flowed through the system correctly.
    string largeString = string.Create(5_000_000, (object)null, (buffer, _) =>
    {
        int offset = 0;
        while (buffer.Length - offset >= 3)
        {
            buffer[offset] = '\u00EA';     // U+00EA LATIN SMALL LETTER E WITH CIRCUMFLEX
            buffer[offset + 1] = '\uD83D'; // U+1F405 TIGER (lead surrogate)
            buffer[offset + 2] = '\uDC05'; // U+1F405 TIGER (trail surrogate)
            offset += 3;
        }

        // There are 2 chars left over.
        Assert.Equal(2, buffer.Length - offset);
        buffer[offset] = 'x';
        buffer[offset + 1] = 'y';
    });

    // Standalone lead surrogate at the end of the input, testing replacement.
    string largeWithLoneSurrogate = largeString + '\uD800';
    ReadOnlySpan<char> largeInput = largeWithLoneSurrogate;
    ArrayBufferWriter<byte> largeOutput = new ArrayBufferWriter<byte>();

    written = EncodingExtensions.GetBytes(Encoding.UTF8, largeInput, largeOutput);

    // 9,999,998 for data + 3 for the replacement char at the end
    Assert.Equal(10_000_001, written);

    // Now make sure all of the data was encoded properly.
    byte[] expectedBytes = Encoding.UTF8.GetBytes(largeString + "\ufffd");
    Assert.True(expectedBytes.AsSpan().SequenceEqual(largeOutput.WrittenSpan));
}
/// <summary>
/// Checks argument validation of <c>EncodingExtensions.GetBytes(Encoding, ReadOnlySequence&lt;char&gt;, Span&lt;byte&gt;)</c>:
/// a null encoding must raise <see cref="ArgumentNullException"/> for parameter "encoding".
/// </summary>
public static void GetBytes_Encoding_ReadOnlySequence_Span_ParamChecks()
{
    Encoding nullEncoding = null;

    // An empty (zero-length) input is enough; only the argument check matters here.
    ReadOnlySequence<char> emptyInput = new ReadOnlySequence<char>(Array.Empty<char>());

    Assert.Throws<ArgumentNullException>(
        "encoding",
        () => EncodingExtensions.GetBytes(nullEncoding, emptyInput, Span<byte>.Empty));
}