// Argument validation for GetChars(Encoding, ReadOnlySpan<byte>, IBufferWriter<char>):
// a null encoding or a null writer must raise ArgumentNullException naming that parameter.
public static void GetChars_Encoding_ReadOnlySpan_IBufferWriter_ParamChecks()
{
    IBufferWriter<char> validWriter = new ArrayBufferWriter<char>();

    // Null encoding, valid writer.
    Assert.Throws<ArgumentNullException>(
        "encoding",
        () => EncodingExtensions.GetChars((Encoding)null, ReadOnlySpan<byte>.Empty, validWriter));

    // Valid encoding, null writer.
    Assert.Throws<ArgumentNullException>(
        "writer",
        () => EncodingExtensions.GetChars(Encoding.UTF8, ReadOnlySpan<byte>.Empty, (IBufferWriter<char>)null));
}
// Decodes UTF-8 bytes into an ArrayBufferWriter<char> via
// GetChars(Encoding, ReadOnlySpan<byte>, IBufferWriter<char>), once with a small input and
// once with a large input followed by a lone 0xE0 byte that must decode to U+FFFD.
public static void GetChars_Encoding_ReadOnlySpan_IBufferWriter()
{
    // First, a small input that goes through the one-shot code path.
    ArrayBufferWriter<char> smallWriter = new ArrayBufferWriter<char>();
    ReadOnlySpan<byte> smallInput = Encoding.UTF8.GetBytes("Hello");

    long smallCharCount = EncodingExtensions.GetChars(Encoding.UTF8, smallInput, smallWriter);

    Assert.Equal(5, smallCharCount);
    Assert.Equal("Hello", smallWriter.WrittenSpan.ToString());

    // Then, a large input that goes through the chunked path.
    // We use U+1234 because it's a 3-byte UTF-8 sequence, which means it'll be split in
    // several locations by the internal chunking logic. This helps us test
    // that we're flowing the 'flush' parameter through the system correctly.
    string repeatedScalar = new string('\u1234', 5_000_000);
    ArrayBufferWriter<char> largeWriter = new ArrayBufferWriter<char>();
    ReadOnlySpan<byte> largeInput = Encoding.UTF8.GetBytes(repeatedScalar)
        .Concat(new byte[] { 0xE0 })
        .ToArray();

    long largeCharCount = EncodingExtensions.GetChars(Encoding.UTF8, largeInput, largeWriter);

    Assert.Equal(5_000_001, largeCharCount); // 5 MM for data, 1 for replacement char at end

    // Now make sure all of the data was decoded properly.
    Assert.Equal(repeatedScalar + '\ufffd', largeWriter.WrittenSpan.ToString());
}
// Argument validation for GetChars(Encoding, ReadOnlySequence<byte>, IBufferWriter<char>):
// a null encoding or a null writer must raise ArgumentNullException naming that parameter.
public static void GetChars_Encoding_ReadOnlySequence_IBufferWriter_ParamChecks()
{
    ReadOnlySequence<byte> emptySequence = new ReadOnlySequence<byte>(Array.Empty<byte>());
    IBufferWriter<char> validWriter = new ArrayBufferWriter<char>();

    // Null encoding, valid writer.
    Assert.Throws<ArgumentNullException>(
        "encoding",
        () => EncodingExtensions.GetChars((Encoding)null, emptySequence, validWriter));

    // Valid encoding, null writer.
    Assert.Throws<ArgumentNullException>(
        "writer",
        () => EncodingExtensions.GetChars(Encoding.UTF8, emptySequence, (IBufferWriter<char>)null));
}
// Decodes a ReadOnlySequence<byte> built from a single array ("Hello" as UTF-8) into an
// ArrayBufferWriter<char> and checks both the reported char count and the written text.
public static void GetChars_Encoding_ReadOnlySequence_IBufferWriter_SingleSegment()
{
    byte[] helloUtf8 = Encoding.UTF8.GetBytes("Hello");
    ReadOnlySequence<byte> singleSegment = new ReadOnlySequence<byte>(helloUtf8);
    ArrayBufferWriter<char> destination = new ArrayBufferWriter<char>();

    long charCount = EncodingExtensions.GetChars(Encoding.UTF8, singleSegment, destination);

    Assert.Equal(5, charCount);
    Assert.Equal("Hello", destination.WrittenSpan.ToString());
}
// Decodes a large repeating sequence built from AllScalarsAsUtf8 into a writer constructed
// from AllScalarsAsUtf16, then checks the reported and recorded char totals.
[OuterLoop] // this test takes ~10 seconds on modern hardware since it operates over GBs of data
public static void GetChars_Encoding_ReadOnlySequence_IBufferWriter_LargeMultiSegment()
{
    // Number of times the source data is repeated in the input sequence.
    const int RepeatCount = 1500;

    ReadOnlySequence<byte> utf8Input = GetLargeRepeatingReadOnlySequence<byte>(AllScalarsAsUtf8, RepeatCount); // ~ 6.5bn bytes of UTF-8 input
    RepeatingValidatingBufferWriter<char> validatingWriter = new RepeatingValidatingBufferWriter<char>(AllScalarsAsUtf16);

    // Widen to long before multiplying, as the original expression does.
    long expectedCharCount = RepeatCount * (long)AllScalarsAsUtf16.Length;

    long actualCharCount = EncodingExtensions.GetChars(Encoding.UTF8, utf8Input, validatingWriter);

    Assert.Equal(expectedCharCount, actualCharCount);
    Assert.Equal(expectedCharCount, validatingWriter.TotalElementsWritten); // our writer will validate as data is written to it
}
// Decodes ReadOnlySequence<byte> inputs into a stack-allocated Span<char> via
// GetChars(Encoding, ReadOnlySequence<byte>, Span<char>), first from a single segment and
// then from several segments that split multi-byte UTF-8 sequences.
public static void GetChars_Encoding_ReadOnlySequence_Span()
{
    Span<char> destination = stackalloc char[32];

    // First try the single-segment code path.
    ReadOnlySequence<byte> singleSegment = new ReadOnlySequence<byte>(Encoding.UTF8.GetBytes("Hello!"));
    int singleSegmentCharCount = EncodingExtensions.GetChars(Encoding.UTF8, singleSegment, destination);

    Assert.Equal("Hello!", destination.Slice(0, singleSegmentCharCount).ToString());

    // Next try the multi-segment code path.
    // We've intentionally split multi-byte subsequences here to test flushing mechanisms.
    ReadOnlySequence<byte> multiSegment = SequenceFactory.Create(
        new byte[] { 0x20 }, // U+0020
        new byte[] { 0x61, 0xC2 }, // U+0061 and U+0080 (continues on next line)
        new byte[] { 0x80, 0xED }, // (cont.) + U+D7FF (continues on next line)
        new byte[] { }, // empty segment, just to make sure we handle it correctly
        new byte[] { 0x9F, 0xBF, 0xF4, 0x80 }, // (cont.) + U+100000 (continues on next line)
        new byte[] { 0x80, 0x80 }, // (cont.)
        new byte[] { 0xC2 }); // leftover data (should be replaced)
    int multiSegmentCharCount = EncodingExtensions.GetChars(Encoding.UTF8, multiSegment, destination);

    Assert.Equal(
        "\u0020\u0061\u0080\ud7ff\U00100000\ufffd",
        destination.Slice(0, multiSegmentCharCount).ToString());
}
// Argument validation for GetChars(Encoding, ReadOnlySequence<byte>, Span<char>):
// a null encoding must raise ArgumentNullException naming the "encoding" parameter.
public static void GetChars_Encoding_ReadOnlySequence_Span_ParamChecks()
{
    ReadOnlySequence<byte> emptySequence = new ReadOnlySequence<byte>(Array.Empty<byte>());

    Assert.Throws<ArgumentNullException>(
        "encoding",
        () => EncodingExtensions.GetChars((Encoding)null, emptySequence, Span<char>.Empty));
}