public void Decode_NonUtf8()
{
    // Verifies two things for input that is not valid UTF-8:
    //   1. decoding with a strict UTF-8 encoding throws, and
    //   2. EncodedStringText.Create still produces the expected text and reports code page 1252.

    // Strict UTF-8: no byte order mark is emitted and invalid byte sequences throw.
    var strictUtf8 = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true);

    // The expected text encoded in CodePage 1252; this happens to be an illegal sequence when read as UTF-8.
    var cp1252Bytes = new byte[]
    {
        0x61, 0x62, 0x63, 0x20,
        0x64, 0x65, 0x66, 0x20,
        0x62, 0x61, 0x7a, 0x20,
        0x61, 0x65, 0x69, 0x6f, 0x75, 0x79, 0x20,
        0x80, 0x92, 0xA4, 0xB6, 0xC9, 0xDB, 0xED, 0xFF
    };

    // Unicode form of the same text; the trailing extended characters map to interesting
    // code points in CodePage 1252.
    var expectedText = "abc def baz aeiouy \u20ac\u2019\u00a4\u00b6\u00c9\u00db\u00ed\u00ff";

    // Part 1: the bytes must be rejected by the strict UTF-8 decoder.
    using (var input = new MemoryStream(cp1252Bytes))
    {
        Assert.Throws(
            typeof(DecoderFallbackException),
            () =>
            {
                EncodedStringText.Decode(input, strictUtf8, SourceHashAlgorithm.Sha1);
            });

        // The failed decode must leave the stream readable.
        Assert.True(input.CanRead);
    }

    // Part 2: encoding detection should pick CodePage 1252.
    using (var input = new MemoryStream(cp1252Bytes))
    {
        var decoded = EncodedStringText.Create(input);
        Assert.Equal(expectedText, decoded.ToString());

        // Check that the reported encoding is a complete Encoding implementation.
        var detected = decoded.Encoding;
        Assert.Equal(1252, detected.CodePage);
        Assert.NotNull(detected.GetEncoder());
        Assert.NotNull(detected.GetDecoder());
        Assert.Equal(2, detected.GetMaxByteCount(1));
        Assert.Equal(1, detected.GetMaxCharCount(1));
        Assert.Equal(expectedText, detected.GetString(cp1252Bytes));

        // A successful decode must leave the stream readable as well.
        Assert.True(input.CanRead);
    }
}
public void Decode_NonUtf8()
{
    // Verifies that text encoded with Encoding.Default is rejected by a strict UTF-8 decode
    // and is recovered by EncodedStringText.DetectEncodingAndDecode.

    // Strict UTF-8: no byte order mark is emitted and invalid byte sequences throw.
    var strictUtf8 = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true);

    // The non-ASCII tail of the expected text is produced by decoding these bytes with
    // Encoding.Default, so the exact characters follow the environment's default encoding.
    var defaultEncoding = Encoding.Default;
    var highBytes = new byte[] { 0x80, 0x92, 0xA4, 0xB6, 0xC9, 0xDB, 0xED, 0xFF };
    var expectedText = "abc def baz aeiouy " + defaultEncoding.GetString(highBytes);
    var encodedBytes = GetBytes(defaultEncoding, expectedText);

    // Encoding.Default output should not decode as UTF-8.
    using (var input = new MemoryStream(encodedBytes))
    {
        Assert.Throws(
            typeof(DecoderFallbackException),
            () => EncodedStringText.Decode(input, strictUtf8));

        // The failed decode must leave the stream readable.
        Assert.True(input.CanRead);
    }

    // Encoding detection should correctly pick Encoding.Default.
    using (var input = new MemoryStream(encodedBytes))
    {
        var decodedText = EncodedStringText.DetectEncodingAndDecode(input);
        Assert.Equal(expectedText, decodedText);

        // A successful decode must leave the stream readable as well.
        Assert.True(input.CanRead);
    }
}
public void Decode_NonUtf8()
{
    // Verifies that text encoded with code page 1252 is rejected by a strict UTF-8 decode
    // and is recovered by EncodedStringText.DetectEncodingAndDecode.

    var expectedText = "abc def baz aeiouy äëïöüû";

    // Encode the expected text with code page 1252 via the GetBytes helper.
    var codePage1252 = Encoding.GetEncoding(1252);
    var encodedBytes = GetBytes(codePage1252, expectedText);

    // Strict UTF-8: no byte order mark is emitted and invalid byte sequences throw.
    var strictUtf8 = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true);

    // Code page 1252 output should not decode as UTF-8.
    using (var input = new MemoryStream(encodedBytes))
    {
        Assert.Throws(
            typeof(DecoderFallbackException),
            () => EncodedStringText.Decode(input, strictUtf8));

        // The failed decode must leave the stream readable.
        Assert.True(input.CanRead);
    }

    // Encoding detection should correctly pick 1252.
    using (var input = new MemoryStream(encodedBytes))
    {
        var decodedText = EncodedStringText.DetectEncodingAndDecode(input);
        Assert.Equal(expectedText, decodedText);

        // A successful decode must leave the stream readable as well.
        Assert.True(input.CanRead);
    }
}