Example #1
0
        public void Decode_NonUtf8()
        {
            // Unicode text with extended characters that map to interesting code points in CodePage 1252.
            var text = "abc def baz aeiouy \u20ac\u2019\u00a4\u00b6\u00c9\u00db\u00ed\u00ff";

            // The same text encoded in CodePage 1252 which happens to be an illegal sequence if decoded as Utf-8.
            var bytes = new byte[]
            {
                0x61, 0x62, 0x63, 0x20, 0x64, 0x65, 0x66, 0x20, 0x62, 0x61, 0x7a, 0x20, 0x61, 0x65, 0x69, 0x6f, 0x75, 0x79, 0x20,
                0x80, 0x92, 0xA4, 0xB6, 0xC9, 0xDB, 0xED, 0xFF
            };

            var utf8 = new UTF8Encoding(false, true);

            // bytes should not decode to UTF-8
            using (var stream = new MemoryStream(bytes))
            {
                Assert.Throws(typeof(DecoderFallbackException), () =>
                {
                    EncodedStringText.Decode(stream, utf8, SourceHashAlgorithm.Sha1);
                });

                Assert.True(stream.CanRead);
            }

            // Detect encoding should correctly pick CodePage 1252
            using (var stream = new MemoryStream(bytes))
            {
                var sourceText = EncodedStringText.Create(stream);
                Assert.Equal(text, sourceText.ToString());

                // Check for a complete Encoding implementation.
                Assert.Equal(1252, sourceText.Encoding.CodePage);
                Assert.NotNull(sourceText.Encoding.GetEncoder());
                Assert.NotNull(sourceText.Encoding.GetDecoder());
                Assert.Equal(2, sourceText.Encoding.GetMaxByteCount(1));
                Assert.Equal(1, sourceText.Encoding.GetMaxCharCount(1));
                Assert.Equal(text, sourceText.Encoding.GetString(bytes));

                Assert.True(stream.CanRead);
            }
        }
Example #2
0
        public void Decode_NonUtf8()
        {
            var utf8  = new UTF8Encoding(false, true);
            var text  = "abc def baz aeiouy " + Encoding.Default.GetString(new byte[] { 0x80, 0x92, 0xA4, 0xB6, 0xC9, 0xDB, 0xED, 0xFF });
            var bytes = GetBytes(Encoding.Default, text);

            // Encoding.Default should not decode to UTF-8
            using (var stream = new MemoryStream(bytes))
            {
                Assert.Throws(typeof(DecoderFallbackException), () => EncodedStringText.Decode(stream, utf8));
                Assert.True(stream.CanRead);
            }

            // Detect encoding should correctly pick Encoding.Default
            using (var stream = new MemoryStream(bytes))
            {
                Assert.Equal(text, EncodedStringText.DetectEncodingAndDecode(stream));
                Assert.True(stream.CanRead);
            }
        }
        public void Decode_NonUtf8()
        {
            var encoding1252 = Encoding.GetEncoding(1252);
            var utf8         = new UTF8Encoding(false, true);
            var text         = "abc def baz aeiouy äëïöüû";
            var bytes        = GetBytes(encoding1252, text);

            // 1252 should not decode to UTF-8
            using (var stream = new MemoryStream(bytes))
            {
                Assert.Throws(typeof(DecoderFallbackException), () => EncodedStringText.Decode(stream, utf8));
                Assert.True(stream.CanRead);
            }

            // Detect encoding should correctly pick 1252
            using (var stream = new MemoryStream(bytes))
            {
                Assert.Equal(text, EncodedStringText.DetectEncodingAndDecode(stream));
                Assert.True(stream.CanRead);
            }
        }