Ejemplo n.º 1
0
            public void TestUtf32BE()
            {
                // Decoding surrogate code points through a big-endian UTF-32 codec
                // (no byte order mark) wrapped in the surrogate-pass encoding.
                var utf32BigEndian = new UTF32Encoding(bigEndian: true, byteOrderMark: false);
                Encoding codec = new PythonSurrogatePassEncoding(utf32BigEndian);

                // A high surrogate on its own
                var loneHigh = "\x00\x00\xd8\x10".AsBytes();
                Assert.That(codec.GetChars(loneHigh), Is.EqualTo("\ud810"));

                // A low surrogate on its own
                var loneLow = "\x00\x00\xdc\n".AsBytes();
                Assert.That(codec.GetChars(loneLow), Is.EqualTo("\udc0a"));

                // Surrogates in the wrong order (low first, then high)
                var reversedPair = "\x00\x00\xdeQ\x00\x00\xda/".AsBytes();
                Assert.That(codec.GetChars(reversedPair), Is.EqualTo("\ude51\uda2f"));
            }
Ejemplo n.º 2
0
            public void TestUtf16BE()
            {
                // Decoding surrogate code units through big-endian UTF-16
                // wrapped in the surrogate-pass encoding.
                Encoding codec = new PythonSurrogatePassEncoding(Encoding.BigEndianUnicode);

                // A high surrogate on its own
                var loneHigh = "\xd8\x10".AsBytes();
                Assert.That(codec.GetChars(loneHigh), Is.EqualTo("\ud810"));

                // A low surrogate on its own
                var loneLow = "\xdc\n".AsBytes();
                Assert.That(codec.GetChars(loneLow), Is.EqualTo("\udc0a"));

                // Surrogates in the wrong order (low first, then high)
                var reversedPair = "\xdeQ\xda/".AsBytes();
                Assert.That(codec.GetChars(reversedPair), Is.EqualTo("\ude51\uda2f"));
            }
Ejemplo n.º 3
0
            public void TestUtf16LE()
            {
                // Decoding surrogate code units through little-endian UTF-16
                // wrapped in the surrogate-pass encoding.
                Encoding codec = new PythonSurrogatePassEncoding(Encoding.Unicode);

                // A high surrogate on its own
                var loneHigh = "\x10\xd8".AsBytes();
                Assert.That(codec.GetChars(loneHigh), Is.EqualTo("\ud810"));

                // A low surrogate on its own
                var loneLow = "\n\xdc".AsBytes();
                Assert.That(codec.GetChars(loneLow), Is.EqualTo("\udc0a"));

                // Surrogates in the wrong order (low first, then high)
                var reversedPair = "Q\xde/\xda".AsBytes();
                Assert.That(codec.GetChars(reversedPair), Is.EqualTo("\ude51\uda2f"));
            }
Ejemplo n.º 4
0
            public void TestUtf7()
            {
                // Strictly speaking UTF-7 has no "surrogatepass" support. It is nonetheless
                // expected to decode any surrogate characters out of their ASCII-mangled
                // representation, with no fallback involved.
                var utf7 = new UTF7Encoding(allowOptionals: true);
                Encoding codec = new PythonSurrogatePassEncoding(utf7);

                // A high surrogate on its own
                var loneHigh = "abc+2BA-xyz".AsBytes();
                Assert.That(codec.GetChars(loneHigh), Is.EqualTo("abc\ud810xyz"));

                // A low surrogate on its own
                var loneLow = "abc+3Ao-xyz".AsBytes();
                Assert.That(codec.GetChars(loneLow), Is.EqualTo("abc\udc0axyz"));

                // Surrogates in the wrong order (low first, then high)
                var reversedPair = "abc+3lHaLw-xyz".AsBytes();
                Assert.That(codec.GetChars(reversedPair), Is.EqualTo("abc\ude51\uda2fxyz"));
            }
Ejemplo n.º 5
0
            public void TestAscii()
            {
                // Only UTF-8, UTF-16LE, UTF-16BE, UTF-32LE and UTF-32BE support 'surrogatepass'.
                // With any other encoding it remains usable provided no encoding error occurs.
                Encoding codec = new PythonSurrogatePassEncoding(Encoding.ASCII);

                // Plain ASCII input decodes unchanged
                var plainAscii = "abc".AsBytes();
                Assert.That(codec.GetChars(plainAscii), Is.EqualTo("abc"));

                // Trying to decode surrogates from ASCII throws an exception.
                // This mirrors CPython 3.5; CPython 3.4 would blindly extract
                // UTF-8 encoded surrogates from ASCII.

                // A high surrogate on its own, UTF-8 encoded
                TestDelegate decodeLoneHigh = () => codec.GetChars("\xed\xa0\x90".AsBytes());
                Assert.Throws<DecoderFallbackException>(decodeLoneHigh);

                // A low surrogate on its own, UTF-8 encoded
                TestDelegate decodeLoneLow = () => codec.GetChars("\xed\xb0\x8a".AsBytes());
                Assert.Throws<DecoderFallbackException>(decodeLoneLow);

                // Surrogates in the wrong order (low first, then high), UTF-8 encoded
                TestDelegate decodeReversedPair = () => codec.GetChars("\xed\xb9\x91\xed\xa8\xaf".AsBytes());
                Assert.Throws<DecoderFallbackException>(decodeReversedPair);
            }
Ejemplo n.º 6
0
            public void TestUtf8()
            {
                // Exercises surrogate decoding through UTF-8 wrapped in the surrogate-pass encoding:
                // first well-formed three-byte surrogate sequences via the one-shot GetChars,
                // then malformed/truncated sequences via both the one-shot call and a
                // Decoder fed in two consecutive chunks.
                Encoding penc = new PythonSurrogatePassEncoding(Encoding.UTF8);

                // lone high surrogate
                Assert.AreEqual("abc\ud810xyz", penc.GetChars("abc\xed\xa0\x90xyz".AsBytes()));

                // lone low surrogate
                Assert.AreEqual("abc\udc0axyz", penc.GetChars("abc\xed\xb0\x8axyz".AsBytes()));

                // invalid surrogate pair (low, high)
                Assert.AreEqual("abc\ude51\uda2fxyz", penc.GetChars("abc\xed\xb9\x91\xed\xa8\xafxyz".AsBytes()));

                // valid surrogate pair (high, low)
                Assert.AreEqual("abc\uda2f\ude51xyz", penc.GetChars("abc\xed\xa8\xaf\xed\xb9\x91xyz".AsBytes()));

                // Scratch output buffer for the Decoder.GetChars calls below
                // (the largest count asserted in this test is 6).
                var chars = new char[9];

                // broken lone high surrogate
                // ('-' directly follows the 0xED lead byte, interrupting the three-byte sequence)
                var bytes = "abc\xed-\xa0\x90xyz".AsBytes();

                // One-shot decode: the failure is expected at offset 3, where 0xED sits in the input.
                Assert.That(() => penc.GetChars(bytes),
                            Throws.TypeOf <DecoderFallbackException>()
                            .With.Property("Index").EqualTo(3)
                            .And.Property("BytesUnknown").One.EqualTo(0xed));

                // The same decoder instance is reused (with Reset) for all chunked scenarios below.
                var dec = penc.GetDecoder();

                // First chunk is "abc" + 0xED: three chars are produced and no error is raised yet
                // (presumably 0xED is kept as pending decoder state; confirm against the decoder).
                Assert.That(dec.GetCharCount(bytes, 0, 4, flush: false), Is.EqualTo(3));
                Assert.That(dec.GetChars(bytes, 0, 4, chars, 0, flush: false), Is.EqualTo(3));
                // Second chunk starts at offset 4; the expected Index of -1 equals the offset of
                // 0xED (3) relative to the start of this chunk (4).
                Assert.That(() => dec.GetCharCount(bytes, 4, 4, flush: false),
                            Throws.TypeOf <DecoderFallbackException>()
                            .With.Property("Index").EqualTo(-1)
                            .And.Property("BytesUnknown").One.EqualTo(0xed));

                // broken in a different way
                // ('-' is placed after the second byte 0xA0 instead of after the lead byte)
                bytes = "abc\xed\xa0-\x90xyz".AsBytes();
                Assert.That(() => penc.GetChars(bytes),
                            Throws.TypeOf <DecoderFallbackException>()
                            .With.Property("Index").EqualTo(3)
                            .And.Property("BytesUnknown").One.EqualTo(0xed));

                // Chunk boundary right after 0xED (offset 4): expected Index is 3 - 4 = -1.
                dec.Reset();
                Assert.That(dec.GetCharCount(bytes, 0, 4, flush: false), Is.EqualTo(3));
                Assert.That(dec.GetChars(bytes, 0, 4, chars, 0, flush: false), Is.EqualTo(3));
                Assert.That(() => dec.GetCharCount(bytes, 4, 4, flush: false),
                            Throws.TypeOf <DecoderFallbackException>()
                            .With.Property("Index").EqualTo(-1)
                            .And.Property("BytesUnknown").One.EqualTo(0xed));

                // Chunk boundary after 0xED 0xA0 (offset 5): expected Index is 3 - 5 = -2.
                dec.Reset();
                Assert.That(dec.GetCharCount(bytes, 0, 5, flush: false), Is.EqualTo(3));
                Assert.That(dec.GetChars(bytes, 0, 5, chars, 0, flush: false), Is.EqualTo(3));
                Assert.That(() => dec.GetCharCount(bytes, 5, 3, flush: false),
                            Throws.TypeOf <DecoderFallbackException>()
                            .With.Property("Index").EqualTo(-2)
                            .And.Property("BytesUnknown").One.EqualTo(0xed));

                // unfinished surrogate sequence in the middle
                // (only two of the three bytes are present before "xyz")
                bytes = "abc\xed\xa0xyz".AsBytes();
                Assert.That(() => penc.GetChars(bytes),
                            Throws.TypeOf <DecoderFallbackException>()
                            .With.Property("Index").EqualTo(3)
                            .And.Property("BytesUnknown").One.EqualTo(0xed));

                // Chunk boundary after 0xED 0xA0 (offset 5): expected Index is 3 - 5 = -2.
                dec.Reset();
                Assert.That(dec.GetCharCount(bytes, 0, 5, flush: false), Is.EqualTo(3));
                Assert.That(dec.GetChars(bytes, 0, 5, chars, 0, flush: false), Is.EqualTo(3));
                Assert.That(() => dec.GetCharCount(bytes, 5, 2, flush: false),
                            Throws.TypeOf <DecoderFallbackException>()
                            .With.Property("Index").EqualTo(-2)
                            .And.Property("BytesUnknown").One.EqualTo(0xed));

                // unfinished surrogate sequence at the end
                // (the input stops after 0xED 0xA0; 0xED is at offset 6)
                bytes = "abcxyz\xed\xa0".AsBytes();
                Assert.That(() => penc.GetChars(bytes),
                            Throws.TypeOf <DecoderFallbackException>()
                            .With.Property("Index").EqualTo(6)
                            .And.Property("BytesUnknown").One.EqualTo(0xed));

                // First chunk is "abcxyz" + 0xED; the final one-byte chunk is passed with
                // flush: true, and the expected Index is 6 - 7 = -1.
                dec.Reset();
                Assert.That(dec.GetCharCount(bytes, 0, 7, flush: false), Is.EqualTo(6));
                Assert.That(dec.GetChars(bytes, 0, 7, chars, 0, flush: false), Is.EqualTo(6));
                Assert.That(() => dec.GetCharCount(bytes, 7, 1, flush: true),
                            Throws.TypeOf <DecoderFallbackException>()
                            .With.Property("Index").EqualTo(-1)
                            .And.Property("BytesUnknown").One.EqualTo(0xed));
            }