示例#1
0
            // Checks what PythonSurrogatePassEncoding produces for unpaired surrogates when it wraps
            // big-endian UTF-32 without a byte order mark. Expected bytes are written as string
            // literals and converted with AsBytes() (presumably one byte per character).
            public void TestUtf32BE()
            {
                var utf32BigEndian = new UTF32Encoding(bigEndian: true, byteOrderMark: false);
                Encoding surrogatePass = new PythonSurrogatePassEncoding(utf32BigEndian);

                // A high surrogate with no low surrogate after it.
                var expectedHigh = "\x00\x00\xd8\x10".AsBytes();
                var actualHigh = surrogatePass.GetBytes("\ud810");
                Assert.AreEqual(expectedHigh, actualHigh);

                // A low surrogate with no high surrogate before it.
                var expectedLow = "\x00\x00\xdc\n".AsBytes();
                var actualLow = surrogatePass.GetBytes("\udc0a");
                Assert.AreEqual(expectedLow, actualLow);

                // Two surrogates in the wrong order (low first, then high): each one is
                // expected to be encoded on its own.
                var expectedReversed = "\x00\x00\xdeQ\x00\x00\xda/".AsBytes();
                var actualReversed = surrogatePass.GetBytes("\ude51\uda2f");
                Assert.AreEqual(expectedReversed, actualReversed);
            }
示例#2
0
            // Checks what PythonSurrogatePassEncoding produces for unpaired surrogates when it wraps
            // Encoding.BigEndianUnicode (UTF-16BE). Expected bytes are written as string literals
            // and converted with AsBytes() (presumably one byte per character).
            public void TestUtf16BE()
            {
                Encoding surrogatePass = new PythonSurrogatePassEncoding(Encoding.BigEndianUnicode);

                // A high surrogate with no low surrogate after it.
                var expectedHigh = "\xd8\x10".AsBytes();
                var actualHigh = surrogatePass.GetBytes("\ud810");
                Assert.AreEqual(expectedHigh, actualHigh);

                // A low surrogate with no high surrogate before it.
                var expectedLow = "\xdc\n".AsBytes();
                var actualLow = surrogatePass.GetBytes("\udc0a");
                Assert.AreEqual(expectedLow, actualLow);

                // Two surrogates in the wrong order (low first, then high).
                var expectedReversed = "\xdeQ\xda/".AsBytes();
                var actualReversed = surrogatePass.GetBytes("\ude51\uda2f");
                Assert.AreEqual(expectedReversed, actualReversed);
            }
示例#3
0
            // Checks what PythonSurrogatePassEncoding produces for unpaired surrogates when it wraps
            // Encoding.Unicode (UTF-16LE). Expected bytes are written as string literals and
            // converted with AsBytes() (presumably one byte per character).
            public void TestUtf16LE()
            {
                Encoding surrogatePass = new PythonSurrogatePassEncoding(Encoding.Unicode);

                // A high surrogate with no low surrogate after it.
                var expectedHigh = "\x10\xd8".AsBytes();
                var actualHigh = surrogatePass.GetBytes("\ud810");
                Assert.AreEqual(expectedHigh, actualHigh);

                // A low surrogate with no high surrogate before it.
                var expectedLow = "\n\xdc".AsBytes();
                var actualLow = surrogatePass.GetBytes("\udc0a");
                Assert.AreEqual(expectedLow, actualLow);

                // Two surrogates in the wrong order (low first, then high).
                var expectedReversed = "Q\xde/\xda".AsBytes();
                var actualReversed = surrogatePass.GetBytes("\ude51\uda2f");
                Assert.AreEqual(expectedReversed, actualReversed);
            }
示例#4
0
            // Checks what PythonSurrogatePassEncoding produces when it wraps Encoding.UTF8 and the
            // input has unpaired surrogates embedded between ordinary ASCII text ("abc" ... "xyz").
            // Expected bytes are written as string literals and converted with AsBytes()
            // (presumably one byte per character).
            public void TestUtf8()
            {
                Encoding surrogatePass = new PythonSurrogatePassEncoding(Encoding.UTF8);

                // A high surrogate with no low surrogate after it.
                var expectedHigh = "abc\xed\xa0\x90xyz".AsBytes();
                var actualHigh = surrogatePass.GetBytes("abc\ud810xyz");
                Assert.AreEqual(expectedHigh, actualHigh);

                // A low surrogate with no high surrogate before it.
                var expectedLow = "abc\xed\xb0\x8axyz".AsBytes();
                var actualLow = surrogatePass.GetBytes("abc\udc0axyz");
                Assert.AreEqual(expectedLow, actualLow);

                // Two surrogates in the wrong order (low first, then high).
                var expectedReversed = "abc\xed\xb9\x91\xed\xa8\xafxyz".AsBytes();
                var actualReversed = surrogatePass.GetBytes("abc\ude51\uda2fxyz");
                Assert.AreEqual(expectedReversed, actualReversed);
            }
示例#5
0
            // Checks what PythonSurrogatePassEncoding produces when it wraps UTF-7 and the input has
            // unpaired surrogates embedded between ordinary ASCII text ("abc" ... "xyz").
            public void TestUtf7()
            {
                // "surrogatepass" is not supported for UTF-7 as such. UTF-7 is nevertheless
                // supposed to encode any surrogate characters into its ASCII-mangled form
                // on its own, without needing any fallback support.
                var utf7 = new UTF7Encoding(allowOptionals: true);
                Encoding surrogatePass = new PythonSurrogatePassEncoding(utf7);

                // A high surrogate with no low surrogate after it.
                var expectedHigh = "abc+2BA-xyz".AsBytes();
                var actualHigh = surrogatePass.GetBytes("abc\ud810xyz");
                Assert.AreEqual(expectedHigh, actualHigh);

                // A low surrogate with no high surrogate before it.
                var expectedLow = "abc+3Ao-xyz".AsBytes();
                var actualLow = surrogatePass.GetBytes("abc\udc0axyz");
                Assert.AreEqual(expectedLow, actualLow);

                // Two surrogates in the wrong order (low first, then high).
                var expectedReversed = "abc+3lHaLw-xyz".AsBytes();
                var actualReversed = surrogatePass.GetBytes("abc\ude51\uda2fxyz");
                Assert.AreEqual(expectedReversed, actualReversed);
            }
示例#6
0
            // Checks PythonSurrogatePassEncoding wrapped around Encoding.ASCII: plain ASCII text is
            // encoded normally, while any surrogate input raises EncoderFallbackException.
            public void TestAscii()
            {
                // 'surrogatepass' is only supported for UTF-8, UTF-16LE, UTF-16BE, UTF-32LE and UTF-32BE.
                // It can still be combined with other encodings, provided no encoding error occurs.
                Encoding surrogatePass = new PythonSurrogatePassEncoding(Encoding.ASCII);

                // Pure ASCII input: nothing for the error handler to do.
                var expectedPlain = "abc".AsBytes();
                var actualPlain = surrogatePass.GetBytes("abc");
                Assert.AreEqual(expectedPlain, actualPlain);

                // Encoding surrogates to ASCII must throw. This mirrors CPython 3.5;
                // CPython 3.4 would instead happily contaminate the ASCII output with
                // UTF-8 encoded surrogates.
                string[] unencodableInputs =
                {
                    "\ud810",        // lone high surrogate
                    "\udc0a",        // lone low surrogate
                    "\ude51\uda2f",  // invalid surrogate pair (low, high)
                };

                foreach (string text in unencodableInputs)
                {
                    Assert.Throws<EncoderFallbackException>(() => surrogatePass.GetBytes(text));
                }
            }