/// <summary>
/// Encodes unpaired and mis-ordered surrogates through a
/// <c>PythonSurrogatePassEncoding</c> wrapped around big-endian UTF-32
/// (no byte order mark) and compares the output with the expected bytes.
/// </summary>
public void TestUtf32BE() {
    var utf32BigEndian = new UTF32Encoding(bigEndian: true, byteOrderMark: false);
    Encoding surrogatePass = new PythonSurrogatePassEncoding(utf32BigEndian);

    // A high surrogate with no low surrogate following it.
    var expectedHigh = "\x00\x00\xd8\x10".AsBytes();
    var actualHigh = surrogatePass.GetBytes("\ud810");
    Assert.AreEqual(expectedHigh, actualHigh);

    // A low surrogate with no high surrogate preceding it.
    var expectedLow = "\x00\x00\xdc\n".AsBytes();
    var actualLow = surrogatePass.GetBytes("\udc0a");
    Assert.AreEqual(expectedLow, actualLow);

    // Both surrogates present but in the wrong order (low first, then high).
    var expectedSwapped = "\x00\x00\xdeQ\x00\x00\xda/".AsBytes();
    var actualSwapped = surrogatePass.GetBytes("\ude51\uda2f");
    Assert.AreEqual(expectedSwapped, actualSwapped);
}
/// <summary>
/// Encodes unpaired and mis-ordered surrogates through a
/// <c>PythonSurrogatePassEncoding</c> wrapped around
/// <see cref="Encoding.BigEndianUnicode"/> and compares the output with
/// the expected bytes.
/// </summary>
public void TestUtf16BE() {
    Encoding surrogatePass = new PythonSurrogatePassEncoding(Encoding.BigEndianUnicode);

    // A high surrogate with no low surrogate following it.
    var expectedHigh = "\xd8\x10".AsBytes();
    var actualHigh = surrogatePass.GetBytes("\ud810");
    Assert.AreEqual(expectedHigh, actualHigh);

    // A low surrogate with no high surrogate preceding it.
    var expectedLow = "\xdc\n".AsBytes();
    var actualLow = surrogatePass.GetBytes("\udc0a");
    Assert.AreEqual(expectedLow, actualLow);

    // Both surrogates present but in the wrong order (low first, then high).
    var expectedSwapped = "\xdeQ\xda/".AsBytes();
    var actualSwapped = surrogatePass.GetBytes("\ude51\uda2f");
    Assert.AreEqual(expectedSwapped, actualSwapped);
}
/// <summary>
/// Encodes unpaired and mis-ordered surrogates through a
/// <c>PythonSurrogatePassEncoding</c> wrapped around
/// <see cref="Encoding.Unicode"/> and compares the output with the
/// expected bytes.
/// </summary>
public void TestUtf16LE() {
    Encoding surrogatePass = new PythonSurrogatePassEncoding(Encoding.Unicode);

    // A high surrogate with no low surrogate following it.
    var expectedHigh = "\x10\xd8".AsBytes();
    var actualHigh = surrogatePass.GetBytes("\ud810");
    Assert.AreEqual(expectedHigh, actualHigh);

    // A low surrogate with no high surrogate preceding it.
    var expectedLow = "\n\xdc".AsBytes();
    var actualLow = surrogatePass.GetBytes("\udc0a");
    Assert.AreEqual(expectedLow, actualLow);

    // Both surrogates present but in the wrong order (low first, then high).
    var expectedSwapped = "Q\xde/\xda".AsBytes();
    var actualSwapped = surrogatePass.GetBytes("\ude51\uda2f");
    Assert.AreEqual(expectedSwapped, actualSwapped);
}
/// <summary>
/// Encodes strings that embed unpaired and mis-ordered surrogates between
/// ASCII text through a <c>PythonSurrogatePassEncoding</c> wrapped around
/// <see cref="Encoding.UTF8"/> and compares the output with the expected
/// bytes.
/// </summary>
public void TestUtf8() {
    Encoding surrogatePass = new PythonSurrogatePassEncoding(Encoding.UTF8);

    // A high surrogate with no low surrogate following it.
    var expectedHigh = "abc\xed\xa0\x90xyz".AsBytes();
    var actualHigh = surrogatePass.GetBytes("abc\ud810xyz");
    Assert.AreEqual(expectedHigh, actualHigh);

    // A low surrogate with no high surrogate preceding it.
    var expectedLow = "abc\xed\xb0\x8axyz".AsBytes();
    var actualLow = surrogatePass.GetBytes("abc\udc0axyz");
    Assert.AreEqual(expectedLow, actualLow);

    // Both surrogates present but in the wrong order (low first, then high).
    var expectedSwapped = "abc\xed\xb9\x91\xed\xa8\xafxyz".AsBytes();
    var actualSwapped = surrogatePass.GetBytes("abc\ude51\uda2fxyz");
    Assert.AreEqual(expectedSwapped, actualSwapped);
}
/// <summary>
/// Encodes strings that embed unpaired and mis-ordered surrogates through a
/// <c>PythonSurrogatePassEncoding</c> wrapped around UTF-7 (optional
/// characters allowed) and compares the output with the expected bytes.
/// </summary>
public void TestUtf7() {
    // "surrogatepass" is not supported for UTF-7 per se. UTF-7 is nevertheless
    // supposed to encode any surrogate characters into its ASCII mangled form
    // without requiring any fallback support.
    var utf7 = new UTF7Encoding(allowOptionals: true);
    Encoding surrogatePass = new PythonSurrogatePassEncoding(utf7);

    // A high surrogate with no low surrogate following it.
    var expectedHigh = "abc+2BA-xyz".AsBytes();
    var actualHigh = surrogatePass.GetBytes("abc\ud810xyz");
    Assert.AreEqual(expectedHigh, actualHigh);

    // A low surrogate with no high surrogate preceding it.
    var expectedLow = "abc+3Ao-xyz".AsBytes();
    var actualLow = surrogatePass.GetBytes("abc\udc0axyz");
    Assert.AreEqual(expectedLow, actualLow);

    // Both surrogates present but in the wrong order (low first, then high).
    var expectedSwapped = "abc+3lHaLw-xyz".AsBytes();
    var actualSwapped = surrogatePass.GetBytes("abc\ude51\uda2fxyz");
    Assert.AreEqual(expectedSwapped, actualSwapped);
}
/// <summary>
/// Checks a <c>PythonSurrogatePassEncoding</c> wrapped around
/// <see cref="Encoding.ASCII"/>: plain ASCII text is encoded, while inputs
/// containing surrogates raise <see cref="EncoderFallbackException"/>.
/// </summary>
public void TestAscii() {
    // 'surrogatepass' is supported only for UTF-8, UTF-16LE, UTF-16BE, UTF-32LE, and UTF-32BE.
    // It can still be used with other encodings as long as no encoding errors occur.
    Encoding surrogatePass = new PythonSurrogatePassEncoding(Encoding.ASCII);

    // Text that is already clean ASCII.
    var expectedPlain = "abc".AsBytes();
    var actualPlain = surrogatePass.GetBytes("abc");
    Assert.AreEqual(expectedPlain, actualPlain);

    // Attempting to encode surrogates to ASCII throws an exception.
    // This matches CPython 3.5; CPython 3.4 would happily contaminate ASCII
    // with UTF-8 encoded surrogates.
    var surrogateInputs = new[] {
        "\ud810",        // lone high surrogate
        "\udc0a",        // lone low surrogate
        "\ude51\uda2f",  // invalid surrogate pair (low, high)
    };

    foreach (var input in surrogateInputs) {
        Assert.Throws<EncoderFallbackException>(() => surrogatePass.GetBytes(input));
    }
}