Exemple #1
0
        /// <summary>
        /// Verifies that a character outside the Basic Multilingual Plane survives a
        /// UTF-8 encode / decode round trip.
        /// </summary>
        /// <remarks>This is a regression test for HADOOP-9103.</remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestNonBasicMultilingualPlane()
        {
            // U+1F431 "CAT FACE", written here as its UTF-16 surrogate pair.
            // See http://www.fileformat.info/info/unicode/char/1f431/index.htm
            string original = "\uD83D\uDC31";

            // The character is expected to occupy exactly four bytes in UTF-8.
            byte[] utf8Bytes = Runtime.GetBytesForString(original, "UTF-8");
            Assert.Equal(4, utf8Bytes.Length);
            string hex = StringUtils.ByteToHexString(utf8Bytes);
            Assert.Equal("f09f90b1", hex);

            // Decode with our own decoder (UTF8.FromBytes) and compare with the input.
            string decoded = UTF8.FromBytes(utf8Bytes);
            Assert.Equal(original, decoded);
        }
Exemple #2
0
 /// <summary>
 /// Checks that decoding a UTF-8 sequence that was cut off mid-character raises
 /// a <c>UTFDataFormatException</c> rather than some other exception type.
 /// </summary>
 /// <exception cref="System.Exception"/>
 public virtual void TestInvalidUTF8Truncated()
 {
     // CAT FACE (U+1F431) is a four-byte sequence in UTF-8; only its first
     // three bytes are supplied here, so the input ends mid-character.
     byte[] partialSequence = { 0xF0, 0x9F, 0x90 };
     const string expectedText = "Truncated UTF8 at f09f90";

     try
     {
         UTF8.FromBytes(partialSequence);
         // Reaching this line means the decoder accepted the truncated input.
         Fail("did not throw an exception");
     }
     catch (UTFDataFormatException e)
     {
         GenericTestUtils.AssertExceptionContains(expectedText, e);
     }
 }
Exemple #3
0
 /// <summary>
 /// Checks that a 5-byte UTF-8 sequence, which is now considered illegal, is
 /// rejected with a <c>UTFDataFormatException</c>.
 /// </summary>
 /// <exception cref="System.Exception"/>
 public virtual void Test5ByteUtf8Sequence()
 {
     byte[] fiveByteInput =
     {
         0x01, 0x02,                     // bytes preceding the bad sequence
         0xf8, 0x88, 0x80, 0x80, 0x80,   // the 5-byte sequence under test
         0x04, 0x05                      // bytes following it
     };
     // The hex digits below are the six input bytes starting at 0xf8.
     const string expectedText = "Invalid UTF8 at f88880808004";

     try
     {
         UTF8.FromBytes(fiveByteInput);
         // Reaching this line means the decoder accepted the illegal input.
         Fail("did not throw an exception");
     }
     catch (UTFDataFormatException e)
     {
         GenericTestUtils.AssertExceptionContains(expectedText, e);
     }
 }
Exemple #4
0
 /// <summary>
 /// Checks that decoding bytes that are not valid UTF-8 raises a
 /// <c>UTFDataFormatException</c> carrying an appropriate error message.
 /// </summary>
 /// <exception cref="System.Exception"/>
 public virtual void TestInvalidUTF8()
 {
     byte[] badInput =
     {
         0x01, 0x02,                     // bytes preceding the bad ones
         0xff, 0xff,                     // 0xff never appears in well-formed UTF-8
         0x01, 0x02, 0x03, 0x04, 0x05    // bytes following them
     };
     // The hex digits below are the six input bytes starting at the first 0xff.
     const string expectedText = "Invalid UTF8 at ffff01020304";

     try
     {
         UTF8.FromBytes(badInput);
         // Reaching this line means the decoder accepted the invalid input.
         Fail("did not throw an exception");
     }
     catch (UTFDataFormatException e)
     {
         GenericTestUtils.AssertExceptionContains(expectedText, e);
     }
 }