// Runs CharacterEncodingDetector over the "xml-little-endian-jp.txt" resource and
// expects the result to be Encoding.Unicode with the stream position left at 0.
// The test is ignored when SysUtil.IsMono is true.
public void Detect_encoding_for_XML_file_with_LE_JP() {
    if (SysUtil.IsMono) {
        Assert.Ignore();
        return;
    }
    const string resourceName = "xml-little-endian-jp.txt";
    using (var input = GetResourceStream(resourceName)) {
        var detected = new CharacterEncodingDetector().Detect(input);

        // position is sampled immediately after detection; it is checked against 0
        // below and later passed on to AssertEncoding
        var position = input.Position;
        Assert.IsNotNull(detected, "encoding detection failed");
        Assert.AreEqual(0, position, "wrong stream position");

        // decode the stream from its current position using the detected encoding
        string content;
        using (var reader = new StreamReader(input, detected)) {
            content = reader.ReadToEnd();
        }
        Assert.AreEqual("Unicode", detected.EncodingName);
        AssertEncoding(content, resourceName, Encoding.Unicode, position);
        _log.DebugFormat("Detected encoding: {0}", detected.EncodingName);
    }
}
// Runs CharacterEncodingDetector over the "xml-shift-jis.txt" resource and expects
// an encoding whose EncodingName is "Japanese (Shift-JIS)".
public void Detect_encoding_for_XML_file_with_Shift_JIS() {
    const string resourceName = "xml-shift-jis.txt";
    using (var input = GetResourceStream(resourceName)) {
        var detected = new CharacterEncodingDetector().Detect(input);

        // position is sampled immediately after detection and passed on to AssertEncoding
        var position = input.Position;
        Assert.IsNotNull(detected, "encoding detection failed");

        // decode the stream from its current position using the detected encoding
        string content;
        using (var reader = new StreamReader(input, detected)) {
            content = reader.ReadToEnd();
        }
        Assert.AreEqual("Japanese (Shift-JIS)", detected.EncodingName);
        AssertEncoding(content, resourceName, Encoding.GetEncoding("shift_jis"), position);
        _log.DebugFormat("Detected encoding: {0}", detected.EncodingName);
    }
}
// Runs CharacterEncodingDetector over the "xml-utf-16-jp.txt" resource and expects
// an encoding whose EncodingName is "Unicode (Big-Endian)".
public void Detect_encoding_for_XML_file_with_UTF16_JP() {
    const string resourceName = "xml-utf-16-jp.txt";
    using (var input = GetResourceStream(resourceName)) {
        var detected = new CharacterEncodingDetector().Detect(input);

        // position is sampled immediately after detection and passed on to AssertEncoding
        var position = input.Position;
        Assert.IsNotNull(detected, "encoding detection failed");

        // decode the stream from its current position using the detected encoding
        string content;
        using (var reader = new StreamReader(input, detected)) {
            content = reader.ReadToEnd();
        }
        Assert.AreEqual("Unicode (Big-Endian)", detected.EncodingName);
        AssertEncoding(content, resourceName, Encoding.BigEndianUnicode, position);
        _log.DebugFormat("Detected encoding: {0}", detected.EncodingName);
    }
}
// Runs CharacterEncodingDetector over the "xml-gb2312.txt" resource and expects an
// encoding whose EncodingName is "Chinese Simplified (GB2312)", with the stream
// position left at 0 after detection.
public void Detect_encoding_for_XML_file_with_GB2312() {
    const string resourceName = "xml-gb2312.txt";
    using (var input = GetResourceStream(resourceName)) {
        var detected = new CharacterEncodingDetector().Detect(input);

        // position is sampled immediately after detection; it is checked against 0
        // below and later passed on to AssertEncoding
        var position = input.Position;
        Assert.IsNotNull(detected, "encoding detection failed");
        Assert.AreEqual(0, position, "wrong stream position");

        // decode the stream from its current position using the detected encoding
        string content;
        using (var reader = new StreamReader(input, detected)) {
            content = reader.ReadToEnd();
        }
        Assert.AreEqual("Chinese Simplified (GB2312)", detected.EncodingName);
        AssertEncoding(content, resourceName, Encoding.GetEncoding("GB2312"), position);
        _log.DebugFormat("Detected encoding: {0}", detected.EncodingName);
    }
}
// Runs CharacterEncodingDetector over the "xml-euc-jp.txt" resource and expects an
// encoding whose EncodingName is "EUC-JP".
public void Detect_encoding_for_XML_file_with_EUC_JP() {

    // BUGBUGBUG (steveb): for some reason, CharDet estimates EUC-JP to be the worst fit. :(
    const string resourceName = "xml-euc-jp.txt";
    using (var input = GetResourceStream(resourceName)) {
        var detected = new CharacterEncodingDetector().Detect(input);

        // position is sampled immediately after detection and passed on to AssertEncoding
        var position = input.Position;
        Assert.IsNotNull(detected, "encoding detection failed");

        // decode the stream from its current position using the detected encoding
        string content;
        using (var reader = new StreamReader(input, detected)) {
            content = reader.ReadToEnd();
        }
        Assert.AreEqual("EUC-JP", detected.EncodingName);
        AssertEncoding(content, resourceName, Encoding.GetEncoding("EUC-JP"), position);
        _log.DebugFormat("Detected encoding: {0}", detected.EncodingName);
    }
}