Пример #1
0
        public void Detect_encoding_for_XML_file_with_UTF8_JP()
        {
            // Runs CharacterEncodingDetector over the "xml-utf-8-jp.txt" resource and checks that an
            // encoding is reported, that the stream is at position 0 after detection, and that the
            // reported encoding is named "Unicode (UTF-8)".
            const string resourceName = "xml-utf-8-jp.txt";
            using(var input = GetResourceStream(resourceName)) {
                var detected = new CharacterEncodingDetector().Detect(input);
                var position = input.Position;
                Assert.IsNotNull(detected, "encoding detection failed");
                Assert.AreEqual(0, position, "wrong stream position");

                // decode the remaining stream content using the detected encoding
                string decoded;
                using(var reader = new StreamReader(input, detected)) {
                    decoded = reader.ReadToEnd();
                }

                Assert.AreEqual("Unicode (UTF-8)", detected.EncodingName);

                // AssertEncoding is defined elsewhere; presumably it compares the decoded text against
                // the resource decoded with the expected encoding -- confirm against its definition
                AssertEncoding(decoded, resourceName, Encoding.UTF8, position);
                _log.DebugFormat("Detected encoding: {0}", detected.EncodingName);
            }
        }
Пример #2
0
        public void Detect_encoding_for_XML_file_with_Shift_JIS()
        {
            // Runs CharacterEncodingDetector over the "xml-shift-jis.txt" resource and checks that an
            // encoding is reported, that the stream is at position 0 after detection, and that the
            // reported encoding is named "Japanese (Shift-JIS)".
            const string resourceName = "xml-shift-jis.txt";
            using(var input = GetResourceStream(resourceName)) {
                var detected = new CharacterEncodingDetector().Detect(input);
                var position = input.Position;
                Assert.IsNotNull(detected, "encoding detection failed");
                Assert.AreEqual(0, position, "wrong stream position");

                // decode the remaining stream content using the detected encoding
                string decoded;
                using(var reader = new StreamReader(input, detected)) {
                    decoded = reader.ReadToEnd();
                }

                Assert.AreEqual("Japanese (Shift-JIS)", detected.EncodingName);

                // AssertEncoding is defined elsewhere; presumably it compares the decoded text against
                // the resource decoded with the expected encoding -- confirm against its definition
                var expected = Encoding.GetEncoding("shift_jis");
                AssertEncoding(decoded, resourceName, expected, position);
                _log.DebugFormat("Detected encoding: {0}", detected.EncodingName);
            }
        }
Пример #3
0
        public void Detect_encoding_for_XML_file_with_LE_JP()
        {
            // Runs CharacterEncodingDetector over the "xml-little-endian-jp.txt" resource and checks
            // that an encoding is reported, that the stream is at position 0 after detection, and
            // that the reported encoding is named "Unicode" (Encoding.Unicode is the expected match).
            // The test is skipped when SysUtil.IsMono is set.
            if(SysUtil.IsMono) {

                // Assert.Ignore always throws, so execution never continues past this call;
                // the message makes the skip reason visible in the test report
                Assert.Ignore("skipped when running on Mono");
            }
            const string resource = "xml-little-endian-jp.txt";
            using(var stream = GetResourceStream(resource)) {
                var detector = new CharacterEncodingDetector();
                var encoding = detector.Detect(stream);
                var offset = stream.Position;
                Assert.IsNotNull(encoding, "encoding detection failed");
                Assert.AreEqual(0, offset, "wrong stream position");

                // decode the remaining stream content using the detected encoding
                string text;
                using(var reader = new StreamReader(stream, encoding)) {
                    text = reader.ReadToEnd();
                }

                Assert.AreEqual("Unicode", encoding.EncodingName);

                // AssertEncoding is defined elsewhere; presumably it compares the decoded text against
                // the resource decoded with the expected encoding -- confirm against its definition
                AssertEncoding(text, resource, Encoding.Unicode, offset);
                _log.DebugFormat("Detected encoding: {0}", encoding.EncodingName);
            }
        }
Пример #4
0
        public void Detect_encoding_for_XML_file_with_EUC_JP()
        {
            // Runs CharacterEncodingDetector over the "xml-euc-jp.txt" resource and checks that an
            // encoding is reported, that the stream is at position 0 after detection, and that the
            // reported encoding is named "EUC-JP".

            // BUGBUGBUG (steveb): for some reason, CharDet estimates EUC-JP to be the worst fit. :(

            const string resourceName = "xml-euc-jp.txt";
            using(var input = GetResourceStream(resourceName)) {
                var detected = new CharacterEncodingDetector().Detect(input);
                var position = input.Position;
                Assert.IsNotNull(detected, "encoding detection failed");
                Assert.AreEqual(0, position, "wrong stream position");

                // decode the remaining stream content using the detected encoding
                string decoded;
                using(var reader = new StreamReader(input, detected)) {
                    decoded = reader.ReadToEnd();
                }

                Assert.AreEqual("EUC-JP", detected.EncodingName);

                // AssertEncoding is defined elsewhere; presumably it compares the decoded text against
                // the resource decoded with the expected encoding -- confirm against its definition
                var expected = Encoding.GetEncoding("EUC-JP");
                AssertEncoding(decoded, resourceName, expected, position);
                _log.DebugFormat("Detected encoding: {0}", detected.EncodingName);
            }
        }
Пример #5
0
        public void Detect_encoding_for_XML_file_with_GB2312()
        {
            // Runs CharacterEncodingDetector over the "xml-gb2312.txt" resource and checks that an
            // encoding is reported and that it is named "Chinese Simplified (GB2312)".
            const string resourceName = "xml-gb2312.txt";
            using(var input = GetResourceStream(resourceName)) {
                var detected = new CharacterEncodingDetector().Detect(input);

                // NOTE(review): the stream position after detection is captured and forwarded to
                // AssertEncoding, but it is not itself asserted in this test -- confirm whether a
                // "wrong stream position" check was intentionally left out
                var position = input.Position;
                Assert.IsNotNull(detected, "encoding detection failed");

                // decode the remaining stream content using the detected encoding
                string decoded;
                using(var reader = new StreamReader(input, detected)) {
                    decoded = reader.ReadToEnd();
                }

                Assert.AreEqual("Chinese Simplified (GB2312)", detected.EncodingName);

                // AssertEncoding is defined elsewhere; presumably it compares the decoded text against
                // the resource decoded with the expected encoding -- confirm against its definition
                var expected = Encoding.GetEncoding("GB2312");
                AssertEncoding(decoded, resourceName, expected, position);
                _log.DebugFormat("Detected encoding: {0}", detected.EncodingName);
            }
        }