Exemple #1
0
        /// <summary>
        /// Reads a whole file as text, decoding it with the charset that
        /// <paramref name="cdet"/> reports for the leading bytes of the file.
        /// </summary>
        /// <param name="cdet">Detector that is reset, fed the sampled bytes and then queried for its charset.</param>
        /// <param name="buffer">Scratch buffer that receives the sample; at most <c>buffer.Length</c> bytes are sampled.</param>
        /// <param name="fullFileName">Path of the file to read.</param>
        /// <returns>
        /// The decoded file content, or <c>null</c> when the file is not smaller than
        /// <c>Config.Inst.MAX_FILE_SIZE_IN_BYTES</c> or when any exception is raised
        /// (the exception is only written to the debug output).
        /// </returns>
        private static string GetFileText(ICharsetDetector cdet, byte[] buffer, string fullFileName)
        {
            try
            {
                var fi = new FileInfo(fullFileName);
                if (fi.Length < Config.Inst.MAX_FILE_SIZE_IN_BYTES)
                {
                    using (var fs = File.OpenRead(fullFileName))
                    {
                        // Take the minimum in 64-bit space first: narrowing fs.Length to int
                        // before the comparison would wrap for lengths above int.MaxValue.
                        var sampleSize = (int)Math.Min(buffer.Length, fs.Length);

                        // Stream.Read may hand back fewer bytes than requested, so keep
                        // reading until the sample is complete or the stream ends.
                        var length = 0;
                        while (length < sampleSize)
                        {
                            var read = fs.Read(buffer, length, sampleSize - length);
                            if (read == 0)
                            {
                                break;
                            }
                            length += read;
                        }

                        cdet.Reset();
                        cdet.Feed(buffer, 0, length);
                        cdet.DataEnd();

                        // Rewind so the full content (including the sampled prefix) is decoded.
                        fs.Position = 0;
                        using (var reader = new StreamReader(fs, GetEncodingByCharsetName(cdet.Charset)))
                        {
                            return reader.ReadToEnd();
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                // Best effort: any failure is logged and reported to the caller as null.
                Debug.WriteLine(ex.GetType().Name + ": '" + ex.Message + '\'');
            }
            return null;
        }
Exemple #2
0
        /// <summary>
        /// Runs the shared detector over every file in a sub-directory of <c>DATA_ROOT</c>
        /// and asserts that each one is detected as <paramref name="charset"/>.
        /// </summary>
        /// <param name="charset">Charset name every file in the directory is expected to yield.</param>
        /// <param name="dirname">Directory name, relative to <c>DATA_ROOT</c>; silently skipped when it does not exist.</param>
        void Process(string charset, string dirname)
        {
            var path = Path.Combine(DATA_ROOT, dirname);

            if (!Directory.Exists(path))
            {
                return;
            }

            var files = Directory.GetFiles(path);

            foreach (var file in files)
            {
                // Open read-only: FileStream(path, FileMode.Open) asks for read/write
                // access, which fails on read-only sample files.
                using (var fs = File.OpenRead(file))
                {
                    try
                    {
                        Console.WriteLine("Analysing {0}", file);
                        detector.Feed(fs);
                        detector.DataEnd();
                        Console.WriteLine("{0} : {1} {2}",
                                          file, detector.Charset, detector.Confidence);
                        Assert.AreEqual(charset, detector.Charset);
                    }
                    finally
                    {
                        // Reset even when the assertion throws so the shared detector
                        // does not carry state into whatever uses it next.
                        detector.Reset();
                    }
                }
            }
        }
Exemple #3
0
 /// <summary>
 /// Feeds a buffer starting with the bytes FE FF and checks that the detector
 /// reports UTF-16 big-endian with a confidence of exactly 1.0.
 /// </summary>
 public void TestBomUTF16_BE()
 {
     // FE FF followed by two 16-bit big-endian units (0x0068, 0x0065).
     var bomPrefixed = new byte[] { 0xFE, 0xFF, 0x00, 0x68, 0x00, 0x65 };

     detector = new CharsetDetector();
     detector.Feed(bomPrefixed, 0, bomPrefixed.Length);
     detector.DataEnd();

     Assert.AreEqual(Charsets.UTF16_BE, detector.Charset);
     Assert.AreEqual(1.0f, detector.Confidence);
 }
Exemple #4
0
 /// <summary>
 /// Verifies that input beginning with FE FF is detected as
 /// <c>Charsets.UTF16_BE</c> and that the reported confidence is 1.0.
 /// </summary>
 public void TestBomUTF16_BE()
 {
     detector = new CharsetDetector();

     // Sample: the two marker bytes FE FF, then 00 68 00 65.
     byte[] sample = new byte[] { 0xFE, 0xFF, 0x00, 0x68, 0x00, 0x65 };
     int sampleLength = sample.Length;

     detector.Feed(sample, 0, sampleLength);
     detector.DataEnd();

     Assert.AreEqual(Charsets.UTF16_BE, detector.Charset);
     Assert.AreEqual(1.0f, detector.Confidence);
 }
Exemple #5
0
        /// <summary>
        /// Feeds ASCII-encoded English text to the detector through a stream and
        /// checks that it reports <c>Charsets.ASCII</c> with a confidence of 1.0.
        /// </summary>
        public void TestASCII()
        {
            string text =
                "The Documentation of the libraries is not complete " +
                "and your contributions would be greatly appreciated " +
                "the documentation you want to contribute to and " +
                "click on the [Edit] link to start writing";

            byte[] asciiBytes = Encoding.ASCII.GetBytes(text);

            using (var stream = new MemoryStream(asciiBytes)) {
                detector.Feed(stream);
                detector.DataEnd();

                Assert.AreEqual(Charsets.ASCII, detector.Charset);
                Assert.AreEqual(1.0f, detector.Confidence);
            }
        }