Exemple #1
0
        /// <summary>
        /// Reads a file as text, decoding it with the encoding selected from the
        /// charset that <paramref name="cdet"/> reports for the file's leading bytes.
        /// </summary>
        /// <param name="cdet">Detector that is reset, fed the sampled bytes, and then queried for its charset.</param>
        /// <param name="buffer">Scratch buffer for the sample; at most <c>buffer.Length</c> bytes of the file are examined.</param>
        /// <param name="fullFileName">Path of the file to read.</param>
        /// <returns>
        /// The file's text, or <c>null</c> when the file is not smaller than
        /// <c>Config.Inst.MAX_FILE_SIZE_IN_BYTES</c> or when any exception is raised
        /// (the exception is written to the debug output and swallowed).
        /// </returns>
        private static string GetFileText(ICharsetDetector cdet, byte[] buffer, string fullFileName)
        {
            try
            {
                var fi = new FileInfo(fullFileName);
                if (fi.Length < Config.Inst.MAX_FILE_SIZE_IN_BYTES)
                {
                    using (var fs = File.OpenRead(fullFileName))
                    {
                        // Compare as long and narrow afterwards so a large file length
                        // cannot overflow the int cast.
                        var wanted = (int)Math.Min((long)buffer.Length, fs.Length);

                        // Stream.Read may return fewer bytes than requested, so keep
                        // reading until the sample is complete or the stream ends.
                        var length = 0;
                        while (length < wanted)
                        {
                            var read = fs.Read(buffer, length, wanted - length);
                            if (read == 0)
                            {
                                break;
                            }
                            length += read;
                        }

                        cdet.Reset();
                        cdet.Feed(buffer, 0, length);
                        cdet.DataEnd();

                        // Rewind so the reader decodes the file from its first byte.
                        fs.Position = 0;
                        using (var reader = new StreamReader(fs, GetEncodingByCharsetName(cdet.Charset)))
                        {
                            return reader.ReadToEnd();
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                // Best effort: callers receive null rather than an exception.
                Debug.WriteLine(ex.GetType().Name + ": '" + ex.Message + '\'');
            }
            return null;
        }
Exemple #2
0
        /// <summary>
        /// Runs the detector over every file in <c>DATA_ROOT/dirname</c> and asserts
        /// that each one is detected as <paramref name="charset"/>.
        /// </summary>
        /// <param name="charset">Charset name every file in the directory is expected to be detected as.</param>
        /// <param name="dirname">Directory name, combined with <c>DATA_ROOT</c>, holding the sample files.</param>
        /// <remarks>Returns without asserting anything when the directory does not exist.</remarks>
        void Process(string charset, string dirname)
        {
            var path = Path.Combine(DATA_ROOT, dirname);

            if (!Directory.Exists(path))
            {
                return;
            }

            var files = Directory.GetFiles(path);

            foreach (var file in files)
            {
                // The samples are only read; FileMode.Open alone would request
                // read/write access and fail on read-only files.
                using (var fs = new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.Read))
                {
                    try
                    {
                        Console.WriteLine("Analysing {0}", file);
                        detector.Feed(fs);
                        detector.DataEnd();
                        Console.WriteLine("{0} : {1} {2}",
                                          file, detector.Charset, detector.Confidence);
                        Assert.AreEqual(charset, detector.Charset);
                    }
                    finally
                    {
                        // Reset even when feeding or the assertion throws, so the
                        // detector does not keep state from this file.
                        detector.Reset();
                    }
                }
            }
        }