/// <summary>
/// Reads a file as text, decoding it with the encoding that <paramref name="cdet"/>
/// reports for a sample taken from the start of the file.
/// </summary>
/// <param name="cdet">Charset detector; it is reset, fed the sample and finalised on each call.</param>
/// <param name="buffer">Scratch buffer that receives the sample; at most <c>buffer.Length</c> bytes are sampled.</param>
/// <param name="fullFileName">Path of the file to read.</param>
/// <returns>
/// The decoded file content, or <c>null</c> when the file length is not below
/// <c>Config.Inst.MAX_FILE_SIZE_IN_BYTES</c> or when any exception is raised while reading.
/// </returns>
private static string GetFileText(ICharsetDetector cdet, byte[] buffer, string fullFileName)
{
    try
    {
        var fi = new FileInfo(fullFileName);
        if (fi.Length < Config.Inst.MAX_FILE_SIZE_IN_BYTES)
        {
            using (var fs = File.OpenRead(fullFileName))
            {
                // Clamp in 64-bit space before narrowing, so a stream longer than
                // int.MaxValue cannot wrap into a bogus (possibly negative) count.
                var wanted = (int)Math.Min((long)buffer.Length, fs.Length);

                // Stream.Read may hand back fewer bytes than requested; keep reading
                // until the sample is complete or the stream reports end-of-file.
                var length = 0;
                while (length < wanted)
                {
                    var read = fs.Read(buffer, length, wanted - length);
                    if (read == 0)
                    {
                        break;
                    }
                    length += read;
                }

                cdet.Reset();
                cdet.Feed(buffer, 0, length);
                cdet.DataEnd();

                // Rewind so the full content (including the sampled prefix) is decoded.
                fs.Position = 0;
                using (var reader = new StreamReader(fs, GetEncodingByCharsetName(cdet.Charset)))
                {
                    return reader.ReadToEnd();
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort: a failure is reported to the debug output and surfaces as null.
        Debug.WriteLine(ex.GetType().Name + ": '" + ex.Message + '\'');
    }
    return null;
}
/// <summary>
/// Feeds every file found in the <paramref name="dirname"/> sub-directory of <c>DATA_ROOT</c>
/// to the detector and asserts that the reported charset equals <paramref name="charset"/>.
/// </summary>
/// <param name="charset">Charset name each file is expected to be detected as.</param>
/// <param name="dirname">Directory name, combined with <c>DATA_ROOT</c>, that holds the sample files.</param>
/// <remarks>Returns without asserting anything when the directory does not exist.</remarks>
void Process(string charset, string dirname)
{
    var path = Path.Combine(DATA_ROOT, dirname);
    if (!Directory.Exists(path))
    {
        return;
    }

    foreach (var file in Directory.GetFiles(path))
    {
        // The samples are only read, so open them read-only; FileStream(path, FileMode.Open)
        // asks for read/write access and fails on read-only files.
        using (var fs = File.OpenRead(file))
        {
            try
            {
                Console.WriteLine("Analysing {0}", file);
                detector.Feed(fs);
                detector.DataEnd();
                Console.WriteLine("{0} : {1} {2}", file, detector.Charset, detector.Confidence);
                Assert.AreEqual(charset, detector.Charset);
            }
            finally
            {
                // Reset even when feeding or the assertion throws, so no state from
                // this file is left in the detector for whoever uses it next.
                detector.Reset();
            }
        }
    }
}