/// <summary>
/// Reads a file as text, decoding it with the encoding that the charset
/// detector reports for the leading bytes of the file.
/// </summary>
/// <param name="cdet">Detector that is reset, fed the sampled bytes, and then queried for the charset.</param>
/// <param name="buffer">Scratch buffer for the sample; at most <c>buffer.Length</c> bytes are fed to the detector.</param>
/// <param name="fullFileName">Path of the file to read.</param>
/// <returns>
/// The decoded file content, or <c>null</c> when the file length is not below
/// <c>Config.Inst.MAX_FILE_SIZE_IN_BYTES</c> or when any exception is raised while reading.
/// </returns>
private static string GetFileText(ICharsetDetector cdet, byte[] buffer, string fullFileName)
{
    try
    {
        var fi = new FileInfo(fullFileName);
        if (fi.Length < Config.Inst.MAX_FILE_SIZE_IN_BYTES)
        {
            using (var fs = File.OpenRead(fullFileName))
            {
                // Take the minimum in 64-bit space before narrowing, so a very large
                // file length cannot wrap around when cast to int.
                var wanted = (int)Math.Min((long)buffer.Length, fs.Length);

                // Stream.Read may return fewer bytes than requested; keep reading
                // until the sample is complete or the stream ends.
                var length = 0;
                while (length < wanted)
                {
                    var read = fs.Read(buffer, length, wanted - length);
                    if (read == 0)
                    {
                        break;
                    }
                    length += read;
                }

                cdet.Reset();
                cdet.Feed(buffer, 0, length);
                cdet.DataEnd();

                // Rewind so the whole file is decoded, not just the part after the sample.
                fs.Position = 0;
                using (var reader = new StreamReader(fs, GetEncodingByCharsetName(cdet.Charset)))
                {
                    return reader.ReadToEnd();
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort: any failure is logged to the debug output and reported as null.
        Debug.WriteLine(ex.GetType().Name + ": '" + ex.Message + '\'');
    }
    return null;
}
/// <summary>
/// Runs the detector over every file in a sub-directory of <c>DATA_ROOT</c> and
/// asserts that each file is detected as the expected charset.
/// </summary>
/// <param name="charset">Charset name every file in the directory is expected to be detected as.</param>
/// <param name="dirname">Directory name, combined with <c>DATA_ROOT</c>; a missing directory is silently skipped.</param>
void Process(string charset, string dirname)
{
    var path = Path.Combine(DATA_ROOT, dirname);
    if (!Directory.Exists(path))
    {
        return;
    }

    foreach (var file in Directory.GetFiles(path))
    {
        // Open read-only: FileStream(path, FileMode.Open) requests read/write access,
        // which fails on read-only sample files even though they are only read here.
        using (var fs = File.OpenRead(file))
        {
            Console.WriteLine("Analysing {0}", file);
            try
            {
                detector.Feed(fs);
                detector.DataEnd();
                Console.WriteLine("{0} : {1} {2}", file, detector.Charset, detector.Confidence);
                Assert.AreEqual(charset, detector.Charset);
            }
            finally
            {
                // Reset even when the assertion throws, so the shared detector
                // does not carry this file's state forward.
                detector.Reset();
            }
        }
    }
}
/// <summary>
/// Feeds a buffer starting with the bytes FE FF to a fresh detector and expects
/// <c>Charsets.UTF16_BE</c> to be reported with a confidence of exactly 1.0.
/// </summary>
public void TestBomUTF16_BE()
{
    detector = new CharsetDetector();

    // FE FF leads the buffer; the remaining four bytes are 00 68 00 65.
    var input = new byte[] { 0xFE, 0xFF, 0x00, 0x68, 0x00, 0x65 };
    detector.Feed(input, 0, input.Length);
    detector.DataEnd();

    Assert.AreEqual(Charsets.UTF16_BE, detector.Charset);
    Assert.AreEqual(1.0f, detector.Confidence);
}
/// <summary>
/// Feeds an ASCII-encoded English sentence to the detector through a stream and
/// expects <c>Charsets.ASCII</c> to be reported with a confidence of exactly 1.0.
/// </summary>
public void TestASCII()
{
    string sample =
        "The Documentation of the libraries is not complete " +
        "and your contributions would be greatly appreciated " +
        "the documentation you want to contribute to and " +
        "click on the [Edit] link to start writing";

    byte[] encoded = Encoding.ASCII.GetBytes(sample);
    using (var stream = new MemoryStream(encoded))
    {
        detector.Feed(stream);
        detector.DataEnd();

        Assert.AreEqual(Charsets.ASCII, detector.Charset);
        Assert.AreEqual(1.0f, detector.Confidence);
    }
}