/// <summary>
/// Reads the whole file into memory and asks an NChardet detector which
/// text encoding it is written in.
/// </summary>
/// <param name="filename">Path of the file to inspect; opened read-only with shared read access.</param>
/// <returns>
/// <see cref="System.Text.Encoding.ASCII"/> when the detector reports the content as ASCII;
/// otherwise the encoding named by the detection observer. Falls back to
/// <see cref="System.Text.Encoding.Default"/> when the file is empty, when no charset
/// was reported, or when the reported charset name is not supported on this platform.
/// </returns>
public System.Text.Encoding GetEncodingOfFile(string filename)
{
    byte[] buf;
    int count = 0;

    using (System.IO.FileStream fs = new System.IO.FileStream(
        filename,
        System.IO.FileMode.Open,
        System.IO.FileAccess.Read,
        System.IO.FileShare.Read))
    {
        buf = new byte[fs.Length];

        // A single Read call is allowed to return fewer bytes than requested,
        // so keep reading until the buffer is full or the stream ends.
        while (count < buf.Length)
        {
            int read = fs.Read(buf, count, buf.Length - count);
            if (read == 0)
            {
                break;
            }

            count += read;
        }
    }

    // Nothing to analyse: use the platform default.
    if (count < 1)
    {
        return System.Text.Encoding.Default;
    }

    NChardet.Detector detect = new NChardet.Detector();
    CharsetDetectionObserver cdo = new CharsetDetectionObserver();
    detect.Init(cdo);

    if (detect.isAscii(buf, count))
    {
        return System.Text.Encoding.ASCII;
    }

    // NOTE(review): the third argument to DoIt is passed as true here; its exact
    // meaning is defined by NChardet and is not visible in this file.
    detect.DoIt(buf, count, true);
    detect.DataEnd();

    if (string.IsNullOrEmpty(cdo.Charset))
    {
        return System.Text.Encoding.Default;
    }

    try
    {
        return System.Text.Encoding.GetEncoding(cdo.Charset);
    }
    catch (System.ArgumentException)
    {
        // The detector reported a name this platform does not know; treat it
        // the same way as "nothing detected".
        return System.Text.Encoding.Default;
    }
}
/// <summary>
/// Feeds the stream to an NChardet detector in 1024-byte chunks and returns
/// the name of the charset it appears to use.
/// </summary>
/// <param name="stream">Stream to inspect. It is disposed before this method returns.</param>
/// <returns>
/// The BodyName of <see cref="Encoding.ASCII"/> when every chunk was reported as ASCII
/// (this includes an empty stream); otherwise the charset reported to the observer;
/// otherwise the first probable charset if it is "gb2312" or "utf-8" (returned in lower case);
/// otherwise the BodyName of <see cref="Encoding.Default"/>.
/// </returns>
/// <exception cref="System.ArgumentNullException"><paramref name="stream"/> is null.</exception>
public static string DetectStream(Stream stream)
{
    if (stream == null)
    {
        throw new System.ArgumentNullException("stream");
    }

    // NOTE(review): 2 is a language hint understood by NChardet.Detector; its
    // meaning is not shown in this file - confirm against the library.
    int lang = 2;
    NChardet.Detector det = new NChardet.Detector(lang);
    CharsetDetectionObserver cdo = new CharsetDetectionObserver();
    det.Init(cdo);

    byte[] buf = new byte[1024];
    bool done = false;
    bool isAscii = true;
    int len;

    using (stream)
    {
        while ((len = stream.Read(buf, 0, buf.Length)) != 0)
        {
            // Check whether the data seen so far is still plain ASCII.
            if (isAscii)
            {
                isAscii = det.isAscii(buf, len);
            }

            // Not ASCII and the encoding is not yet determined: keep probing.
            if (!isAscii && !done)
            {
                done = det.DoIt(buf, len, false);
            }

            // Once the detector reports it is done, further chunks would be
            // ignored anyway, and the stream is disposed right after the loop.
            if (!isAscii && done)
            {
                break;
            }
        }
    }

    // Call DataEnd: if the engine believes it has identified the correct
    // encoding, it calls ICharsetDetectionObserver.Notify at this point.
    det.DataEnd();

    if (isAscii)
    {
        return Encoding.ASCII.BodyName;
    }

    if (!string.IsNullOrEmpty(cdo.Charset))
    {
        return cdo.Charset;
    }

    string charset = Encoding.Default.BodyName;
    string[] probable = det.getProbableCharsets();
    if (probable != null && probable.Length >= 1)
    {
        // Charset names are identifiers, so lower-case them without regard
        // to the current culture.
        string probableCharset = probable[0].ToLowerInvariant();
        if (probableCharset == "gb2312" || probableCharset == "utf-8")
        {
            charset = probableCharset;
        }
    }

    return charset;
}