/// <summary>Gets the character encoding of a file</summary>
/// <param name="File">The absolute path to a file</param>
/// <returns>The character encoding, or unknown</returns>
/// <remarks>
/// A byte order mark at the start of the file takes precedence. If none is found, the whole file
/// is passed to the charset detector and the reported name is mapped onto the known encodings.
/// Any exception raised while reading or analysing the file results in unknown being returned.
/// </remarks>
internal static Encoding GetEncodingFromFile(string File) {
    try {
        byte[] Data = System.IO.File.ReadAllBytes(File);
        // The longest byte order marks must be tested first: the UTF-32 LE mark (FF FE 00 00)
        // begins with the UTF-16 LE mark (FF FE) and would otherwise never be recognised.
        if (Data.Length >= 4) {
            if (Data[0] == 0x00 && Data[1] == 0x00 && Data[2] == 0xFE && Data[3] == 0xFF) {
                return Encoding.Utf32Be;
            }
            if (Data[0] == 0xFF && Data[1] == 0xFE && Data[2] == 0x00 && Data[3] == 0x00) {
                return Encoding.Utf32Le;
            }
        }
        if (Data.Length >= 3) {
            if (Data[0] == 0xEF && Data[1] == 0xBB && Data[2] == 0xBF) {
                return Encoding.Utf8;
            }
            if (Data[0] == 0x2B && Data[1] == 0x2F && Data[2] == 0x76) {
                return Encoding.Utf7;
            }
        }
        if (Data.Length >= 2) {
            if (Data[0] == 0xFE && Data[1] == 0xFF) {
                return Encoding.Utf16Be;
            }
            if (Data[0] == 0xFF && Data[1] == 0xFE) {
                return Encoding.Utf16Le;
            }
        }
        // No byte order mark: fall back to content-based detection over the whole file.
        UniversalDetector Det = new UniversalDetector(null);
        Det.HandleData(Data, 0, Data.Length);
        Det.DataEnd();
        switch (Det.GetDetectedCharset()) {
            case "SHIFT_JIS":
                return Encoding.Shift_JIS;
            case "UTF-8":
                return Encoding.Utf8;
            case "UTF-7":
                return Encoding.Utf7;
            case "WINDOWS-1252":
                return Encoding.Windows1252;
            case "BIG5":
                return Encoding.Big5;
        }
        // Reached only when the detector reported nothing, or a charset that is not mapped above.
        Det.Reset();
        return Encoding.Unknown;
    } catch {
        // Deliberate best-effort: an unreadable file is reported as unknown rather than failing the caller.
        return Encoding.Unknown;
    }
}
/// <summary>Gets the character encoding of a file</summary>
/// <param name="File">The absolute path to a file</param>
/// <returns>The character encoding, or unknown</returns>
/// <remarks>
/// Returns unknown immediately when the path is null or the file does not exist.
/// A byte order mark at the start of the file takes precedence. If none is found, the whole file
/// is passed to the charset detector and the reported name is mapped onto the known encodings.
/// Any exception raised while reading or analysing the file results in unknown being returned.
/// </remarks>
internal static Encoding GetEncodingFromFile(string File) {
    if (File == null || !System.IO.File.Exists(File)) {
        return Encoding.Unknown;
    }
    try {
        byte[] Data = System.IO.File.ReadAllBytes(File);
        // The longest byte order marks must be tested first: the UTF-32 LE mark (FF FE 00 00)
        // begins with the UTF-16 LE mark (FF FE) and would otherwise never be recognised.
        if (Data.Length >= 4) {
            if (Data[0] == 0x00 && Data[1] == 0x00 && Data[2] == 0xFE && Data[3] == 0xFF) {
                return Encoding.Utf32Be;
            }
            if (Data[0] == 0xFF && Data[1] == 0xFE && Data[2] == 0x00 && Data[3] == 0x00) {
                return Encoding.Utf32Le;
            }
        }
        if (Data.Length >= 3) {
            if (Data[0] == 0xEF && Data[1] == 0xBB && Data[2] == 0xBF) {
                return Encoding.Utf8;
            }
            if (Data[0] == 0x2B && Data[1] == 0x2F && Data[2] == 0x76) {
                return Encoding.Utf7;
            }
        }
        if (Data.Length >= 2) {
            if (Data[0] == 0xFE && Data[1] == 0xFF) {
                return Encoding.Utf16Be;
            }
            if (Data[0] == 0xFF && Data[1] == 0xFE) {
                return Encoding.Utf16Le;
            }
        }
        // No byte order mark: fall back to content-based detection over the whole file.
        UniversalDetector Det = new UniversalDetector(null);
        Det.HandleData(Data, 0, Data.Length);
        Det.DataEnd();
        switch (Det.GetDetectedCharset()) {
            case "SHIFT_JIS":
                return Encoding.Shift_JIS;
            case "UTF-8":
                return Encoding.Utf8;
            case "UTF-7":
                return Encoding.Utf7;
            case "WINDOWS-1252":
                return Encoding.Windows1252;
            case "BIG5":
                // The size is taken from the buffer just read, which holds the entire file.
                if (Path.GetFileName(File).ToLowerInvariant() == "stoklosy.b3d" && Data.Length == 18256) {
                    //Polish Warsaw metro object file uses diacritics in filenames
                    return Encoding.Windows1252;
                }
                return Encoding.Big5;
            case "EUC-KR":
                return Encoding.EUC_KR;
        }
        // Reached only when the detector reported nothing, or a charset that is not mapped above.
        Det.Reset();
        return Encoding.Unknown;
    } catch {
        // Deliberate best-effort: an unreadable file is reported as unknown rather than failing the caller.
        return Encoding.Unknown;
    }
}
/// <summary>Downloads the page at the address in UrlBox, detects its character set and displays the decoded text</summary>
/// <param name="sender">The control which raised the event (not used)</param>
/// <param name="e">The event arguments (not used)</param>
/// <remarks>
/// CharSetBox receives either the detected charset name or an error message when the page cannot be fetched.
/// PageBox receives the page decoded with the detected charset, and is left empty when nothing was detected
/// or the detected charset is not available on this runtime.
/// </remarks>
private void Check_Click(object sender, EventArgs e) {
    CharSetBox.Text = "";
    PageBox.Text = "";
    byte[] PageBytes;
    try {
        // Creating the request is inside the try block so that a malformed or non-HTTP address
        // is reported in the same way as a failed download, instead of escaping the handler.
        HttpWebRequest hwr = (HttpWebRequest)HttpWebRequest.Create(UrlBox.Text);
        using (HttpWebResponse res = (HttpWebResponse)hwr.GetResponse()) {
            if (res.StatusCode != HttpStatusCode.OK) {
                return;
            }
            using (Stream mystream = res.GetResponseStream())
            using (MemoryStream msTemp = new MemoryStream()) {
                byte[] buff = new byte[512];
                int len;
                while ((len = mystream.Read(buff, 0, buff.Length)) > 0) {
                    msTemp.Write(buff, 0, len);
                }
                PageBytes = msTemp.ToArray();
            }
        }
    } catch {
        CharSetBox.Text = "网页获取错误!";
        return;
    }
    if (PageBytes.Length == 0) {
        return;
    }
    UniversalDetector Det = new UniversalDetector(null);
    Det.HandleData(PageBytes, 0, PageBytes.Length);
    Det.DataEnd();
    string charset = Det.GetDetectedCharset();
    Det.Reset();
    if (charset == null) {
        return;
    }
    CharSetBox.Text = "OK! CharSet=" + charset;
    try {
        PageBox.Text = System.Text.Encoding.GetEncoding(charset).GetString(PageBytes);
    } catch (ArgumentException) {
        // The detector reported a charset name this runtime does not recognise:
        // the name stays visible in CharSetBox and the page text is left empty.
    } catch (NotSupportedException) {
        // The charset is known but not supported on this platform: handled as above.
    }
}