/// <summary>Gets the character encoding of a file</summary>
/// <param name="File">The absolute path to a file</param>
/// <returns>The character encoding, or unknown</returns>
/// <remarks>
/// A byte order mark at the start of the file is checked first. If none is found, the
/// whole file is passed to a UniversalDetector, of whose results only "SHIFT_JIS" is mapped.
/// Any exception (e.g. the file cannot be read) results in Encoding.Unknown.
/// </remarks>
internal static Encoding GetEncodingFromFile(string File)
{
    try
    {
        byte[] Data = System.IO.File.ReadAllBytes(File);

        // The UTF-32 BOMs must be tested before the UTF-16 ones: the UTF-32 LE mark
        // (FF FE 00 00) begins with the UTF-16 LE mark (FF FE), so testing UTF-16 first
        // would report every UTF-32 LE file as UTF-16 LE.
        if (Data.Length >= 4)
        {
            if (Data[0] == 0x00 && Data[1] == 0x00 && Data[2] == 0xFE && Data[3] == 0xFF)
            {
                return Encoding.Utf32Be;
            }
            if (Data[0] == 0xFF && Data[1] == 0xFE && Data[2] == 0x00 && Data[3] == 0x00)
            {
                return Encoding.Utf32Le;
            }
        }

        if (Data.Length >= 3)
        {
            if (Data[0] == 0xEF && Data[1] == 0xBB && Data[2] == 0xBF)
            {
                return Encoding.Utf8;
            }
            if (Data[0] == 0x2b && Data[1] == 0x2f && Data[2] == 0x76)
            {
                return Encoding.Utf7;
            }
        }

        if (Data.Length >= 2)
        {
            if (Data[0] == 0xFE && Data[1] == 0xFF)
            {
                return Encoding.Utf16Be;
            }
            if (Data[0] == 0xFF && Data[1] == 0xFE)
            {
                return Encoding.Utf16Le;
            }
        }

        // No BOM found: fall back to content-based detection over the whole file.
        UniversalDetector Det = new UniversalDetector(null);
        Det.HandleData(Data, 0, Data.Length);
        Det.DataEnd();
        switch (Det.GetDetectedCharset())
        {
            case "SHIFT_JIS":
                return Encoding.Shift_JIS;
        }
        Det.Reset();
        return Encoding.Unknown;
    }
    catch
    {
        // Deliberate best-effort: any failure is reported as an unknown encoding.
        return Encoding.Unknown;
    }
}
/// <summary>
/// Downloads the page at the URL entered in UrlBox, runs charset detection over the raw
/// bytes and shows the detected charset in CharSetBox and the decoded text in PageBox.
/// </summary>
/// <param name="sender">The object raising the event (not used)</param>
/// <param name="e">The event arguments (not used)</param>
private void Check_Click(object sender, EventArgs e)
{
    CharSetBox.Text = "";
    PageBox.Text = "";

    byte[] PageBytes;
    try
    {
        // Creating the request is inside the try block so that a malformed URL is
        // reported through the same error message instead of crashing the handler.
        HttpWebRequest hwr = (HttpWebRequest)HttpWebRequest.Create(UrlBox.Text);
        using (HttpWebResponse res = (HttpWebResponse)hwr.GetResponse())
        {
            if (res.StatusCode != HttpStatusCode.OK)
            {
                return;
            }

            // The using blocks release the connection and buffers on every path.
            using (Stream mystream = res.GetResponseStream())
            using (MemoryStream msTemp = new MemoryStream())
            {
                byte[] buff = new byte[512];
                int len;
                while ((len = mystream.Read(buff, 0, buff.Length)) > 0)
                {
                    msTemp.Write(buff, 0, len);
                }
                PageBytes = msTemp.ToArray();
            }
        }
    }
    catch
    {
        CharSetBox.Text = "网页获取错误!";
        return;
    }

    if (PageBytes.Length == 0)
    {
        return;
    }

    UniversalDetector Det = new UniversalDetector(null);
    Det.HandleData(PageBytes, 0, PageBytes.Length);
    Det.DataEnd();
    // Capture the result before resetting the detector.
    string Charset = Det.GetDetectedCharset();
    Det.Reset();

    if (Charset == null)
    {
        return;
    }

    CharSetBox.Text = "OK! CharSet=" + Charset;
    try
    {
        PageBox.Text = System.Text.Encoding.GetEncoding(Charset).GetString(PageBytes);
    }
    catch (ArgumentException)
    {
        // The detected charset name is not one .NET recognises; the charset is still
        // shown in CharSetBox, but the page text cannot be decoded, so PageBox stays empty.
    }
    catch (NotSupportedException)
    {
        // The encoding is not supported on this platform; PageBox stays empty.
    }
}