예제 #1
0
        /// <summary>Gets the character encoding of a file</summary>
        /// <param name="File">The absolute path to a file</param>
        /// <returns>The encoding indicated by the file's byte order mark if it has one, otherwise the
        /// encoding reported by the charset detector if it is one of the handled charsets, otherwise
        /// unknown (also returned when reading the file or running the detector throws)</returns>
        internal static Encoding GetEncodingFromFile(string File)
        {
            try
            {
                byte[] Data = System.IO.File.ReadAllBytes(File);

                // Byte order marks are tested longest-first: the UTF-32 LE mark (FF FE 00 00)
                // begins with the UTF-16 LE mark (FF FE), so testing the two-byte marks first
                // would make the UTF-32 LE case unreachable.
                if (Data.Length >= 4)
                {
                    if (Data[0] == 0x00 && Data[1] == 0x00 && Data[2] == 0xFE && Data[3] == 0xFF)
                    {
                        return Encoding.Utf32Be;
                    }
                    if (Data[0] == 0xFF && Data[1] == 0xFE && Data[2] == 0x00 && Data[3] == 0x00)
                    {
                        return Encoding.Utf32Le;
                    }
                }
                if (Data.Length >= 3)
                {
                    if (Data[0] == 0xEF && Data[1] == 0xBB && Data[2] == 0xBF)
                    {
                        return Encoding.Utf8;
                    }
                    if (Data[0] == 0x2b && Data[1] == 0x2f && Data[2] == 0x76)
                    {
                        return Encoding.Utf7;
                    }
                }
                if (Data.Length >= 2)
                {
                    if (Data[0] == 0xFE && Data[1] == 0xFF)
                    {
                        return Encoding.Utf16Be;
                    }
                    if (Data[0] == 0xFF && Data[1] == 0xFE)
                    {
                        return Encoding.Utf16Le;
                    }
                }

                // No byte order mark: fall back to content-based detection over the whole file.
                UniversalDetector Det = new UniversalDetector(null);
                Det.HandleData(Data, 0, Data.Length);
                Det.DataEnd();
                switch (Det.GetDetectedCharset())
                {
                    case "SHIFT_JIS":
                        return Encoding.Shift_JIS;

                    case "UTF-8":
                        return Encoding.Utf8;

                    case "UTF-7":
                        return Encoding.Utf7;

                    case "WINDOWS-1252":
                        return Encoding.Windows1252;

                    case "BIG5":
                        return Encoding.Big5;
                }
                // Nothing detected, or a charset this method does not map.
                Det.Reset();
                return Encoding.Unknown;
            }
            catch
            {
                // Deliberately best-effort: any failure to read or analyse the file is
                // reported to the caller as an unknown encoding rather than as an exception.
                return Encoding.Unknown;
            }
        }
예제 #2
0
        /// <summary>Gets the character encoding of a file</summary>
        /// <param name="File">The absolute path to a file</param>
        /// <returns>The encoding indicated by the file's byte order mark if it has one, otherwise the
        /// encoding reported by the charset detector if it is one of the handled charsets, otherwise
        /// unknown (also returned when the path is null, the file does not exist, or reading the
        /// file or running the detector throws)</returns>
        internal static Encoding GetEncodingFromFile(string File)
        {
            if (File == null || !System.IO.File.Exists(File))
            {
                return Encoding.Unknown;
            }
            try
            {
                byte[] Data = System.IO.File.ReadAllBytes(File);

                // Byte order marks are tested longest-first: the UTF-32 LE mark (FF FE 00 00)
                // begins with the UTF-16 LE mark (FF FE), so testing the two-byte marks first
                // would make the UTF-32 LE case unreachable.
                if (Data.Length >= 4)
                {
                    if (Data[0] == 0x00 && Data[1] == 0x00 && Data[2] == 0xFE && Data[3] == 0xFF)
                    {
                        return Encoding.Utf32Be;
                    }

                    if (Data[0] == 0xFF && Data[1] == 0xFE && Data[2] == 0x00 && Data[3] == 0x00)
                    {
                        return Encoding.Utf32Le;
                    }
                }
                if (Data.Length >= 3)
                {
                    if (Data[0] == 0xEF && Data[1] == 0xBB && Data[2] == 0xBF)
                    {
                        return Encoding.Utf8;
                    }

                    if (Data[0] == 0x2b && Data[1] == 0x2f && Data[2] == 0x76)
                    {
                        return Encoding.Utf7;
                    }
                }
                if (Data.Length >= 2)
                {
                    if (Data[0] == 0xFE && Data[1] == 0xFF)
                    {
                        return Encoding.Utf16Be;
                    }

                    if (Data[0] == 0xFF && Data[1] == 0xFE)
                    {
                        return Encoding.Utf16Le;
                    }
                }

                // No byte order mark: fall back to content-based detection over the whole file.
                UniversalDetector Det = new UniversalDetector(null);
                Det.HandleData(Data, 0, Data.Length);
                Det.DataEnd();
                switch (Det.GetDetectedCharset())
                {
                    case "SHIFT_JIS":
                        return Encoding.Shift_JIS;

                    case "UTF-8":
                        return Encoding.Utf8;

                    case "UTF-7":
                        return Encoding.Utf7;

                    case "WINDOWS-1252":
                        return Encoding.Windows1252;

                    case "BIG5":
                        // The size is taken from the bytes actually read, so no second
                        // file-system query is needed to identify this one known file.
                        if (Path.GetFileName(File).ToLowerInvariant() == "stoklosy.b3d" && Data.Length == 18256)
                        {
                            //Polish Warsaw metro object file uses diacritics in filenames
                            return Encoding.Windows1252;
                        }
                        return Encoding.Big5;

                    case "EUC-KR":
                        return Encoding.EUC_KR;
                }
                // Nothing detected, or a charset this method does not map.
                Det.Reset();
                return Encoding.Unknown;
            }
            catch
            {
                // Deliberately best-effort: any failure to read or analyse the file is
                // reported to the caller as an unknown encoding rather than as an exception.
                return Encoding.Unknown;
            }
        }
예제 #3
0
        /// <summary>
        /// Downloads the resource at the URL typed into <c>UrlBox</c>, runs charset detection over
        /// the raw bytes, and shows the detected charset in <c>CharSetBox</c> and the decoded text
        /// in <c>PageBox</c>.
        /// </summary>
        /// <param name="sender">The control that raised the click event (unused)</param>
        /// <param name="e">The event arguments (unused)</param>
        private void Check_Click(object sender, EventArgs e)
        {
            CharSetBox.Text = "";
            PageBox.Text    = "";

            byte[] PageBytes;
            try
            {
                // Creating the request is inside the try block as well: a malformed or
                // non-HTTP URL makes Create (or the cast) throw before any network access.
                HttpWebRequest hwr = (HttpWebRequest)HttpWebRequest.Create(UrlBox.Text);

                // The using blocks release the connection and buffers on every path,
                // including the early return for a non-OK status.
                using (HttpWebResponse res = (HttpWebResponse)hwr.GetResponse())
                {
                    if (res.StatusCode != HttpStatusCode.OK)
                    {
                        return;
                    }

                    using (Stream mystream = res.GetResponseStream())
                    using (MemoryStream msTemp = new MemoryStream())
                    {
                        byte[] buff = new byte[512];
                        int    len;
                        while ((len = mystream.Read(buff, 0, buff.Length)) > 0)
                        {
                            msTemp.Write(buff, 0, len);
                        }
                        PageBytes = msTemp.ToArray();
                    }
                }
            }
            catch
            {
                // Best-effort UI handler: any failure to fetch the page is reported in the box.
                CharSetBox.Text = "网页获取错误!";
                return;
            }

            if (PageBytes.Length == 0)
            {
                return;
            }

            UniversalDetector Det = new UniversalDetector(null);
            Det.HandleData(PageBytes, 0, PageBytes.Length);
            Det.DataEnd();
            // Read the result once, before the detector is reset.
            string charset = Det.GetDetectedCharset();
            Det.Reset();
            if (charset == null)
            {
                return;
            }

            CharSetBox.Text = "OK! CharSet=" + charset;
            try
            {
                PageBox.Text = System.Text.Encoding.GetEncoding(charset).GetString(PageBytes);
            }
            catch (ArgumentException)
            {
                // The detector can report a charset name this runtime has no encoding for;
                // the detected name stays visible and the page box is left empty.
            }
        }