コード例 #1
0
ファイル: TextEncoding.cs プロジェクト: leezer3/OpenBVE
		/// <summary>Gets the character encoding of a file</summary>
		/// <remarks>
		/// A leading byte order mark is checked first. If none is present, the file contents are
		/// passed to a <c>UniversalDetector</c>; of its results only "SHIFT_JIS" is mapped to an encoding.
		/// Any exception (for example, the file cannot be read) results in the unknown encoding being returned.
		/// </remarks>
		/// <param name="File">The absolute path to a file</param>
		/// <returns>The character encoding, or unknown</returns>
		internal static Encoding GetEncodingFromFile(string File)
		{
			try
			{
				byte[] Data = System.IO.File.ReadAllBytes(File);
				// The 4-byte UTF-32 marks must be tested before the 2-byte UTF-16 marks:
				// the UTF-32 LE mark (FF FE 00 00) begins with the UTF-16 LE mark (FF FE),
				// so testing UTF-16 first would make UTF-32 LE undetectable.
				if (Data.Length >= 4)
				{
					if (Data[0] == 0x00 && Data[1] == 0x00 && Data[2] == 0xFE && Data[3] == 0xFF) return Encoding.Utf32Be;
					if (Data[0] == 0xFF && Data[1] == 0xFE && Data[2] == 0x00 && Data[3] == 0x00) return Encoding.Utf32Le;
				}
				if (Data.Length >= 3)
				{
					if (Data[0] == 0xEF && Data[1] == 0xBB && Data[2] == 0xBF) return Encoding.Utf8;
					if (Data[0] == 0x2b && Data[1] == 0x2f && Data[2] == 0x76) return Encoding.Utf7;
				}
				if (Data.Length >= 2)
				{
					if (Data[0] == 0xFE && Data[1] == 0xFF) return Encoding.Utf16Be;
					if (Data[0] == 0xFF && Data[1] == 0xFE) return Encoding.Utf16Le;
				}

				// No byte order mark found: fall back to statistical detection over the whole file.
				UniversalDetector Det = new UniversalDetector(null);
				Det.HandleData(Data, 0, Data.Length);
				Det.DataEnd();
				switch (Det.GetDetectedCharset())
				{
					case "SHIFT_JIS":
						return Encoding.Shift_JIS;
				}
				Det.Reset();
				return Encoding.Unknown;
			}
			catch
			{
				// Deliberately best-effort: an unreadable file is reported as an unknown encoding.
				return Encoding.Unknown;
			}
		}
コード例 #2
0
ファイル: Form1.cs プロジェクト: MACDfree/nuniversalchardet
        /// <summary>
        /// Downloads the page at the URL entered in <c>UrlBox</c>, detects its character set with a
        /// <c>UniversalDetector</c>, and shows the detected charset in <c>CharSetBox</c> and the decoded
        /// page text in <c>PageBox</c>.
        /// </summary>
        /// <param name="sender">The control that raised the event (unused).</param>
        /// <param name="e">The event arguments (unused).</param>
        private void Check_Click(object sender, EventArgs e)
        {
            CharSetBox.Text = "";
            PageBox.Text = "";

            byte[] pageBytes;
            try
            {
                // Creating the request is inside the try block as well: a malformed URL or a
                // non-HTTP scheme makes Create / the cast throw, which must not crash the handler.
                HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(UrlBox.Text);
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                {
                    if (response.StatusCode != HttpStatusCode.OK)
                    {
                        // Both text boxes stay empty; the using block still releases the connection.
                        return;
                    }

                    using (Stream body = response.GetResponseStream())
                    using (MemoryStream collected = new MemoryStream())
                    {
                        byte[] chunk = new byte[4096];
                        int read;
                        while ((read = body.Read(chunk, 0, chunk.Length)) > 0)
                        {
                            collected.Write(chunk, 0, read);
                        }
                        pageBytes = collected.ToArray();
                    }
                }
            }
            catch
            {
                // Any failure while building the request or fetching the page is reported the same way.
                CharSetBox.Text = "网页获取错误!";
                return;
            }

            if (pageBytes.Length == 0)
            {
                return;
            }

            UniversalDetector detector = new UniversalDetector(null);
            detector.HandleData(pageBytes, 0, pageBytes.Length);
            detector.DataEnd();
            // Read the result once, before Reset() is called on the detector.
            string charset = detector.GetDetectedCharset();
            detector.Reset();

            if (charset == null)
            {
                return;
            }

            CharSetBox.Text = "OK! CharSet=" + charset;
            try
            {
                PageBox.Text = System.Text.Encoding.GetEncoding(charset).GetString(pageBytes);
            }
            catch (ArgumentException)
            {
                // The detected charset name is not one .NET recognises; the charset is
                // still shown, but the page text cannot be decoded and is left empty.
            }
            catch (NotSupportedException)
            {
                // The encoding exists but is not available on this platform; leave the page text empty.
            }
        }