예제 #1
0
        /// <summary>
        /// Detects the text encoding of a seekable stream by feeding the bytes from its
        /// current position onward to a <c>UniversalDetector</c>. The stream position is
        /// restored before returning, even when reading fails.
        /// </summary>
        /// <param name="seekable_stream">Stream to inspect; must support seeking.</param>
        /// <returns>The detected encoding, or <c>null</c> when no charset was reported.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="seekable_stream"/> is null.</exception>
        /// <exception cref="NotSupportedException">The stream cannot seek.</exception>
        public static Encoding Detect(Stream seekable_stream)
        {
            if (seekable_stream == null)
            {
                throw new ArgumentNullException("seekable_stream");
            }
            if (!seekable_stream.CanSeek)
            {
                throw new NotSupportedException("Detect encoding error: stream can't seek.");
            }

            long ori_pos = seekable_stream.Position;

            UniversalDetector detector = new UniversalDetector(null);

            try
            {
                byte[] buffer = new byte[4096];
                int    cur;

                // IsDone() is tested first so that no chunk is read and thrown away
                // once the detector has already made up its mind.
                while (!detector.IsDone() && (cur = seekable_stream.Read(buffer, 0, buffer.Length)) > 0)
                {
                    detector.HandleData(buffer, 0, cur);
                }
                detector.DataEnd();
            }
            finally
            {
                // Always hand the stream back where the caller left it.
                seekable_stream.Seek(ori_pos, SeekOrigin.Begin);
            }

            // NOTE(review): in the juniversalchardet-style API this mirrors, DataEnd() can settle on a
            // charset without IsDone() ever becoming true, so the detected name is consulted directly
            // instead of gating on IsDone() - confirm against the detector version in use.
            string charset = detector.GetDetectedCharset();
            if (string.IsNullOrEmpty(charset))
            {
                return(null);
            }
            return(Encoding.GetEncoding(charset));
        }
예제 #2
0
        /// <summary>
        /// Detects the encoding of a byte stream by reading it from the beginning in
        /// 1024-byte chunks. The stream is repositioned to where it was on entry.
        /// </summary>
        /// <param name="stream">Byte stream to inspect; may be null.</param>
        /// <returns>
        /// The detected encoding, or <see cref="Encoding.Default"/> when the stream is null,
        /// empty, or no charset was detected.
        /// </returns>
        private static Encoding GetEncoding(Stream stream)
        {
            if (stream != null && stream.Length > 0)
            {
                // Detection works on 1024-byte chunks.
                var buffer = new byte[1024];

                var seek = stream.Position;
                var ud   = new UniversalDetector(null);

                try
                {
                    stream.Seek(0, SeekOrigin.Begin);

                    int read;
                    while (!ud.IsDone() && (read = stream.Read(buffer, 0, buffer.Length)) > 0)
                    {
                        // Pass only the bytes actually read; the tail of the buffer may still
                        // hold data from a previous, longer read.
                        ud.HandleData(buffer, 0, read);
                    }
                    ud.DataEnd();
                }
                finally
                {
                    // Restore the caller's position even if reading failed.
                    stream.Seek(seek, SeekOrigin.Begin);
                }

                var encoding = ud.GetDetectedCharset();
                if (encoding != null)
                {
                    // These two UCS-4 charset names are looked up as "UTF-32" instead
                    // (presumably because .NET has no encoding registered under them - TODO confirm).
                    if (encoding == Constants.CHARSET_X_ISO_10646_UCS_4_2143 || encoding == Constants.CHARSET_X_ISO_10646_UCS_4_3412)
                    {
                        encoding = "UTF-32";
                    }

                    return(Encoding.GetEncoding(encoding));
                }
            }

            return(Encoding.Default);
        }
예제 #3
0
        /// <summary>
        /// Reads <paramref name="stream"/> to its end, detects the charset of the bytes with a
        /// <c>UniversalDetector</c>, and returns the content decoded as text.
        /// </summary>
        /// <param name="stream">Source stream; read from its current position to the end.</param>
        /// <param name="default_encoding">
        /// Encoding used when no charset is detected; when null, the enclosing type's
        /// <c>Default</c> member is used instead.
        /// </param>
        /// <returns>The decoded text.</returns>
        public static string DetectAndReadToEnd(Stream stream, Encoding default_encoding)
        {
            using (var ms = new MemoryStream())
            {
                int buffer_size = 4096, cur;

                byte[]            buffer      = new byte[buffer_size];
                bool              detect_done = false;
                UniversalDetector detector    = new UniversalDetector(null);

                while ((cur = stream.Read(buffer, 0, buffer_size)) > 0)
                {
                    ms.Write(buffer, 0, cur);

                    // Keep buffering to the end, but stop feeding the detector once it is done.
                    if (!detect_done)
                    {
                        detector.HandleData(buffer, 0, cur);
                        detect_done = detector.IsDone();
                    }
                }
                detector.DataEnd();

                // NOTE(review): in the juniversalchardet-style API this mirrors, DataEnd() can settle on a
                // charset without IsDone() ever becoming true, so the detected name is consulted directly
                // instead of relying on detect_done - confirm against the detector version in use.
                string charset = detector.GetDetectedCharset();

                Encoding encoding;

                if (!string.IsNullOrEmpty(charset))
                {
                    encoding = Encoding.GetEncoding(charset);
                }
                else if (default_encoding != null)
                {
                    encoding = default_encoding;
                }
                else
                {
                    encoding = Default;
                }

                ms.Seek(0, SeekOrigin.Begin);

                using (var sr = new StreamReader(ms, encoding))
                {
                    return(sr.ReadToEnd());
                }
            }
        }
예제 #4
0
        /// <summary>
        /// Detects an encoding with the UniversalCharDet algorithm, reading the data from
        /// <c>this.MemoryStream</c> starting at its current position.
        /// </summary>
        /// <param name="bytes">
        /// Not read by this method; detection runs on <c>this.MemoryStream</c>.
        /// NOTE(review): presumably the stream wraps these bytes - verify against the caller.
        /// </param>
        /// <returns>The detected encoding, or <c>null</c> when no charset was detected.</returns>
        private Encoding GetEncodingByUniversalCharDet(byte[] bytes)
        {
            var detector     = new UniversalDetector(null);
            var detectBuffer = new byte[4096];
            int read;

            while ((read = this.MemoryStream.Read(detectBuffer, 0, detectBuffer.Length)) > 0 && !detector.IsDone())
            {
                // Feed only the bytes actually read; a short final read leaves stale data
                // in the rest of the buffer.
                detector.HandleData(detectBuffer, 0, read);
            }

            detector.DataEnd();

            string charset = detector.GetDetectedCharset();
            if (!string.IsNullOrEmpty(charset))
            {
                return(Encoding.GetEncoding(charset));
            }

            return(null);
        }
예제 #5
0
        /// <summary>
        /// Detects the charset of a file and prints the charset name and the matching
        /// .NET encoding name to the console. Nothing is printed for an empty file.
        /// When no charset is detected, "ASCII" is reported.
        /// </summary>
        /// <param name="filePath">Path of the file to inspect.</param>
        static void ProcessFile(String filePath)
        {
            // Read-only access is enough here; "using" releases the handle even if detection throws.
            using (var fileStream = new FileStream(filePath, FileMode.Open, FileAccess.Read))
            {
                if (fileStream.Length <= 0)
                {
                    return;
                }

                int detectionLength;
                var detectionBuffer   = new Byte[4096];
                var universalDetector = new UniversalDetector(null);

                while (!universalDetector.IsDone() && (detectionLength = fileStream.Read(detectionBuffer, 0, detectionBuffer.Length)) > 0)
                {
                    // Only the bytes actually read are valid; the final chunk is usually short.
                    universalDetector.HandleData(detectionBuffer, 0, detectionLength);
                }

                universalDetector.DataEnd();

                // Fall back to ASCII when the detector reports nothing.
                var charset = universalDetector.GetDetectedCharset() ?? "ASCII";

                Console.WriteLine("Charset: " + charset + ". Encoding: " + System.Text.Encoding.GetEncoding(charset).EncodingName);
                Console.WriteLine();
            }
        }
예제 #6
0
        /// <summary>
        /// Returns the text encoding of a file, detected from its content.
        /// </summary>
        /// <param name="streamName">Path of the file to open and inspect.</param>
        /// <returns>
        /// The detected encoding, or <see cref="Encoding.Default"/> when the file is empty
        /// or no charset was detected.
        /// </returns>
        private static Encoding getEncoding(string streamName)
        {
            Encoding encoding = Encoding.Default;

            // Detection only reads, so the file is opened read-only and fed to the detector
            // directly instead of being copied into memory first.
            using (Stream stream = new FileStream(streamName, FileMode.Open, FileAccess.Read))
            {
                int               DetLen;
                UniversalDetector Det        = new UniversalDetector(null);
                byte[]            DetectBuff = new byte[4096];

                while (!Det.IsDone() && (DetLen = stream.Read(DetectBuff, 0, DetectBuff.Length)) > 0)
                {
                    // Pass the number of bytes read, not the buffer capacity.
                    Det.HandleData(DetectBuff, 0, DetLen);
                }
                Det.DataEnd();

                string charset = Det.GetDetectedCharset();
                if (charset != null)
                {
                    encoding = Encoding.GetEncoding(charset);
                }
            }

            return(encoding);
        }
예제 #7
0
        /// <summary>
        /// Downloads a web page with HttpWebRequest and returns it as text. When no charset is
        /// supplied, the charset is detected from the response body; if detection fails too,
        /// utf-8 is used.
        /// </summary>
        /// <param name="url">Absolute URL; must contain "http".</param>
        /// <param name="encoding">Charset name used for decoding, or null/empty to auto-detect.</param>
        /// <returns>The decoded page content.</returns>
        /// <exception cref="ArgumentException"><paramref name="url"/> does not contain "http".</exception>
        private static string GetWeb(string url, string encoding)
        {
            if (url.IndexOf("http") == -1)// no HTTP scheme present
            {
                throw new ArgumentException("请提供完整的HTTP地址");
            }

            System.Net.HttpWebRequest myrequest = (System.Net.HttpWebRequest)System.Net.WebRequest.Create(url);
            myrequest.Timeout = 600000;// timeout: 10 minutes
            // Ask for an uncached response.
            myrequest.Headers.Set("Pragma", "no-cache");

            // "using" closes the response, its stream and the buffer on every path.
            using (System.Net.HttpWebResponse myresponse = (System.Net.HttpWebResponse)myrequest.GetResponse())
            using (System.IO.Stream mystream = myresponse.GetResponseStream())
            using (System.IO.MemoryStream memoryStream = new System.IO.MemoryStream())
            {
                // Buffer the whole body so it can be scanned for its charset and then decoded.
                int    len;
                byte[] buff = new byte[512];
                while ((len = mystream.Read(buff, 0, buff.Length)) > 0)
                {
                    memoryStream.Write(buff, 0, len);
                }

                // Detection only matters when the caller did not name a charset.
                if (string.IsNullOrEmpty(encoding))
                {
                    memoryStream.Seek(0, System.IO.SeekOrigin.Begin);

                    int               DetLen;
                    byte[]            DetectBuff = new byte[4096];
                    UniversalDetector Det        = new UniversalDetector(null);
                    while (!Det.IsDone() && (DetLen = memoryStream.Read(DetectBuff, 0, DetectBuff.Length)) > 0)
                    {
                        // Feed only the bytes actually read, not the whole buffer.
                        Det.HandleData(DetectBuff, 0, DetLen);
                    }
                    Det.DataEnd();

                    // Documented fallback: utf-8 when no charset could be determined.
                    encoding = Det.GetDetectedCharset() ?? "utf-8";
                }

                // Rewind before decoding.
                memoryStream.Seek(0, System.IO.SeekOrigin.Begin);

                System.Text.Encoding myencoding = System.Text.Encoding.GetEncoding(encoding);
                using (System.IO.StreamReader mystreamreader = new System.IO.StreamReader(memoryStream, myencoding))
                {
                    return(mystreamreader.ReadToEnd());
                }
            }
        }
예제 #8
0
        /// <summary>
        /// Performs an HTTP GET, decodes the body with an auto-detected charset (GBK when
        /// detection fails), appends a diagnostic trailer (status, cookies, request and
        /// response headers) and, when requested, follows Location / Refresh redirects by
        /// calling itself and prepending the redirected result.
        /// </summary>
        /// <param name="url">Target URL; passed through <c>getDealUrl</c> first.</param>
        /// <param name="cookies">Cookie collection sent with the request and updated from the response.</param>
        /// <param name="refrere">Value for the Referer header.</param>
        /// <param name="encoding">
        /// Documented as 1 = gbk, 2 = utf8, 3 = auto.
        /// NOTE(review): the body never reads this parameter; the charset is always auto-detected.
        /// </param>
        /// <param name="timeout">Request timeout in seconds.</param>
        /// <param name="isRedirect">When true, Location / Refresh headers and meta refresh tags are followed.</param>
        /// <returns>The decoded body plus diagnostic trailer, or the exception message when the request fails.</returns>
        public string httpGET(string url, ref CookieCollection cookies, string refrere, int encoding, int timeout, bool isRedirect)
        {
            url = getDealUrl(url);
            Stream          stream          = null;
            HttpWebResponse httpWebResponse = null;
            HttpWebRequest  httpWebRequest  = null;
            string          result;

            try {
                ServicePointManager.Expect100Continue      = false;
                ServicePointManager.DefaultConnectionLimit = 1000;
                ServicePointManager.ServerCertificateValidationCallback = new RemoteCertificateValidationCallback(CheckValidationResult);
                httpWebRequest = (HttpWebRequest)WebRequest.Create(url);
                httpWebRequest.Headers.Clear();
                httpWebRequest.AutomaticDecompression = DecompressionMethods.GZip;
                httpWebRequest.CookieContainer        = xkCookies.CookieContainer(cookies, url);
                httpWebRequest.KeepAlive         = true;
                httpWebRequest.ProtocolVersion   = HttpVersion.Version10;
                httpWebRequest.Method            = "GET";
                httpWebRequest.Referer           = refrere;
                httpWebRequest.Timeout           = timeout * 1000;
                // Redirects are handled manually further down so that each hop can be reported.
                httpWebRequest.AllowAutoRedirect = false;
                httpWebRequest.Accept            = "image/jpeg, application/x-ms-application, image/gif, application/xaml+xml, image/pjpeg, application/x-ms-xbap, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*";
                httpWebRequest.Headers.Add("Accept-Language", "zh-cn");
                httpWebRequest.UserAgent = useragent;
                // Snapshot of the request headers for the diagnostic trailer.
                string text = httpWebRequest.Headers.ToString();
                httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse();
                stream          = httpWebResponse.GetResponseStream();
                xkCookies.UpCookie(ref cookies, url, httpWebResponse.Headers["Set-Cookie"], httpWebResponse.Cookies);
                string tmp_result = "";
                // NOTE(review): the decompression wrappers below are assigned to "stream", but the body is
                // read from "mystream" (a fresh GetResponseStream() call), so "stream" is only closed in
                // the finally block - confirm whether this is intentional.
                if (httpWebResponse.ContentEncoding.ToLower().Contains("gzip"))
                {
                    stream = new GZipStream(stream, CompressionMode.Decompress);
                }
                else
                {
                    if (httpWebResponse.ContentEncoding.ToLower().Contains("deflate"))
                    {
                        stream = new DeflateStream(stream, CompressionMode.Decompress);
                    }
                }

                // Buffer the whole body in memory.
                Stream       mystream = httpWebResponse.GetResponseStream();
                MemoryStream msTemp   = new MemoryStream();
                int          len      = 0;
                byte[]       buff     = new byte[512];

                while ((len = mystream.Read(buff, 0, 512)) > 0)
                {
                    msTemp.Write(buff, 0, len);
                }
                httpWebResponse.Close();

                if (msTemp.Length > 0)
                {
                    // Raw bytes kept for decoding once the charset is known.
                    msTemp.Seek(0, SeekOrigin.Begin);
                    byte[] PageBytes = new byte[msTemp.Length];
                    msTemp.Read(PageBytes, 0, PageBytes.Length);

                    // Second pass over the buffer: charset detection in 4 KB chunks.
                    msTemp.Seek(0, SeekOrigin.Begin);
                    int               DetLen     = 0;
                    byte[]            DetectBuff = new byte[4096];
                    UniversalDetector Det        = new UniversalDetector(null);
                    while ((DetLen = msTemp.Read(DetectBuff, 0, DetectBuff.Length)) > 0 && !Det.IsDone())
                    {
                        // Feed only the bytes actually read; the final chunk is usually shorter
                        // than the buffer and the remainder holds stale data.
                        Det.HandleData(DetectBuff, 0, DetLen);
                    }
                    Det.DataEnd();
                    if (Det.GetDetectedCharset() != null)
                    {
                        tmp_result = System.Text.Encoding.GetEncoding(Det.GetDetectedCharset()).GetString(PageBytes);
                    }
                    else
                    {
                        // Detection failed: decode as GBK.
                        tmp_result = System.Text.Encoding.GetEncoding("GBK").GetString(PageBytes);
                    }
                }

                // Append the diagnostic trailer to the decoded body.
                tmp_result = string.Concat(new object[]
                {
                    tmp_result,
                    "\r\n\r\n=================================================\r\n\r\n本次请求:",
                    url,
                    " 响应结果:",
                    httpWebResponse.StatusCode,
                    "\r\n\r\nCookie数量",
                    httpWebRequest.CookieContainer.Count,
                    "\r\n",
                    httpWebRequest.CookieContainer.GetCookieHeader(new Uri(url)),
                    "\r\nrequest:\r\n",
                    text,
                    "\r\nresponse:\r\n",
                    httpWebResponse.Headers.ToString(),
                    "\r\n\r\n=================================================\r\n\r\n"
                });
                if (isRedirect)
                {
                    // 1) Location header redirect.
                    if (httpWebResponse.Headers["Location"] != null && httpWebResponse.Headers["Location"].Length > 2)
                    {
                        string url_redirect = "";
                        if (httpWebResponse.Headers["Location"].ToLower().Contains("http://"))
                        {
                            url_redirect = httpWebResponse.Headers["Location"];
                        }
                        else
                        {
                            // Location without "http://" is resolved against the current URL via geturl.
                            url_redirect = geturl(httpWebResponse.Headers["Location"], url);
                        }
                        tmp_result = httpGET(url_redirect, ref cookies, url, 3, 10, isRedirect) + tmp_result;
                    }
                    else
                    {
                        // 2) Refresh header redirect: the target is whatever follows "url=".
                        if (httpWebResponse.Headers["Refresh"] != null && httpWebResponse.Headers["Refresh"].Length > 2)
                        {
                            string text3 = httpWebResponse.Headers["Refresh"].ToLower().Replace("url=", "`").Split('`')[1];
                            if (!text3.Contains("http://"))
                            {
                                text3 = geturl(text3, url);
                            }
                            tmp_result = httpGET(text3, ref cookies, url, 3, 10, isRedirect) + tmp_result;
                        }
                    }
                    // 3) <meta http-equiv="Refresh"> redirect found in the accumulated text.
                    if (tmp_result.Contains("Refresh"))
                    {
                        Winista.Text.HtmlParser.Util.NodeList htmlNodes = new Parser(new Lexer(tmp_result)).Parse(new TagNameFilter("meta"));
                        if (htmlNodes.Count > 1)
                        {
                            for (int i = 0; i < htmlNodes.Count; i++)
                            {
                                MetaTag option = (MetaTag)htmlNodes.ElementAt(i);
                                if (option.GetAttribute("http-equiv") == "Refresh")
                                {
                                    string content = option.GetAttribute("content");
                                    string text3   = content.ToLower().Replace("url=", "`").Split('`')[1];

                                    if (!text3.Contains("http://"))
                                    {
                                        text3 = geturl(text3, url);
                                    }
                                    tmp_result = httpGET(text3, ref cookies, url, 3, 10, isRedirect) + tmp_result;
                                }
                            }
                        }
                    }
                }
                httpWebResponse.Close();
                httpWebRequest.Abort();
                result = tmp_result;

                // Success is echoed unless the URL contains one of these substrings.
                if (!url.Contains(":8888") && !url.Contains("renzhe") && !url.Contains("zq535228") && !url.Contains("whoissoft") && !url.Contains("chinaz"))
                {
                    EchoHelper.Echo(string.Format("成功获取:{0}的HTML内容。", url), null, EchoHelper.EchoType.普通信息);
                }
            } catch (Exception ex) {
                // Failures are reported to the caller as the returned text rather than thrown.
                result = ex.Message;
            } finally {
                if (stream != null)
                {
                    stream.Close();
                }
                if (httpWebResponse != null)
                {
                    httpWebResponse.Close();
                }
                if (httpWebRequest != null)
                {
                    httpWebRequest.Abort();
                }
            }
            return(result);
        }
예제 #9
0
        /// <summary>
        /// Downloads a web page with HttpWebRequest and returns it as text. When no charset is
        /// supplied, the charset is detected from the response body; if detection fails too,
        /// utf-8 is used. Errors are written to the debug output and an empty string is returned.
        /// </summary>
        /// <param name="url">Absolute URL; must contain "http".</param>
        /// <param name="encoding">Charset name used for decoding, or null/empty to auto-detect.</param>
        /// <returns>The decoded page content, or an empty string on failure.</returns>
        private static string GetWeb(string url, string encoding)
        {
            string strHtmlContent = "";

            System.IO.Stream           mystream   = new System.IO.MemoryStream();
            System.Net.HttpWebRequest  myrequest  = null;
            System.Net.HttpWebResponse myresponse = null;
            try
            {
                myrequest = (System.Net.HttpWebRequest)System.Net.WebRequest.Create(url);

                if (url.IndexOf("http") == -1)// no HTTP scheme present
                {
                    throw new Exception("请提供完整的HTTP地址");
                }

                myrequest.Timeout = 20 * 1000;// timeout: 20 seconds
                // Ask for an uncached response.
                myrequest.Headers.Set("Pragma", "no-cache");
                if (myrequest.KeepAlive)
                {
                    try
                    {
                        myresponse = (System.Net.HttpWebResponse)myrequest.GetResponse();
                        mystream   = myresponse.GetResponseStream();
                    }
                    catch (Exception ex)
                    {
                        System.Diagnostics.Debug.WriteLine(DateTime.Now + "获取网页内容出错:url:" + url + "\r\n" + ex.Message + " " + (ex.StackTrace == null ? " " : " " + ex.StackTrace));

                        return(strHtmlContent);
                    }
                }

                // In-memory copy of the body, scanned for its charset and then decoded.
                using (System.IO.MemoryStream memoryStream = new System.IO.MemoryStream())
                {
                    int    len;
                    byte[] buff = new byte[512];
                    while ((len = mystream.Read(buff, 0, buff.Length)) > 0)
                    {
                        memoryStream.Write(buff, 0, len);
                    }

                    // Detection only matters when the caller did not name a charset.
                    if (string.IsNullOrEmpty(encoding))
                    {
                        memoryStream.Seek(0, System.IO.SeekOrigin.Begin);

                        int               DetLen;
                        byte[]            DetectBuff = new byte[4096];
                        UniversalDetector Det        = new UniversalDetector(null);
                        while (!Det.IsDone() && (DetLen = memoryStream.Read(DetectBuff, 0, DetectBuff.Length)) > 0)
                        {
                            // Feed only the bytes actually read, not the whole buffer.
                            Det.HandleData(DetectBuff, 0, DetLen);
                        }
                        Det.DataEnd();

                        // Documented fallback: utf-8 when no charset could be determined.
                        encoding = Det.GetDetectedCharset() ?? "utf-8";
                    }

                    // Rewind before decoding.
                    memoryStream.Seek(0, System.IO.SeekOrigin.Begin);

                    System.Text.Encoding myencoding = System.Text.Encoding.GetEncoding(encoding);
                    using (System.IO.StreamReader mystreamreader = new System.IO.StreamReader(memoryStream, myencoding))
                    {
                        strHtmlContent = mystreamreader.ReadToEnd();
                    }
                }
            }
            catch (Exception ex)
            {
                System.Diagnostics.Debug.WriteLine(DateTime.Now + "获取网页内容出错:url:" + url + "\r\n" + ex.Message + " " + (ex.StackTrace == null ? " " : " " + ex.StackTrace));
            }
            finally
            {
                mystream.Close();
                mystream.Dispose();
                // Close the response on the failure path as well.
                if (myresponse != null)
                {
                    myresponse.Close();
                }
                // Abort the request explicitly to tear down the connection.
                if (myrequest != null)
                {
                    myrequest.Abort();
                }
            }
            return(strHtmlContent);
        }
예제 #10
0
        /// <summary>
        /// Reads <paramref name="stream"/> to its end, detects the charset of the bytes and
        /// decodes them. Best-effort: failures are written to the debug output and the
        /// outputs keep their defaults.
        /// </summary>
        /// <param name="stream">Source stream; may be null, in which case the defaults are returned.</param>
        /// <param name="htmlText">Decoded content, or an empty string when nothing was detected.</param>
        /// <param name="enc">Detected encoding, or <see cref="Encoding.Default"/> when nothing was detected.</param>
        private void DetectedCharset(Stream stream, out string htmlText, out Encoding enc)
        {
            htmlText = "";
            enc      = Encoding.Default;

            if (stream == null)
            {
                return;
            }

            try
            {
                using (MemoryStream msTemp = new MemoryStream())
                {
                    // Buffer the whole stream so it can be scanned and then decoded.
                    int    len;
                    byte[] buff = new byte[512];
                    while ((len = stream.Read(buff, 0, buff.Length)) > 0)
                    {
                        msTemp.Write(buff, 0, len);
                    }

                    if (msTemp.Length == 0)
                    {
                        return;
                    }

                    byte[] PageBytes = msTemp.ToArray();

                    msTemp.Seek(0, SeekOrigin.Begin);
                    int               DetLen;
                    byte[]            DetectBuff = new byte[4096];
                    UniversalDetector Det        = new UniversalDetector(null);
                    while (!Det.IsDone() && (DetLen = msTemp.Read(DetectBuff, 0, DetectBuff.Length)) > 0)
                    {
                        // Feed only the bytes actually read, not the whole buffer.
                        Det.HandleData(DetectBuff, 0, DetLen);
                    }
                    Det.DataEnd();

                    string charset = Det.GetDetectedCharset();
                    if (charset != null)
                    {
                        // Encoding of the page content.
                        enc = Encoding.GetEncoding(charset);
                        // Content decoded with that encoding.
                        htmlText = enc.GetString(PageBytes);
                    }
                }
            }
            catch (System.Exception ex)
            {
                // Still best-effort, but leave a trace instead of swallowing silently.
                System.Diagnostics.Debug.WriteLine("DetectedCharset failed: " + ex);
            }
        }
예제 #11
0
        /// <summary>
        /// Reads a file and returns its content decoded with an auto-detected charset.
        /// </summary>
        /// <param name="file">File to read.</param>
        /// <returns>
        /// The decoded text, or an empty string when the file is empty or its charset cannot
        /// be detected (the latter is also reported through <c>EchoHelper.Echo</c>).
        /// </returns>
        public static string Read_File(FileInfo file)
        {
            string tmp_result = "";

            // "using" releases the file handle and the buffer on every path.
            using (Stream mystream = file.OpenRead())
            using (MemoryStream msTemp = new MemoryStream())
            {
                int    len;
                byte[] buff = new byte[512];

                while ((len = mystream.Read(buff, 0, buff.Length)) > 0)
                {
                    msTemp.Write(buff, 0, len);
                }

                if (msTemp.Length > 0)
                {
                    byte[] PageBytes = msTemp.ToArray();

                    msTemp.Seek(0, SeekOrigin.Begin);
                    int               DetLen;
                    byte[]            DetectBuff = new byte[4096];
                    UniversalDetector Det        = new UniversalDetector(null);
                    while (!Det.IsDone() && (DetLen = msTemp.Read(DetectBuff, 0, DetectBuff.Length)) > 0)
                    {
                        // Feed only the bytes actually read, not the whole buffer.
                        Det.HandleData(DetectBuff, 0, DetLen);
                    }
                    Det.DataEnd();

                    string charset = Det.GetDetectedCharset();
                    if (charset != null)
                    {
                        tmp_result = System.Text.Encoding.GetEncoding(charset).GetString(PageBytes);
                    }
                    else
                    {
                        EchoHelper.Echo("编码识别失败,请手工转码为UTF8保存到任务文件夹。文件:" + file.Name.ToLower(), "编码识别", EchoHelper.EchoType.任务信息);
                    }
                }
            }
            return(tmp_result);
        }
예제 #12
0
        /// <summary>
        /// Click handler: downloads the page named in <c>UrlBox</c>, detects its charset and
        /// shows the charset in <c>CharSetBox</c> and up to the first 2000 characters of the
        /// decoded page in <c>PageBox</c>. The button is disabled while the request runs.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private async void button_Click(object sender, RoutedEventArgs e)
        {
            CharSetBox.Text  = "";
            PageBox.Text     = "";
            button.IsEnabled = false;
            try
            {
                HttpWebRequest  hwr = (HttpWebRequest)HttpWebRequest.Create(UrlBox.Text);
                HttpWebResponse res;
                try
                {
                    res = (HttpWebResponse)await hwr.GetResponseAsync();
                }
                catch
                {
                    CharSetBox.Text = "网页获取错误!";
                    return;
                }

                // Dispose the response on every path, including non-OK statuses.
                using (res)
                {
                    if (res.StatusCode != HttpStatusCode.OK)
                    {
                        return;
                    }

                    using (MemoryStream msTemp = new MemoryStream())
                    {
                        // Download asynchronously so the handler does not block while the body arrives.
                        using (Stream mystream = res.GetResponseStream())
                        {
                            await mystream.CopyToAsync(msTemp);
                        }

                        if (msTemp.Length == 0)
                        {
                            return;
                        }

                        byte[] PageBytes = msTemp.ToArray();

                        msTemp.Seek(0, SeekOrigin.Begin);
                        int               DetLen;
                        byte[]            DetectBuff = new byte[4096];
                        UniversalDetector Det        = new UniversalDetector(null);
                        while (!Det.IsDone() && (DetLen = msTemp.Read(DetectBuff, 0, DetectBuff.Length)) > 0)
                        {
                            // Feed only the bytes actually read, not the whole buffer.
                            Det.HandleData(DetectBuff, 0, DetLen);
                        }
                        Det.DataEnd();

                        string charset = Det.GetDetectedCharset();
                        if (charset != null)
                        {
                            CharSetBox.Text = "OK! CharSet=" + charset;
                            string page = System.Text.Encoding.GetEncoding(charset).GetString(PageBytes);
                            // Show at most the first 2000 characters.
                            if (page.Length > 2000)
                            {
                                page = page.Substring(0, 2000);
                            }
                            PageBox.Text = page;
                        }
                    }
                }
            }
            catch (System.Exception ex)
            {
                // Still best-effort for the UI, but leave a trace instead of swallowing silently.
                System.Diagnostics.Debug.WriteLine("button_Click failed: " + ex);
            }
            finally
            {
                button.IsEnabled = true;
            }
        }
예제 #13
0
        /// <summary>
        /// Detects the charset of a byte array and decodes it to a string.
        /// </summary>
        /// <param name="buffer">Bytes to decode; may be null.</param>
        /// <returns>
        /// The decoded text, or an empty string when <paramref name="buffer"/> is null or empty,
        /// no charset is detected, or the detected charset name is rejected by
        /// <see cref="System.Text.Encoding.GetEncoding(string)"/>.
        /// </returns>
        public string GetString(byte[] buffer)
        {
            string result = string.Empty;

            if (buffer == null || buffer.Length == 0)
            {
                return(result);
            }

            using (MemoryStream msTemp = new MemoryStream(buffer))
            {
                int    DetLen;
                byte[] DetectBuff = new byte[4096];

                UniversalDetector det = new UniversalDetector(null);
                while (!det.IsDone() && (DetLen = msTemp.Read(DetectBuff, 0, DetectBuff.Length)) > 0)
                {
                    // Feed only the bytes actually read; the final chunk is usually shorter
                    // than the buffer and the remainder holds stale data.
                    det.HandleData(DetectBuff, 0, DetLen);
                }
                det.DataEnd();

                string charset = det.GetDetectedCharset();
                if (charset != null)
                {
                    try
                    {
                        result = System.Text.Encoding.GetEncoding(charset).GetString(buffer);
                    }
                    catch (ArgumentException)
                    {
                        // GetEncoding rejected the charset name: keep the empty result.
                    }
                }
            }

            return(result);
        }