Beispiel #1
0
        /// <summary>
        /// Detects the character encoding of an HTTP response.
        /// The content-type string of <paramref name="response"/> is consulted first; when it
        /// names no usable charset, the body is decoded as ASCII and scanned for a
        /// <c>text/html</c> or <c>text/xml</c> content-type declaration such as
        /// <c>&lt;META http-equiv="Content-Type" content="text/html; charset=GB2312"&gt;</c>.
        /// </summary>
        /// <param name="response">Response whose <c>ContentTypeStr</c> is inspected; may be null.</param>
        /// <param name="bytes">Raw response body to scan; may be null.</param>
        /// <returns>
        /// The detected encoding, or null when no charset is declared or the declared
        /// charset is not a supported encoding name.
        /// </returns>
        public static Encoding DetectCharset(QuickWebResponse response, byte[] bytes)
        {
            // Charset names may contain several hyphens ("ISO-8859-1", "EUC-KR"), so the whole
            // run of word characters and hyphens is captured. The lazy ".*?" selects the charset
            // closest to the media type when one line carries more than one "charset=".
            // An optional quote is accepted because the parameter value may be quoted.
            string regexstr = "(text/html|text/xml).*?charset\\s*=\\s*[\"']?(?<charset>[\\w\\-]+)";
            Regex regex = new Regex(regexstr, RegexOptions.IgnoreCase);

            string contentType = response != null ? response.ContentTypeStr : null;
            if (contentType != null)
            {
                Encoding fromHeader = TryGetEncodingFromMatch(regex.Match(contentType));
                if (fromHeader != null)
                {
                    return fromHeader;
                }
            }

            if (bytes == null)
            {
                return null;
            }

            // The declaration itself consists of ASCII characters, so an ASCII decode of the
            // body is sufficient to locate it regardless of the real encoding.
            string ascii = System.Text.Encoding.ASCII.GetString(bytes);
            return TryGetEncodingFromMatch(regex.Match(ascii));
        }

        /// <summary>
        /// Resolves the <c>charset</c> group of a successful match to an encoding.
        /// Returns null when the match failed or the name is not a supported encoding.
        /// </summary>
        private static Encoding TryGetEncodingFromMatch(Match match)
        {
            if (!match.Success)
            {
                return null;
            }

            // Invariant casing: culture-sensitive upper-casing can alter letters (e.g. Turkish 'i').
            string charset = match.Groups["charset"].Value.ToUpperInvariant();
            try
            {
                return System.Text.Encoding.GetEncoding(charset);
            }
            catch (ArgumentException)
            {
                // GetEncoding throws rather than returning null for unknown names.
                return null;
            }
            catch (NotSupportedException)
            {
                return null;
            }
        }
Beispiel #2
0
        /// <summary>
        /// Returns the response for this request, reusing the cached response object
        /// while its socket is still connected and creating a new one otherwise.
        /// In both cases <c>ConnectAndGetHeader</c> is invoked before returning.
        /// </summary>
        /// <param name="Timeout">Timeout value passed to a newly created <c>QuickWebResponse</c>.</param>
        /// <returns>The cached or newly created response.</returns>
        public QuickWebResponse GetResponse(int Timeout)
        {
            bool canReuse = response != null && response.SocketConnected;
            if (!canReuse)
            {
                response = new QuickWebResponse(Timeout);
            }

            // Executed for both a fresh and a reused response.
            response.ConnectAndGetHeader(this);
            return response;
        }
Beispiel #3
0
        /// <summary>
        /// Detects the character encoding of an HTTP response.
        /// The content-type string of <paramref name="response"/> is consulted first; when it
        /// names no usable charset, the body is decoded as ASCII and scanned for a
        /// <c>text/html</c> or <c>text/xml</c> content-type declaration such as
        /// <c>&lt;META http-equiv="Content-Type" content="text/html; charset=GB2312"&gt;</c>.
        /// </summary>
        /// <param name="response">Response whose <c>ContentTypeStr</c> is inspected; may be null.</param>
        /// <param name="bytes">Raw response body to scan; may be null.</param>
        /// <returns>
        /// The detected encoding, or null when no charset is declared or the declared
        /// charset is not a supported encoding name.
        /// </returns>
        public static Encoding DetectCharset(QuickWebResponse response, byte[] bytes)
        {
            // Charset names may contain several hyphens ("ISO-8859-1", "EUC-KR"), so the whole
            // run of word characters and hyphens is captured. The lazy ".*?" selects the charset
            // closest to the media type when one line carries more than one "charset=".
            // An optional quote is accepted because the parameter value may be quoted.
            string regexstr = "(text/html|text/xml).*?charset\\s*=\\s*[\"']?(?<charset>[\\w\\-]+)";
            Regex  regex    = new Regex(regexstr, RegexOptions.IgnoreCase);

            string contentType = response != null ? response.ContentTypeStr : null;

            if (contentType != null)
            {
                Encoding fromHeader = TryGetEncodingFromMatch(regex.Match(contentType));
                if (fromHeader != null)
                {
                    return(fromHeader);
                }
            }

            if (bytes == null)
            {
                return(null);
            }

            // The declaration itself consists of ASCII characters, so an ASCII decode of the
            // body is sufficient to locate it regardless of the real encoding.
            string ascii = System.Text.Encoding.ASCII.GetString(bytes);

            return(TryGetEncodingFromMatch(regex.Match(ascii)));
        }

        /// <summary>
        /// Resolves the <c>charset</c> group of a successful match to an encoding.
        /// Returns null when the match failed or the name is not a supported encoding.
        /// </summary>
        private static Encoding TryGetEncodingFromMatch(Match match)
        {
            if (!match.Success)
            {
                return(null);
            }

            // Invariant casing: culture-sensitive upper-casing can alter letters (e.g. Turkish 'i').
            string charset = match.Groups["charset"].Value.ToUpperInvariant();
            try
            {
                return(System.Text.Encoding.GetEncoding(charset));
            }
            catch (ArgumentException)
            {
                // GetEncoding throws rather than returning null for unknown names.
                return(null);
            }
            catch (NotSupportedException)
            {
                return(null);
            }
        }
Beispiel #4
0
        /// <summary>
        /// Obtains the response for this request. A new <c>QuickWebResponse</c> is created
        /// when none is cached or the cached one reports its socket as disconnected;
        /// <c>ConnectAndGetHeader</c> is then called on whichever response is in use.
        /// </summary>
        /// <param name="Timeout">Timeout value handed to the <c>QuickWebResponse</c> constructor.</param>
        /// <returns>The response object held by this request.</returns>
        public QuickWebResponse GetResponse(int Timeout)
        {
            bool needsNewResponse = response == null || !response.SocketConnected;

            if (needsNewResponse)
            {
                response = new QuickWebResponse(Timeout);
            }

            // The header fetch happens whether the response was reused or just created.
            response.ConnectAndGetHeader(this);

            return response;
        }
        /// <summary>
        /// Fetches the resource at <paramref name="url"/> and returns its body decoded to a string.
        /// Failures are swallowed on purpose (best effort): when an exception occurs, whatever
        /// has been decoded so far (initially an empty string) is returned.
        /// </summary>
        /// <param name="url">
        /// Address to fetch. Overwritten with the target address when an HTML META refresh
        /// redirect is followed.
        /// </param>
        /// <param name="checkredirect">When true, a differing <c>RedirectUri</c> on the response is followed.</param>
        /// <param name="checkHtmlRedurect">When true, the decoded content is scanned once for a META refresh redirect.</param>
        /// <param name="timeoutsecond">Timeout value forwarded to <c>GetResponse</c>.</param>
        /// <returns>
        /// The decoded content; null when <c>ReadResponse</c> yields no bytes; otherwise the
        /// result of following a redirect.
        /// </returns>
        private string ReadUrlContent(ref string url, bool checkredirect, bool checkHtmlRedurect, int timeoutsecond)
        {
            string content = String.Empty;

            try
            {
                Uri uri = new Uri(url);
                gRequest = QuickWebRequest.Create(uri, null, gRequest, false);
                gResponse = gRequest.GetResponse(timeoutsecond);

                if (checkredirect
                    && gResponse.RedirectUri != null
                    && gResponse.RedirectUri.AbsoluteUri != url)
                {
                    // NOTE(review): this binds to a three-argument overload that is not
                    // visible in this snippet - confirm it exists and applies a timeout.
                    return ReadUrlContent(gResponse.RedirectUri.AbsoluteUri, false, checkHtmlRedurect);
                }

                byte[] bytes = gResponse.ReadResponse();
                if (bytes == null)
                {
                    return null;
                }

                System.Text.Encoding charset;
                if (!string.IsNullOrEmpty(Encoding))
                {
                    // An explicitly configured encoding always wins, so detection is skipped
                    // and a detection failure can no longer defeat the override.
                    charset = System.Text.Encoding.GetEncoding(Encoding);
                }
                else
                {
                    charset = HttpUtils.DetectCharset(gResponse, bytes);
                    if (charset == null)
                    {
                        // Default to GB2312 when nothing could be detected.
                        charset = System.Text.Encoding.GetEncoding("GB2312");
                    }
                }

                // Convert the body to UTF-8 before decoding it.
                if (charset != System.Text.Encoding.UTF8)
                {
                    bytes = System.Text.Encoding.Convert(charset, System.Text.Encoding.UTF8, bytes);
                    charset = System.Text.Encoding.UTF8;
                }

                content = charset.GetString(bytes);

                // Check once for an HTML (META refresh) redirect.
                if (checkHtmlRedurect)
                {
                    System.Text.RegularExpressions.Regex redirectRegex = new System.Text.RegularExpressions.Regex("<META\\s+HTTP-EQUIV\\s*=\\s*[\"]*Refresh[\"]*\\s+CONTENT=[\"\\s]*\\d+\\s*[;]\\s*URL=(?<match>.*?)[\\s\"]*>", System.Text.RegularExpressions.RegexOptions.Singleline | System.Text.RegularExpressions.RegexOptions.IgnoreCase);
                    System.Text.RegularExpressions.Match match = redirectRegex.Match(content);
                    if (match.Success)
                    {
                        string target = match.Groups["match"].Value;
                        if (!string.IsNullOrEmpty(target) && target != url)
                        {
                            url = target;
                            // HTML redirects are followed only once, hence the literal false.
                            return ReadUrlContent(url, checkredirect, false);
                        }
                    }
                }
            }
            catch
            {
                // Deliberately not handled for now: the method is best effort.
            }
            finally
            {
                // gResponse is still null when the failure happened before it was assigned
                // (bad URL, connection error); an unguarded Close() would throw a
                // NullReferenceException out of this finally block.
                if (gResponse != null)
                {
                    gResponse.Close();
                }
            }

            return content;
        }
Beispiel #6
0
        /// <summary>
        /// Fetches the resource at <paramref name="url"/> and returns its body decoded to a string.
        /// Failures are swallowed on purpose (best effort): when an exception occurs, whatever
        /// has been decoded so far (initially an empty string) is returned.
        /// </summary>
        /// <param name="url">
        /// Address to fetch. Overwritten with the target address when an HTML META refresh
        /// redirect is followed.
        /// </param>
        /// <param name="checkredirect">When true, a differing <c>RedirectUri</c> on the response is followed.</param>
        /// <param name="checkHtmlRedurect">When true, the decoded content is scanned once for a META refresh redirect.</param>
        /// <param name="timeoutsecond">Timeout value forwarded to <c>GetResponse</c>.</param>
        /// <returns>
        /// The decoded content; null when <c>ReadResponse</c> yields no bytes; otherwise the
        /// result of following a redirect.
        /// </returns>
        private string ReadUrlContent(ref string url, bool checkredirect, bool checkHtmlRedurect, int timeoutsecond)
        {
            string content = String.Empty;

            try
            {
                Uri uri = new Uri(url);
                gRequest  = QuickWebRequest.Create(uri, null, gRequest, false);
                gResponse = gRequest.GetResponse(timeoutsecond);

                if (checkredirect
                    && gResponse.RedirectUri != null
                    && gResponse.RedirectUri.AbsoluteUri != url)
                {
                    // NOTE(review): this binds to a three-argument overload that is not
                    // visible in this snippet - confirm it exists and applies a timeout.
                    return(ReadUrlContent(gResponse.RedirectUri.AbsoluteUri, false, checkHtmlRedurect));
                }

                byte[] bytes = gResponse.ReadResponse();
                if (bytes == null)
                {
                    return(null);
                }

                System.Text.Encoding charset;
                if (!string.IsNullOrEmpty(Encoding))
                {
                    // An explicitly configured encoding always wins, so detection is skipped
                    // and a detection failure can no longer defeat the override.
                    charset = System.Text.Encoding.GetEncoding(Encoding);
                }
                else
                {
                    charset = HttpUtils.DetectCharset(gResponse, bytes);
                    if (charset == null)
                    {
                        // Default to GB2312 when nothing could be detected.
                        charset = System.Text.Encoding.GetEncoding("GB2312");
                    }
                }

                // Convert the body to UTF-8 before decoding it.
                if (charset != System.Text.Encoding.UTF8)
                {
                    bytes   = System.Text.Encoding.Convert(charset, System.Text.Encoding.UTF8, bytes);
                    charset = System.Text.Encoding.UTF8;
                }

                content = charset.GetString(bytes);

                // Check once for an HTML (META refresh) redirect.
                if (checkHtmlRedurect)
                {
                    System.Text.RegularExpressions.Regex redirectRegex = new System.Text.RegularExpressions.Regex("<META\\s+HTTP-EQUIV\\s*=\\s*[\"]*Refresh[\"]*\\s+CONTENT=[\"\\s]*\\d+\\s*[;]\\s*URL=(?<match>.*?)[\\s\"]*>", System.Text.RegularExpressions.RegexOptions.Singleline | System.Text.RegularExpressions.RegexOptions.IgnoreCase);
                    System.Text.RegularExpressions.Match match         = redirectRegex.Match(content);
                    if (match.Success)
                    {
                        string target = match.Groups["match"].Value;
                        if (!string.IsNullOrEmpty(target) && target != url)
                        {
                            url = target;
                            // HTML redirects are followed only once, hence the literal false.
                            return(ReadUrlContent(url, checkredirect, false));
                        }
                    }
                }
            }
            catch
            {
                // Deliberately not handled for now: the method is best effort.
            }
            finally
            {
                // gResponse is still null when the failure happened before it was assigned
                // (bad URL, connection error); an unguarded Close() would throw a
                // NullReferenceException out of this finally block.
                if (gResponse != null)
                {
                    gResponse.Close();
                }
            }

            return(content);
        }