Пример #1
0
        /// <summary>
        /// Requests a URL over HTTP (GET or POST) and returns the decoded response body
        /// together with the final URL, the status code and the Last-Modified value.
        /// </summary>
        /// <param name="url">Address to request.</param>
        /// <param name="postData">Data to post; a non-empty value makes the request a POST.</param>
        /// <param name="cookies">Cookie container handed to the request helper. When supplied, the cookies found on the response are merged into it.</param>
        /// <param name="userAgent">Browser identification string, handed to the request helper.</param>
        /// <param name="referer">Referring page, handed to the request helper.</param>
        /// <param name="cookiesDomain">Domain used when collecting the response cookies; when null or empty the host of the response URI is used.</param>
        /// <param name="encode">Encoding used to decode the body. When null it is resolved in this order: NChardet detection, the charset declared in the HTML head (only when the response reports "ISO-8859-1" or "zh-cn"), the response's CharacterSet, and finally <see cref="Encoding.Default"/>.</param>
        /// <param name="method">HTTP method, e.g. POST or GET; "POST" (any casing) forces a POST even without <paramref name="postData"/>.</param>
        /// <param name="proxy">Proxy handed to the request helper.</param>
        /// <param name="encoding">Not used by this method.</param>
        /// <param name="contentType">Content type handed to the request helper.</param>
        /// <param name="timeout">Timeout handed to the request helper (presumably milliseconds - confirm against the helper).</param>
        /// <returns>
        /// The populated result. If reading or decoding the body fails, Content holds the exception text;
        /// on any other failure Content is an empty string.
        /// </returns>
        public static HttpResult HttpRequest(string url, string postData = null, CookieContainer cookies = null, string userAgent = null, string referer = null, string cookiesDomain = null, Encoding encode = null, string method = null, IWebProxy proxy = null, string encoding = null, string contentType = null, int timeout = 8000)
        {
            HttpResult httpResponse = new HttpResult();

            // Resolves a charset name to an Encoding and yields null (instead of throwing) for
            // empty or unknown names, so that the fallback chain below can continue.
            Func<string, Encoding> lookupEncoding = name =>
            {
                if (string.IsNullOrEmpty(name))
                {
                    return null;
                }

                try
                {
                    return Encoding.GetEncoding(name.Trim().Trim('"', '\''));
                }
                catch (ArgumentException)
                {
                    return null;
                }
                catch (NotSupportedException)
                {
                    return null;
                }
            };

            try
            {
                bool isPost = !string.IsNullOrEmpty(postData)
                    || string.Equals(method, "POST", StringComparison.OrdinalIgnoreCase);

                HttpWebResponse httpWebResponse = isPost
                    ? CreatePostHttpResponse(url, postData, timeout, userAgent, cookies, referer, proxy, contentType)
                    : CreateGetHttpResponse(url, timeout, userAgent, cookies, referer, proxy, contentType);

                try
                {
                    httpResponse.Url = httpWebResponse.ResponseUri.ToString();
                    httpResponse.HttpCode = (int)httpWebResponse.StatusCode;
                    httpResponse.LastModified = TimeHelper.ConvertDateTimeInt(httpWebResponse.LastModified);

                    // Wrap the response stream so that compressed bodies are transparently decompressed.
                    Stream rawStream = httpWebResponse.GetResponseStream();
                    Stream responseStream;
                    switch ((httpWebResponse.ContentEncoding ?? string.Empty).ToUpperInvariant())
                    {
                        case "GZIP":
                            responseStream = new GZipStream(rawStream, CompressionMode.Decompress);
                            break;
                        case "DEFLATE":
                            responseStream = new DeflateStream(rawStream, CompressionMode.Decompress);
                            break;
                        default:
                            responseStream = rawStream;
                            break;
                    }

                    string content;
                    try
                    {
                        // Read the whole body in chunks. Read() reports end of stream with 0, so a
                        // genuine 0xFF byte in the payload can never be mistaken for the end marker.
                        byte[] body;
                        using (MemoryStream collected = new MemoryStream())
                        {
                            byte[] chunk = new byte[8192];
                            int read;
                            while ((read = responseStream.Read(chunk, 0, chunk.Length)) > 0)
                            {
                                collected.Write(chunk, 0, read);
                            }

                            body = collected.ToArray();
                        }

                        // Charset check: input > NChardet > Parser
                        if (encode == null)
                        {
                            encode = lookupEncoding(NChardetHelper.RecogCharset(body));

                            if (encode == null)
                            {
                                string declaredCharset = httpWebResponse.CharacterSet;
                                if (declaredCharset == "ISO-8859-1" || declaredCharset == "zh-cn")
                                {
                                    // Widen the bytes one-to-one to chars (no decoding) so the ASCII markup
                                    // can be searched before the real encoding is known, and keep only the
                                    // part up to and including the first </head>.
                                    char[] widened = new char[body.Length];
                                    for (int i = 0; i < body.Length; i++)
                                    {
                                        widened[i] = (char)body[i];
                                    }

                                    string headText = new string(widened);
                                    int headEnd = headText.IndexOf("</head>", StringComparison.OrdinalIgnoreCase);
                                    if (headEnd >= 0)
                                    {
                                        headText = headText.Substring(0, headEnd + "</head>".Length);
                                    }

                                    Match match = Regex.Match(headText, CharsetReg, RegexOptions.IgnoreCase | RegexOptions.Multiline);
                                    if (match.Success)
                                    {
                                        encode = lookupEncoding(match.Groups["Charset"].Value);
                                    }
                                }

                                if (encode == null)
                                {
                                    encode = lookupEncoding(declaredCharset);
                                }
                            }
                        }

                        if (encode == null)
                        {
                            encode = Encoding.Default;
                        }

                        content = encode.GetString(body);
                    }
                    catch (Exception ex)
                    {
                        httpResponse.Content = ex.ToString();
                        return httpResponse;
                    }
                    finally
                    {
                        // Release the (possibly decompressing) stream on success and on failure alike.
                        if (responseStream != null)
                        {
                            responseStream.Dispose();
                        }
                    }

                    // Get the cookies (HttpOnly ones included). This happens while the response is still
                    // open, and the caller's container is reused so the cookies are visible to the caller.
                    if (string.IsNullOrEmpty(cookiesDomain))
                    {
                        cookiesDomain = httpWebResponse.ResponseUri.Host;
                    }

                    if (cookies == null)
                    {
                        cookies = new CookieContainer();
                    }

                    CookieCollection httpHeaderCookies = SetCookie(httpWebResponse, cookiesDomain);
                    cookies.Add(httpHeaderCookies ?? httpWebResponse.Cookies);

                    httpResponse.Content = content;
                }
                finally
                {
                    // Always release the connection, including when an early step above throws.
                    if (httpWebResponse != null)
                    {
                        httpWebResponse.Close();
                    }
                }
            }
            catch
            {
                httpResponse.Content = string.Empty;
            }

            return httpResponse;
        }
Пример #2
0
        /// <summary>
        /// Requests a URL over HTTP (GET or POST) and returns the decoded response body
        /// together with the final URL, the status code and the Last-Modified value.
        /// </summary>
        /// <param name="url">Address to request.</param>
        /// <param name="postData">Data to post; a non-empty value makes the request a POST.</param>
        /// <param name="cookies">Cookie container handed to the request helper. When supplied, the cookies found on the response are merged into it.</param>
        /// <param name="userAgent">Browser identification string, handed to the request helper.</param>
        /// <param name="referer">Referring page, handed to the request helper.</param>
        /// <param name="cookiesDomain">Domain used when collecting the response cookies; when null or empty the host of the response URI is used.</param>
        /// <param name="encode">Encoding used to decode the body. When null it is guessed from the Content-Type header, the HTML charset declaration and NChardet detection (see the numbered steps in the body).</param>
        /// <param name="method">HTTP method, e.g. POST or GET; "POST" (any casing) forces a POST even without <paramref name="postData"/>.</param>
        /// <param name="proxy">Proxy handed to the request helper.</param>
        /// <param name="encoding">Manually supplied charset name, used when no two detection sources agree.</param>
        /// <param name="contentType">Content type handed to the request helper.</param>
        /// <param name="timeout">Timeout handed to the request helper (presumably milliseconds - confirm against the helper).</param>
        /// <param name="headers">Extra request headers handed to the request helper.</param>
        /// <returns>
        /// The populated result. If reading or decoding the body fails, Content holds the exception text;
        /// on any other failure Content holds the exception text and HttpCode is derived from the exception.
        /// </returns>
        public static HttpResult HttpRequest(string url, string postData = null, CookieContainer cookies = null, string userAgent = null, string referer = null, string cookiesDomain = null, Encoding encode = null, string method = null, IWebProxy proxy = null, string encoding = null, string contentType = null, int timeout = 8000, Dictionary<string, string> headers = null)
        {
            HttpResult httpResponse = new HttpResult();

            // Strips surrounding whitespace and quotes from a charset name; null becomes "".
            Func<string, string> cleanCharset = name =>
                string.IsNullOrEmpty(name) ? string.Empty : name.Trim().Trim('"', '\'');

            // Resolves a charset name to an Encoding and yields null (instead of throwing) for
            // empty or unknown names, so that the fallback chain below can continue.
            Func<string, Encoding> lookupEncoding = name =>
            {
                string cleaned = cleanCharset(name);
                if (cleaned.Length == 0)
                {
                    return null;
                }

                try
                {
                    return Encoding.GetEncoding(cleaned);
                }
                catch (ArgumentException)
                {
                    return null;
                }
                catch (NotSupportedException)
                {
                    return null;
                }
            };

            try
            {
                bool isPost = !string.IsNullOrEmpty(postData)
                    || string.Equals(method, "POST", StringComparison.OrdinalIgnoreCase);

                HttpWebResponse httpWebResponse = isPost
                    ? CreatePostHttpResponse(url, postData, timeout, userAgent, cookies, referer, proxy, contentType, headers)
                    : CreateGetHttpResponse(url, timeout, userAgent, cookies, referer, proxy, contentType, headers);

                try
                {
                    httpResponse.Url = httpWebResponse.ResponseUri.ToString();
                    httpResponse.HttpCode = (int)httpWebResponse.StatusCode;
                    httpResponse.LastModified = TimeHelper.ConvertDateTimeInt(httpWebResponse.LastModified);

                    // Wrap the response stream so that compressed bodies are transparently decompressed.
                    Stream rawStream = httpWebResponse.GetResponseStream();
                    Stream responseStream;
                    switch ((httpWebResponse.ContentEncoding ?? string.Empty).ToUpperInvariant())
                    {
                        case "GZIP":
                            responseStream = new GZipStream(rawStream, CompressionMode.Decompress);
                            break;
                        case "DEFLATE":
                            responseStream = new DeflateStream(rawStream, CompressionMode.Decompress);
                            break;
                        default:
                            responseStream = rawStream;
                            break;
                    }

                    string content;
                    try
                    {
                        // Read the whole body in chunks rather than one byte at a time.
                        byte[] body;
                        using (MemoryStream collected = new MemoryStream())
                        {
                            byte[] chunk = new byte[8192];
                            int read;
                            while ((read = responseStream.Read(chunk, 0, chunk.Length)) > 0)
                            {
                                collected.Write(chunk, 0, read);
                            }

                            body = collected.ToArray();
                        }

                        string detectedCharset = string.Empty; // reported by NChardet
                        string headerCharset = string.Empty;   // from the Content-Type header
                        string metaCharset = string.Empty;     // from the HTML charset declaration

                        // 1. Guess from three sources (Content-Type header, HTML declaration, automatic
                        //    detection) and accept a charset as soon as two of them agree.
                        if (encode == null)
                        {
                            // Widen the bytes one-to-one to chars (no decoding) so the ASCII markup can be
                            // searched before the real encoding is known, and keep only the part up to
                            // and including the first </head>.
                            char[] widened = new char[body.Length];
                            for (int i = 0; i < body.Length; i++)
                            {
                                widened[i] = (char)body[i];
                            }

                            string headText = new string(widened);
                            int headEnd = headText.IndexOf("</head>", StringComparison.OrdinalIgnoreCase);
                            if (headEnd >= 0)
                            {
                                headText = headText.Substring(0, headEnd + "</head>".Length);
                            }

                            Match match = Regex.Match(headText, CharsetReg, RegexOptions.IgnoreCase | RegexOptions.Multiline);
                            if (match.Success)
                            {
                                metaCharset = cleanCharset(match.Groups["Charset"].Value);
                            }

                            try
                            {
                                string contentTypeHeader = httpWebResponse.ContentType;
                                if (!string.IsNullOrEmpty(contentTypeHeader))
                                {
                                    string[] tokens = contentTypeHeader
                                        .ToLower(CultureInfo.InvariantCulture)
                                        .Split(new char[] { ';', '=', ' ' }, StringSplitOptions.RemoveEmptyEntries);

                                    // Take only the token that directly follows "charset"; later
                                    // parameters (or trailing blanks) must not overwrite it.
                                    for (int i = 0; i + 1 < tokens.Length; i++)
                                    {
                                        if (tokens[i] == "charset")
                                        {
                                            headerCharset = cleanCharset(tokens[i + 1]);
                                            break;
                                        }
                                    }
                                }
                            }
                            catch (Exception)
                            {
                                // Best effort: a header that cannot be read simply contributes no charset.
                            }

                            if (metaCharset.Length > 0
                                && headerCharset.Length > 0
                                && string.Equals(headerCharset, metaCharset, StringComparison.OrdinalIgnoreCase))
                            {
                                encode = lookupEncoding(headerCharset);
                            }
                            else
                            {
                                detectedCharset = cleanCharset(
                                    NChardetHelper.RecogCharset(body, Thrinax.Data.NChardetLanguage.CHINESE, -1));

                                if (detectedCharset.Length > 0
                                    && (string.Equals(detectedCharset, metaCharset, StringComparison.OrdinalIgnoreCase)
                                        || string.Equals(detectedCharset, headerCharset, StringComparison.OrdinalIgnoreCase)))
                                {
                                    encode = lookupEncoding(detectedCharset);
                                }
                            }
                        }

                        // 2. Fall back to the manually supplied charset name.
                        if (encode == null)
                        {
                            encode = lookupEncoding(encoding);
                        }

                        // 3. Fall back to a single source: automatic detection > Content-Type header > HTML declaration.
                        if (encode == null)
                        {
                            encode = lookupEncoding(detectedCharset);
                        }

                        if (encode == null)
                        {
                            encode = lookupEncoding(headerCharset);
                        }

                        if (encode == null)
                        {
                            encode = lookupEncoding(metaCharset);
                        }

                        // 4. Last resort: the default encoding.
                        if (encode == null)
                        {
                            encode = Encoding.Default;
                        }

                        content = encode.GetString(body);
                    }
                    catch (Exception ex)
                    {
                        httpResponse.Content = ex.ToString();
                        return httpResponse;
                    }
                    finally
                    {
                        // Release the (possibly decompressing) stream on success and on failure alike.
                        if (responseStream != null)
                        {
                            responseStream.Dispose();
                        }
                    }

                    // Get the cookies (HttpOnly ones included). This happens while the response is still
                    // open, and the caller's container is reused so the cookies are visible to the caller.
                    if (string.IsNullOrEmpty(cookiesDomain))
                    {
                        cookiesDomain = httpWebResponse.ResponseUri.Host;
                    }

                    if (cookies == null)
                    {
                        cookies = new CookieContainer();
                    }

                    CookieCollection httpHeaderCookies = SetCookie(httpWebResponse, cookiesDomain);
                    cookies.Add(httpHeaderCookies ?? httpWebResponse.Cookies);

                    httpResponse.Content = content;
                }
                finally
                {
                    // Always release the connection, including when an early step above throws.
                    if (httpWebResponse != null)
                    {
                        httpWebResponse.Close();
                    }
                }
            }
            catch (Exception ex)
            {
                httpResponse.Content = ex.ToString();
                httpResponse.HttpCode = DetermineResultStatus(ex);
            }

            return httpResponse;
        }