/// <summary>
/// Fetches a page over HTTP and returns its decoded text together with basic response metadata.
/// </summary>
/// <remarks>
/// The request is sent as POST when <paramref name="postData"/> is non-empty or
/// <paramref name="method"/> equals "POST" (case-insensitive); otherwise as GET.
/// Bodies whose Content-Encoding is gzip or deflate are decompressed.
/// When <paramref name="encode"/> is null the text encoding is chosen in this order:
/// NChardet detection, the charset declared in the HTML head (only consulted when the response
/// character set is "ISO-8859-1" or "zh-cn"), the response character set, and finally
/// <see cref="Encoding.Default"/>. A charset name that the platform does not support is skipped.
/// This method does not throw: if reading or decoding the body fails, <c>Content</c> holds the
/// exception text; any other failure leaves <c>Content</c> as an empty string.
/// </remarks>
/// <param name="url">Url to request.</param>
/// <param name="postData">Data to post; a non-empty value forces a POST.</param>
/// <param name="cookies">Cookies forwarded to the request factory.</param>
/// <param name="userAgent">Browser identification (User-Agent).</param>
/// <param name="referer">Referring page.</param>
/// <param name="cookiesDomain">Domain used when collecting response cookies; when empty, the host of the response URI is used.</param>
/// <param name="encode">Encoding used to decode the body; null enables automatic detection.</param>
/// <param name="method">Submit method such as POST or GET; by default inferred from whether <paramref name="postData"/> is empty.</param>
/// <param name="proxy">Proxy forwarded to the request factory.</param>
/// <param name="encoding">Not used by this overload.</param>
/// <param name="contentType">Content type forwarded to the request factory.</param>
/// <param name="timeout">Timeout forwarded to the request factory (presumably milliseconds; confirm against the factory methods).</param>
/// <returns>The populated <c>HttpResult</c>.</returns>
public static HttpResult HttpRequest(string url, string postData = null, CookieContainer cookies = null, string userAgent = null, string referer = null, string cookiesDomain = null, Encoding encode = null, string method = null, IWebProxy proxy = null, string encoding = null, string contentType = null, int timeout = 8000)
{
    HttpResult httpResponse = new HttpResult();
    try
    {
        bool usePost = !string.IsNullOrEmpty(postData)
            || string.Equals(method, "POST", StringComparison.OrdinalIgnoreCase);
        HttpWebResponse httpWebResponse = usePost
            ? CreatePostHttpResponse(url, postData, timeout, userAgent, cookies, referer, proxy, contentType)
            : CreateGetHttpResponse(url, timeout, userAgent, cookies, referer, proxy, contentType);

        Stream responseStream = null;
        try
        {
            httpResponse.Url = httpWebResponse.ResponseUri.ToString();
            httpResponse.HttpCode = (int)httpWebResponse.StatusCode;
            httpResponse.LastModified = TimeHelper.ConvertDateTimeInt(httpWebResponse.LastModified);

            // Wrap the raw stream so compressed bodies are transparently decompressed.
            switch (httpWebResponse.ContentEncoding.ToUpperInvariant())
            {
                case "GZIP":
                    responseStream = new GZipStream(httpWebResponse.GetResponseStream(), CompressionMode.Decompress);
                    break;
                case "DEFLATE":
                    responseStream = new DeflateStream(httpWebResponse.GetResponseStream(), CompressionMode.Decompress);
                    break;
                default:
                    responseStream = httpWebResponse.GetResponseStream();
                    break;
            }

            string content = null;
            try
            {
                // Read the whole body in chunks. Read() returning 0 is the only end-of-stream
                // signal, so bytes with value 0xFF are kept instead of being mistaken for EOF.
                byte[] body;
                using (var buffered = new MemoryStream())
                {
                    var chunk = new byte[8192];
                    int read;
                    while ((read = responseStream.Read(chunk, 0, chunk.Length)) > 0)
                    {
                        buffered.Write(chunk, 0, read);
                    }
                    body = buffered.ToArray();
                }

                // Byte-per-char view of the document up to and including the first "</head>"
                // (or the whole document when there is none); used to find a declared charset.
                const string headClose = "</head>";
                var headText = new StringBuilder();
                foreach (byte value in body)
                {
                    headText.Append((char)value);
                    if (value == (byte)'>'
                        && headText.Length >= headClose.Length
                        && string.Equals(
                            headText.ToString(headText.Length - headClose.Length, headClose.Length),
                            headClose,
                            StringComparison.OrdinalIgnoreCase))
                    {
                        break;
                    }
                }
                string cache = headText.ToString();

                // Charset check: input > NChardet > Parser
                if (encode == null)
                {
                    encode = FindEncodingOrNull(NChardetHelper.RecogCharset(body));

                    if (encode == null)
                    {
                        string responseCharset = httpWebResponse.CharacterSet;
                        if (responseCharset == "ISO-8859-1" || responseCharset == "zh-cn")
                        {
                            Match match = Regex.Match(cache, CharsetReg, RegexOptions.IgnoreCase | RegexOptions.Multiline);
                            if (match.Success)
                            {
                                encode = FindEncodingOrNull(match.Groups["Charset"].Value);
                            }
                        }

                        // An empty or unsupported response charset is skipped rather than
                        // passed to Encoding.GetEncoding, which would throw.
                        if (encode == null)
                        {
                            encode = FindEncodingOrNull(responseCharset);
                        }
                    }
                }

                if (encode == null)
                {
                    encode = Encoding.Default;
                }

                content = encode.GetString(body);
            }
            catch (Exception ex)
            {
                httpResponse.Content = ex.ToString();
                return httpResponse;
            }

            // Get the cookies (supports HttpOnly) while the response is still open.
            // NOTE(review): the container built here is assigned only to the local parameter
            // variable, so it is not visible to the caller through this parameter; confirm
            // whether SetCookie is relied on for side effects.
            if (string.IsNullOrEmpty(cookiesDomain))
            {
                cookiesDomain = httpWebResponse.ResponseUri.Host;
            }
            cookies = new CookieContainer();
            CookieCollection httpHeaderCookies = SetCookie(httpWebResponse, cookiesDomain);
            cookies.Add(httpHeaderCookies ?? httpWebResponse.Cookies);

            httpResponse.Content = content;
        }
        finally
        {
            // Always release the decoding stream and the connection, including when reading
            // the response metadata or opening the stream failed.
            try
            {
                if (responseStream != null)
                {
                    responseStream.Close();
                }
            }
            finally
            {
                httpWebResponse.Close();
            }
        }
    }
    catch (Exception)
    {
        httpResponse.Content = string.Empty;
    }

    return httpResponse;
}

/// <summary>
/// Resolves a charset name to an <see cref="Encoding"/>, returning null instead of throwing
/// when the name is null, empty, or not supported.
/// </summary>
/// <param name="charsetName">Charset name to resolve.</param>
/// <returns>The matching encoding, or null when it cannot be resolved.</returns>
private static Encoding FindEncodingOrNull(string charsetName)
{
    if (string.IsNullOrEmpty(charsetName))
    {
        return null;
    }

    try
    {
        return Encoding.GetEncoding(charsetName);
    }
    catch (ArgumentException)
    {
        return null;
    }
    catch (NotSupportedException)
    {
        return null;
    }
}
/// <summary>
/// Fetches a page over HTTP (optionally with extra request headers) and returns its decoded
/// text together with basic response metadata.
/// </summary>
/// <remarks>
/// The request is sent as POST when <paramref name="postData"/> is non-empty or
/// <paramref name="method"/> equals "POST" (case-insensitive); otherwise as GET.
/// Bodies whose Content-Encoding is gzip or deflate are decompressed.
/// When <paramref name="encode"/> is null the text encoding is chosen as follows:
/// (1) three guesses are gathered (charset in the Content-Type header, charset declared in the
/// HTML head, NChardet detection) and a value on which two of them agree is used;
/// (2) otherwise the manually supplied <paramref name="encoding"/> name;
/// (3) otherwise a single guess, in the order detection, Content-Type header, HTML declaration;
/// (4) otherwise <see cref="Encoding.Default"/>.
/// A charset name that the platform does not support is skipped at every step.
/// This method does not throw: on failure <c>Content</c> holds the exception text, and for
/// failures outside body reading/decoding <c>HttpCode</c> is set from <c>DetermineResultStatus</c>.
/// </remarks>
/// <param name="url">Url to request.</param>
/// <param name="postData">Data to post; a non-empty value forces a POST.</param>
/// <param name="cookies">Cookies forwarded to the request factory.</param>
/// <param name="userAgent">Browser identification (User-Agent).</param>
/// <param name="referer">Referring page.</param>
/// <param name="cookiesDomain">Domain used when collecting response cookies; when empty, the host of the response URI is used.</param>
/// <param name="encode">Encoding used to decode the body; null enables automatic detection.</param>
/// <param name="method">Submit method such as POST or GET; by default inferred from whether <paramref name="postData"/> is empty.</param>
/// <param name="proxy">Proxy forwarded to the request factory.</param>
/// <param name="encoding">Manually specified charset name, used when automatic detection finds no agreement.</param>
/// <param name="contentType">Content type forwarded to the request factory.</param>
/// <param name="timeout">Timeout forwarded to the request factory (presumably milliseconds; confirm against the factory methods).</param>
/// <param name="headers">Additional headers forwarded to the request factory.</param>
/// <returns>The populated <c>HttpResult</c>.</returns>
public static HttpResult HttpRequest(string url, string postData = null, CookieContainer cookies = null, string userAgent = null, string referer = null, string cookiesDomain = null, Encoding encode = null, string method = null, IWebProxy proxy = null, string encoding = null, string contentType = null, int timeout = 8000, Dictionary<string, string> headers = null)
{
    HttpResult httpResponse = new HttpResult();
    try
    {
        bool usePost = !string.IsNullOrEmpty(postData)
            || string.Equals(method, "POST", StringComparison.OrdinalIgnoreCase);
        HttpWebResponse httpWebResponse = usePost
            ? CreatePostHttpResponse(url, postData, timeout, userAgent, cookies, referer, proxy, contentType, headers)
            : CreateGetHttpResponse(url, timeout, userAgent, cookies, referer, proxy, contentType, headers);

        Stream responseStream = null;
        try
        {
            httpResponse.Url = httpWebResponse.ResponseUri.ToString();
            httpResponse.HttpCode = (int)httpWebResponse.StatusCode;
            httpResponse.LastModified = TimeHelper.ConvertDateTimeInt(httpWebResponse.LastModified);

            // Wrap the raw stream so compressed bodies are transparently decompressed.
            switch (httpWebResponse.ContentEncoding.ToUpperInvariant())
            {
                case "GZIP":
                    responseStream = new GZipStream(httpWebResponse.GetResponseStream(), CompressionMode.Decompress);
                    break;
                case "DEFLATE":
                    responseStream = new DeflateStream(httpWebResponse.GetResponseStream(), CompressionMode.Decompress);
                    break;
                default:
                    responseStream = httpWebResponse.GetResponseStream();
                    break;
            }

            string content = null;
            try
            {
                // Read the whole body in chunks; Read() returning 0 marks the end of the stream.
                byte[] body;
                using (var buffered = new MemoryStream())
                {
                    var chunk = new byte[8192];
                    int read;
                    while ((read = responseStream.Read(chunk, 0, chunk.Length)) > 0)
                    {
                        buffered.Write(chunk, 0, read);
                    }
                    body = buffered.ToArray();
                }

                if (encode == null)
                {
                    // Byte-per-char view of the document up to and including the first "</head>"
                    // (or the whole document when there is none); used to find a declared charset.
                    const string headClose = "</head>";
                    var headText = new StringBuilder();
                    foreach (byte value in body)
                    {
                        headText.Append((char)value);
                        if (value == (byte)'>'
                            && headText.Length >= headClose.Length
                            && string.Equals(
                                headText.ToString(headText.Length - headClose.Length, headClose.Length),
                                headClose,
                                StringComparison.OrdinalIgnoreCase))
                        {
                            break;
                        }
                    }

                    // Guess 1: charset declared inside the HTML head.
                    string declaredCharset = string.Empty;
                    Match match = Regex.Match(headText.ToString(), CharsetReg, RegexOptions.IgnoreCase | RegexOptions.Multiline);
                    if (match.Success)
                    {
                        declaredCharset = match.Groups["Charset"].Value;
                    }

                    // Guess 2: charset parameter of the Content-Type header. The first non-empty
                    // token after "charset" is the value; later parameters must not overwrite it.
                    string headerCharset = string.Empty;
                    try
                    {
                        string contentTypeHeader = httpWebResponse.ContentType;
                        if (!string.IsNullOrEmpty(contentTypeHeader))
                        {
                            string[] tokens = contentTypeHeader
                                .ToLower(CultureInfo.InvariantCulture)
                                .Split(new char[] { ';', '=', ' ' });
                            bool expectValue = false;
                            foreach (string token in tokens)
                            {
                                if (token == "charset")
                                {
                                    expectValue = true;
                                    continue;
                                }
                                if (!expectValue || token.Length == 0)
                                {
                                    continue;
                                }
                                headerCharset = token.Trim('"', '\'');
                                break;
                            }
                        }
                    }
                    catch (Exception)
                    {
                        // Best effort: the header is only one of three hints, so a failure to
                        // read it simply leaves this guess empty.
                    }

                    // Step 1: accept a charset on which two of the three guesses agree.
                    string detectedCharset = string.Empty;
                    if (!string.IsNullOrEmpty(declaredCharset)
                        && !string.IsNullOrEmpty(headerCharset)
                        && string.Equals(headerCharset, declaredCharset, StringComparison.OrdinalIgnoreCase))
                    {
                        encode = GetEncodingOrNull(headerCharset);
                    }
                    else
                    {
                        // Guess 3: statistical detection over the raw bytes.
                        detectedCharset = NChardetHelper.RecogCharset(body, Thrinax.Data.NChardetLanguage.CHINESE, -1);
                        if (!string.IsNullOrEmpty(detectedCharset)
                            && (string.Equals(detectedCharset, declaredCharset, StringComparison.OrdinalIgnoreCase)
                                || string.Equals(detectedCharset, headerCharset, StringComparison.OrdinalIgnoreCase)))
                        {
                            encode = GetEncodingOrNull(detectedCharset);
                        }
                    }

                    // Steps 2-4: manual name, then any single guess (detection > header >
                    // declaration), then the platform default. Unsupported names are skipped.
                    if (encode == null)
                    {
                        encode = GetEncodingOrNull(encoding)
                            ?? GetEncodingOrNull(detectedCharset)
                            ?? GetEncodingOrNull(headerCharset)
                            ?? GetEncodingOrNull(declaredCharset)
                            ?? Encoding.Default;
                    }
                }

                content = encode.GetString(body);
            }
            catch (Exception ex)
            {
                httpResponse.Content = ex.ToString();
                return httpResponse;
            }

            // Get the cookies (supports HttpOnly) while the response is still open.
            // NOTE(review): the container built here is assigned only to the local parameter
            // variable, so it is not visible to the caller through this parameter; confirm
            // whether SetCookie is relied on for side effects.
            if (string.IsNullOrEmpty(cookiesDomain))
            {
                cookiesDomain = httpWebResponse.ResponseUri.Host;
            }
            cookies = new CookieContainer();
            CookieCollection httpHeaderCookies = SetCookie(httpWebResponse, cookiesDomain);
            cookies.Add(httpHeaderCookies ?? httpWebResponse.Cookies);

            httpResponse.Content = content;
        }
        finally
        {
            // Always release the decoding stream and the connection, including when reading
            // the response metadata or opening the stream failed.
            try
            {
                if (responseStream != null)
                {
                    responseStream.Close();
                }
            }
            finally
            {
                httpWebResponse.Close();
            }
        }
    }
    catch (Exception ex)
    {
        httpResponse.Content = ex.ToString();
        httpResponse.HttpCode = DetermineResultStatus(ex);
    }

    return httpResponse;
}

/// <summary>
/// Resolves a charset name to an <see cref="Encoding"/>, returning null instead of throwing
/// when the name is null, empty, or not supported.
/// </summary>
/// <param name="charsetName">Charset name to resolve.</param>
/// <returns>The matching encoding, or null when it cannot be resolved.</returns>
private static Encoding GetEncodingOrNull(string charsetName)
{
    if (string.IsNullOrEmpty(charsetName))
    {
        return null;
    }

    try
    {
        return Encoding.GetEncoding(charsetName);
    }
    catch (ArgumentException)
    {
        return null;
    }
    catch (NotSupportedException)
    {
        return null;
    }
}