/// <summary>
/// Fetches the content of the given URL; returns null on failure.
/// </summary>
/// <param name="url">The URL to request. When null, the method returns null immediately.</param>
/// <param name="requestEncoding">Encoding passed to GetRequestData for the POST form body and advertised in the Content-Type charset.</param>
/// <param name="userAgent">Value assigned to the User-Agent header.</param>
/// <param name="cookies">Cookie container attached to the request; skipped when null.</param>
/// <param name="proxy">Web proxy used for the request; skipped when null.</param>
/// <param name="referer">Value for the Referer header; skipped when null or empty.</param>
/// <param name="requestTimeout">Request timeout; applied only when greater than 0.</param>
/// <param name="iOTimeout">Read/write timeout; applied only when greater than 0.</param>
/// <param name="allowRedirect">Redirect setting: a value greater than 0 allows up to that many automatic redirects; any other value disables automatic redirects.</param>
/// <returns>Content or null.</returns>
public static Content GetContent(Url url, Encoding requestEncoding, string userAgent, CookieContainer cookies, IWebProxy proxy, string referer, int requestTimeout, int iOTimeout, int allowRedirect)
{
    if (null == url)
    {
        return null;
    }

    HttpWebRequest request = WebRequest.Create(url.GetUrl()) as HttpWebRequest;
    if (null == request)
    {
        // WebRequest.Create produced a non-HTTP request object (the "as" cast failed);
        // this method only handles HTTP, so report it as a failure instead of
        // dereferencing null below.
        return null;
    }

    request.UserAgent = userAgent;
    if (null != cookies)
    {
        request.CookieContainer = cookies;
    }
    if (null != proxy)
    {
        request.Proxy = proxy;
    }
    if (!String.IsNullOrEmpty(referer))
    {
        request.Referer = referer;
    }
    if (requestTimeout > 0)
    {
        request.Timeout = requestTimeout;
    }
    if (iOTimeout > 0)
    {
        request.ReadWriteTimeout = iOTimeout;
    }
    if (allowRedirect > 0)
    {
        request.AllowAutoRedirect = true;
        request.MaximumAutomaticRedirections = allowRedirect;
    }
    else
    {
        // HttpWebRequest follows redirects by default, so the documented
        // "redirects not allowed" case has to switch them off explicitly.
        request.AllowAutoRedirect = false;
    }

    // POST request carrying parameters: prepare headers and the encoded body here;
    // the body is sent inside the try block below.
    byte[] postData = null;
    if (url.HttpMethod == "POST" && null != url.AppendParams && url.AppendParams.Count > 0)
    {
        request.Method = "POST";
        request.ContentType = String.Format("application/x-www-form-urlencoded;charset={0}", requestEncoding.WebName);
        postData = GetRequestData(url.AppendParams, requestEncoding);
        request.ContentLength = postData.Length;
    }

    HttpWebResponse response = null;
    Content content = null;
    try
    {
        if (null != postData)
        {
            // GetRequestStream opens the connection and can fail just like GetResponse,
            // so it lives inside the try to honor the "null on failure" contract.
            using (Stream reqStream = request.GetRequestStream())
            {
                reqStream.Write(postData, 0, postData.Length);
            }
        }
        response = request.GetResponse() as HttpWebResponse;
    }
    catch (Exception e)
    {
        if (Environment.UserInteractive)
        {
            Console.WriteLine(e);
        }
        return null;
    }

    if (null == response)
    {
        return null;
    }

    url.ClearError();
    using (Stream stream = response.GetResponseStream())
    {
        // No settings object is available in this static overload, hence the null first argument.
        RequestContext context = new RequestContext(null, url, response);
        content = Content.Create(context);
        if (null != content)
        {
            content.Read(stream);
        }
    }
    return content;
}
/// <summary>
/// Fetches the content of the given URL using this instance's settings; returns null on failure.
/// </summary>
/// <remarks>
/// Every request attempt increments <c>runtime.urlTotal</c>, whether it succeeds or fails.
/// On a WebException the message is recorded on the URL, and for timeout, receive-failure
/// or unknown-error statuses the URL is queued again when the speed mode and the URL's
/// retry budget allow it.
/// </remarks>
/// <param name="url">The Url instance to request. When null, the method returns null immediately.</param>
/// <returns>A Content instance, or null.</returns>
private Content GetContent(Url url)
{
    if (null == url)
    {
        return null;
    }

    HttpWebRequest request = WebRequest.Create(url.GetUrl()) as HttpWebRequest;
    if (null == request)
    {
        // WebRequest.Create produced a non-HTTP request object (the "as" cast failed);
        // bail out instead of handing null to PrepareRequest and dereferencing it below.
        return null;
    }

    this.settings.PrepareRequest(request);

    // POST request carrying parameters: prepare headers and the encoded body here;
    // the body is sent inside the try block below.
    byte[] postData = null;
    if (url.HttpMethod == "POST" && null != url.AppendParams && url.AppendParams.Count > 0)
    {
        request.Method = "POST";
        request.ContentType = String.Format("application/x-www-form-urlencoded;charset={0}", this.settings.RequestEncoding.WebName);
        postData = GetRequestData(url.AppendParams, this.settings.RequestEncoding);
        request.ContentLength = postData.Length;
    }

    HttpWebResponse response = null;
    Content content = null;
    try
    {
        if (null != postData)
        {
            // GetRequestStream opens the connection, so a failure here must go through
            // the same error recording, retry and counting path as GetResponse.
            using (Stream reqStream = request.GetRequestStream())
            {
                reqStream.Write(postData, 0, postData.Length);
            }
            this.runtime.bytesSent += postData.Length;
        }
        response = request.GetResponse() as HttpWebResponse;
    }
    catch (WebException we)
    {
        if (Environment.UserInteractive)
        {
            Console.WriteLine("WebException状态:{0}, 消息:{1}", we.Status, we.Message);
        }
        url.SetError(we.Message);

        bool retryableStatus = we.Status == WebExceptionStatus.Timeout
            || we.Status == WebExceptionStatus.ReceiveFailure
            || we.Status == WebExceptionStatus.UnknownError;
        if (retryableStatus)
        {
            // Retry failed requests only in normal or slower speed modes.
            if (((int)this.settings.SpeedMode) >= ((int)SpiderSetting.SpeedModes.Normal) && url.MaxTryTimes > 0 && url.CanTryAgain)
            {
                url.AddTryTime();
                this.urlQueue.Add(url);
            }
        }
        return null;
    }
    catch (Exception e)
    {
        if (Environment.UserInteractive)
        {
            this.WriteLog(e.Message);
        }
        return null;
    }
    finally
    {
        // Counts every attempted request, successful or not.
        this.runtime.urlTotal++;
    }

    if (null == response)
    {
        return null;
    }

    url.ClearError();
    using (Stream stream = response.GetResponseStream())
    {
        RequestContext context = new RequestContext(this.settings, url, response);
        content = Content.Create(context);
        if (null != content)
        {
            content.Read(stream);
            this.runtime.bytesLoaded += content.ContentLength;
        }
    }
    return content;
}