/// <summary>
/// Fetches the content behind the given URL using the spider settings.
/// Returns null on failure.
/// </summary>
/// <remarks>
/// Side effects visible in this method: adds the POST body size to
/// <c>runtime.bytesSent</c>, increments <c>runtime.urlTotal</c> once per attempted
/// request, adds the loaded content length to <c>runtime.bytesLoaded</c>, and
/// re-queues the URL for a retry on certain transient failures.
/// </remarks>
/// <param name="url">The Url instance to fetch; null yields null.</param>
/// <returns>The Content created for the response, or null on failure.</returns>
private Content GetContent(Url url)
{
    if (null == url)
    {
        return null;
    }

    HttpWebRequest request = WebRequest.Create(url.GetUrl()) as HttpWebRequest;
    if (null == request)
    {
        // WebRequest.Create returns a non-HTTP request type for schemes such as
        // ftp:// or file://; the "as" cast then yields null. Treat it as a failure
        // instead of dereferencing null below.
        return null;
    }

    this.settings.PrepareRequest(request);

    HttpWebResponse response = null;
    try
    {
        // POST request with parameters. Sending the body happens inside the try
        // block because GetRequestStream opens the connection and can fail with
        // the same WebException statuses as GetResponse.
        if (url.HttpMethod == "POST" && null != url.AppendParams && url.AppendParams.Count > 0)
        {
            request.Method = "POST";
            request.ContentType = String.Format("application/x-www-form-urlencoded;charset={0}", this.settings.RequestEncoding.WebName);

            byte[] data = GetRequestData(url.AppendParams, this.settings.RequestEncoding);
            request.ContentLength = data.Length;
            using (Stream reqStream = request.GetRequestStream())
            {
                reqStream.Write(data, 0, data.Length);
            }

            this.runtime.bytesSent += data.Length;
        }

        response = request.GetResponse() as HttpWebResponse;
    }
    catch (WebException we)
    {
        if (Environment.UserInteractive)
        {
            Console.WriteLine(String.Format("WebException状态:{0}, 消息:{1}", we.Status, we.Message));
        }

        url.SetError(we.Message);

        // A protocol error (e.g. 404/500) carries a live response that keeps the
        // underlying connection busy until it is closed.
        if (null != we.Response)
        {
            we.Response.Close();
        }

        if (we.Status == WebExceptionStatus.Timeout
            || we.Status == WebExceptionStatus.ReceiveFailure
            || we.Status == WebExceptionStatus.UnknownError)
        {
            // In normal or slow speed mode, retry the failed URL.
            if (((int)this.settings.SpeedMode) >= ((int)SpiderSetting.SpeedModes.Normal)
                && url.MaxTryTimes > 0
                && url.CanTryAgain)
            {
                url.AddTryTime();
                this.urlQueue.Add(url);
            }
        }

        return null;
    }
    catch (Exception e)
    {
        if (Environment.UserInteractive)
        {
            this.WriteLog(e.Message);
        }

        return null;
    }
    finally
    {
        // Counted for every attempted request, successful or not.
        this.runtime.urlTotal++;
    }

    if (null == response)
    {
        return null;
    }

    url.ClearError();

    Content content = null;
    using (Stream stream = response.GetResponseStream())
    {
        RequestContext context = new RequestContext(this.settings, url, response);
        content = Content.Create(context);
        if (null != content)
        {
            content.Read(stream);
            this.runtime.bytesLoaded += content.ContentLength;
        }
    }

    return content;
}
/// <summary>
/// Creates a text content instance for the given request context; the text
/// starts out as an empty string.
/// </summary>
/// <param name="context">The context of the request.</param>
internal TextContent(RequestContext context)
    : base(context)
{
    this.content = String.Empty;
}
/// <summary>
/// Fetches the content behind the given URL with explicitly supplied request
/// options. Returns null on failure.
/// </summary>
/// <param name="url">The Url to fetch; null yields null.</param>
/// <param name="requestEncoding">Encoding used for the POST body and its charset declaration; UTF-8 is used when null.</param>
/// <param name="userAgent">Value of the User-Agent header.</param>
/// <param name="cookies">Cookie container to attach; ignored when null.</param>
/// <param name="proxy">Web proxy to use; ignored when null.</param>
/// <param name="referer">Value of the Referer header; ignored when null or empty.</param>
/// <param name="requestTimeout">Request timeout; applied only when greater than 0.</param>
/// <param name="iOTimeout">Read/write timeout; applied only when greater than 0.</param>
/// <param name="allowRedirect">Redirect setting: a value greater than 0 allows that many automatic redirects; otherwise redirects are not followed.</param>
/// <returns>The Content created for the response, or null on failure.</returns>
public static Content GetContent(Url url, Encoding requestEncoding, string userAgent, CookieContainer cookies, IWebProxy proxy, string referer, int requestTimeout, int iOTimeout, int allowRedirect)
{
    if (null == url)
    {
        return null;
    }

    HttpWebRequest request = WebRequest.Create(url.GetUrl()) as HttpWebRequest;
    if (null == request)
    {
        // Non-HTTP schemes (ftp://, file://) produce a different WebRequest type,
        // so the "as" cast yields null; report failure instead of dereferencing it.
        return null;
    }

    request.UserAgent = userAgent;
    if (null != cookies)
    {
        request.CookieContainer = cookies;
    }
    if (null != proxy)
    {
        request.Proxy = proxy;
    }
    if (!String.IsNullOrEmpty(referer))
    {
        request.Referer = referer;
    }
    if (requestTimeout > 0)
    {
        request.Timeout = requestTimeout;
    }
    if (iOTimeout > 0)
    {
        request.ReadWriteTimeout = iOTimeout;
    }
    if (allowRedirect > 0)
    {
        request.AllowAutoRedirect = true;
        request.MaximumAutomaticRedirections = allowRedirect;
    }
    else
    {
        // HttpWebRequest follows redirects by default, so the documented
        // "otherwise not allowed" case has to be switched off explicitly.
        request.AllowAutoRedirect = false;
    }

    HttpWebResponse response = null;
    try
    {
        // POST request with parameters. Kept inside the try block because
        // GetRequestStream opens the connection and can throw a WebException.
        if (url.HttpMethod == "POST" && null != url.AppendParams && url.AppendParams.Count > 0)
        {
            Encoding encoding = requestEncoding ?? Encoding.UTF8;

            request.Method = "POST";
            request.ContentType = String.Format("application/x-www-form-urlencoded;charset={0}", encoding.WebName);

            byte[] data = GetRequestData(url.AppendParams, encoding);
            request.ContentLength = data.Length;
            using (Stream reqStream = request.GetRequestStream())
            {
                reqStream.Write(data, 0, data.Length);
            }
        }

        response = request.GetResponse() as HttpWebResponse;
    }
    catch (WebException we)
    {
        if (Environment.UserInteractive)
        {
            Console.WriteLine(we);
        }

        // Record the failure on the URL, mirroring ClearError() on success.
        url.SetError(we.Message);

        // Protocol errors carry a live response that holds the connection
        // until it is closed.
        if (null != we.Response)
        {
            we.Response.Close();
        }

        return null;
    }
    catch (Exception e)
    {
        if (Environment.UserInteractive)
        {
            Console.WriteLine(e);
        }

        return null;
    }

    if (null == response)
    {
        return null;
    }

    url.ClearError();

    Content content = null;
    using (Stream stream = response.GetResponseStream())
    {
        // No spider settings are available in this static entry point.
        RequestContext context = new RequestContext(null, url, response);
        content = Content.Create(context);
        if (null != content)
        {
            content.Read(stream);
        }
    }

    return content;
}
/// <summary>
/// Creates a binary content instance for the given request context.
/// </summary>
/// <param name="context">The request context.</param>
internal BinaryContent(RequestContext context)
    : base(context)
{
    // Nothing to initialize beyond what the base constructor does.
}
/// <summary>
/// Deserialization constructor: restores the raw URL, the request context and
/// the content length from the entries "rawUrl", "context" and "contentLength".
/// </summary>
/// <param name="info">Serialization data to read the field values from.</param>
/// <param name="context">Streaming context supplied by the serializer (not used here).</param>
protected Content(SerializationInfo info, StreamingContext context)
{
    object storedUrl = info.GetValue("rawUrl", typeof(Url));
    this.rawUrl = storedUrl as Url;

    object storedContext = info.GetValue("context", typeof(RequestContext));
    this.context = storedContext as RequestContext;

    long storedLength = info.GetInt64("contentLength");
    this.contentLength = storedLength;
}
private Url rawUrl; // Original URL of the current content

#endregion Fields

#region Constructors

/// <summary>
/// Creates a content instance bound to the given request context. The raw URL
/// is taken from the context's request URL and the content length starts at
/// <c>InValidContentLength</c>.
/// </summary>
/// <param name="context">The request context.</param>
protected Content(RequestContext context)
{
    this.contentLength = InValidContentLength;
    this.context = context;
    this.rawUrl = context.RequestUrl;
}
/// <summary>
/// Drops this content's reference to its request context.
/// </summary>
public virtual void Dispose()
{
    context = null;
}
/// <summary>
/// Creates the Content instance that matches the content type of the given
/// context. Returns null when the content type is neither text nor binary.
/// </summary>
/// <param name="context">The request context.</param>
/// <returns>A TextContent, a BinaryContent, or null.</returns>
public static Content Create(RequestContext context)
{
    Content created = null;

    switch (context.ContentType)
    {
        case ContentType.Text:
            created = new TextContent(context);
            break;

        case ContentType.Binary:
            created = new BinaryContent(context);
            break;
    }

    return created;
}