/// <summary>
        /// Fetches the content behind the given URL using the spider's settings.
        /// Returns null on any failure: a null URL, a URL that does not produce an
        /// HTTP request, a failed upload of the POST body, or a failed response.
        /// </summary>
        /// <remarks>
        /// Every URL for which a request is attempted is counted in runtime.urlTotal,
        /// whether or not it succeeds. On a WebException the message is recorded on the
        /// URL and, for timeout / receive-failure / unknown errors, the URL is re-queued
        /// when the speed mode and the URL's retry budget allow it.
        /// </remarks>
        /// <param name="url">The URL to fetch; may be null.</param>
        /// <returns>The downloaded Content, or null on failure or unsupported content type.</returns>
        private Content GetContent(Url url)
        {
            if (null == url)
            {
                return null;
            }

            HttpWebResponse response = null;
            Content content = null;

            try
            {
                // "as" yields null for non-HTTP schemes (file:, ftp:, ...); bail out instead of
                // dereferencing a null request further down.
                HttpWebRequest request = WebRequest.Create(url.GetUrl()) as HttpWebRequest;
                if (null == request)
                {
                    return null;
                }

                this.settings.PrepareRequest(request);

                // POST request carrying form parameters.
                if (url.HttpMethod == "POST" && null != url.AppendParams && url.AppendParams.Count > 0)
                {
                    request.Method = "POST";
                    request.ContentType = String.Format("application/x-www-form-urlencoded;charset={0}", this.settings.RequestEncoding.WebName);
                    byte[] data = GetRequestData(url.AppendParams, this.settings.RequestEncoding);
                    request.ContentLength = data.Length;

                    // GetRequestStream opens the connection and can throw WebException, so it
                    // must live inside the try to get the same error/retry handling as GetResponse.
                    using (Stream reqStream = request.GetRequestStream())
                    {
                        reqStream.Write(data, 0, data.Length);
                    }
                    this.runtime.bytesSent += data.Length;
                }

                response = request.GetResponse() as HttpWebResponse;
            }
            catch (WebException we)
            {
                // A protocol error (4xx/5xx) carries a live response; close it so the
                // underlying connection is released instead of lingering until GC.
                if (null != we.Response)
                {
                    we.Response.Close();
                }

                if (Environment.UserInteractive)
                {
                    Console.WriteLine("WebException状态:{0}, 消息:{1}", we.Status, we.Message);
                }

                url.SetError(we.Message);

                if (we.Status == WebExceptionStatus.Timeout || we.Status == WebExceptionStatus.ReceiveFailure || we.Status == WebExceptionStatus.UnknownError)
                {
                    // Retry only in normal or slower speed modes, and only while the URL has tries left.
                    if (((int)this.settings.SpeedMode) >= ((int)SpiderSetting.SpeedModes.Normal) && url.MaxTryTimes > 0 && url.CanTryAgain)
                    {
                        url.AddTryTime();
                        this.urlQueue.Add(url);
                    }
                }

                return null;
            }
            catch (Exception e)
            {
                if (Environment.UserInteractive)
                {
                    this.WriteLog(e.Message);
                }
                return null;
            }
            finally
            {
                // Counts attempted URLs, successful or not.
                this.runtime.urlTotal++;
            }

            if (null == response)
            {
                return null;
            }

            url.ClearError();

            using (Stream stream = response.GetResponseStream())
            {
                RequestContext context = new RequestContext(this.settings, url, response);

                // Content.Create returns null for content types it does not recognise.
                content = Content.Create(context);

                if (null != content)
                {
                    content.Read(stream);
                    this.runtime.bytesLoaded += content.ContentLength;
                }
            }

            return content;
        }
 /// <summary>
 /// Creates a text content object bound to the given request context,
 /// starting with an empty content string.
 /// </summary>
 /// <param name="context">The request context passed to the base constructor.</param>
 internal TextContent(RequestContext context)
     : base(context)
 {
     content = String.Empty;
 }
        /// <summary>
        /// Fetches the content behind the given URL with explicitly supplied request options.
        /// Returns null on any failure: a null URL, a URL that does not produce an HTTP
        /// request, a failed upload of the POST body, or a failed response.
        /// </summary>
        /// <param name="url">The URL to fetch; may be null.</param>
        /// <param name="requestEncoding">Encoding for the POST form body; UTF-8 is used when null.</param>
        /// <param name="userAgent">Value for the User-Agent header.</param>
        /// <param name="cookies">Cookie container to attach; ignored when null.</param>
        /// <param name="proxy">Web proxy to use; ignored when null.</param>
        /// <param name="referer">Value for the Referer header; ignored when null or empty.</param>
        /// <param name="requestTimeout">Request timeout; applied only when greater than 0.</param>
        /// <param name="iOTimeout">Read/write timeout; applied only when greater than 0.</param>
        /// <param name="allowRedirect">
        /// Values greater than 0 enable automatic redirects, capped at that many hops;
        /// any other value disables automatic redirects.
        /// </param>
        /// <returns>The downloaded Content, or null on failure or unsupported content type.</returns>
        public static Content GetContent(Url url, Encoding requestEncoding, string userAgent, CookieContainer cookies, IWebProxy proxy, string referer, int requestTimeout, int iOTimeout, int allowRedirect)
        {
            if (null == url)
            {
                return null;
            }

            HttpWebResponse response = null;
            Content content = null;

            try
            {
                // "as" yields null for non-HTTP schemes (file:, ftp:, ...).
                HttpWebRequest request = WebRequest.Create(url.GetUrl()) as HttpWebRequest;
                if (null == request)
                {
                    return null;
                }

                request.UserAgent = userAgent;
                if (null != cookies)
                {
                    request.CookieContainer = cookies;
                }
                if (null != proxy)
                {
                    request.Proxy = proxy;
                }
                if (!String.IsNullOrEmpty(referer))
                {
                    request.Referer = referer;
                }
                if (requestTimeout > 0)
                {
                    request.Timeout = requestTimeout;
                }
                if (iOTimeout > 0)
                {
                    request.ReadWriteTimeout = iOTimeout;
                }
                if (allowRedirect > 0)
                {
                    request.AllowAutoRedirect = true;
                    request.MaximumAutomaticRedirections = allowRedirect;
                }
                else
                {
                    // HttpWebRequest follows redirects by default, so "not allowed"
                    // has to be switched off explicitly to honour the documented contract.
                    request.AllowAutoRedirect = false;
                }

                // POST request carrying form parameters.
                if (url.HttpMethod == "POST" && null != url.AppendParams && url.AppendParams.Count > 0)
                {
                    Encoding encoding = requestEncoding ?? Encoding.UTF8;

                    request.Method = "POST";
                    request.ContentType = String.Format("application/x-www-form-urlencoded;charset={0}", encoding.WebName);
                    byte[] data = GetRequestData(url.AppendParams, encoding);
                    request.ContentLength = data.Length;

                    // GetRequestStream opens the connection and can throw, so it is kept
                    // inside the try to preserve the "null on failure" contract.
                    using (Stream reqStream = request.GetRequestStream())
                    {
                        reqStream.Write(data, 0, data.Length);
                    }
                }

                response = request.GetResponse() as HttpWebResponse;
            }
            catch (Exception e)
            {
                // A protocol error (4xx/5xx) carries a live response; close it so the
                // underlying connection is released.
                WebException we = e as WebException;
                if (null != we && null != we.Response)
                {
                    we.Response.Close();
                }

                if (Environment.UserInteractive)
                {
                    Console.WriteLine(e);
                }
                return null;
            }

            if (null == response)
            {
                return null;
            }

            url.ClearError();

            using (Stream stream = response.GetResponseStream())
            {
                // No spider settings are available in this static path, hence the null first argument.
                RequestContext context = new RequestContext(null, url, response);

                // Content.Create returns null for content types it does not recognise.
                content = Content.Create(context);

                if (null != content)
                {
                    content.Read(stream);
                }
            }

            return content;
        }
 /// <summary>
 /// Creates a binary content object bound to the given request context.
 /// </summary>
 /// <param name="context">The request context passed to the base constructor.</param>
 internal BinaryContent(RequestContext context)
     : base(context)
 {
     // Nothing to initialise beyond what the base constructor sets up.
 }
Example #5
0
 /// <summary>
 /// Deserialization constructor: restores the raw URL, the request context and the
 /// content length from the "rawUrl", "context" and "contentLength" entries.
 /// </summary>
 /// <param name="info">The serialized data to read from.</param>
 /// <param name="context">The streaming context (not used here).</param>
 /// <exception cref="System.ArgumentNullException"><paramref name="info"/> is null.</exception>
 protected Content(SerializationInfo info, StreamingContext context)
 {
     // Fail with a descriptive exception rather than a NullReferenceException.
     if (null == info)
     {
         throw new System.ArgumentNullException("info");
     }

     this.rawUrl = info.GetValue("rawUrl", typeof(Url)) as Url;
     this.context = info.GetValue("context", typeof(RequestContext)) as RequestContext;
     this.contentLength = info.GetInt64("contentLength");
 }
Example #6
0
        private Url rawUrl; // Original URL of the current content

        #endregion Fields

        #region Constructors

        /// <summary>
        /// Initialises the content from a request context: stores the context,
        /// takes the raw URL from the context's RequestUrl, and marks the
        /// content length as InValidContentLength.
        /// </summary>
        /// <param name="context">The request context this content belongs to.</param>
        protected Content(RequestContext context)
        {
            contentLength = InValidContentLength;
            this.context = context;
            rawUrl = context.RequestUrl;
        }
Example #7
0
 /// <summary>
 /// Drops this content's reference to its request context.
 /// </summary>
 public virtual void Dispose()
 {
     context = null;
 }
Example #8
0
 /// <summary>
 /// Builds the Content subclass matching the context's content type:
 /// TextContent for Text, BinaryContent for Binary, and null for anything else.
 /// </summary>
 /// <param name="context">The request context to build the content from.</param>
 /// <returns>A new Content instance, or null when the content type is not handled.</returns>
 public static Content Create(RequestContext context)
 {
     Content created = null;

     switch (context.ContentType)
     {
         case ContentType.Text:
             created = new TextContent(context);
             break;
         case ContentType.Binary:
             created = new BinaryContent(context);
             break;
     }

     return created;
 }