예제 #1
0
        /// <summary>
        /// Fetches the content of the given URL over HTTP; returns null when the request fails.
        /// </summary>
        /// <param name="url">URL to request; a null value yields a null result.</param>
        /// <param name="requestEncoding">Encoding used for the form body and the charset of a POST request.</param>
        /// <param name="userAgent">Value assigned to the User-Agent header.</param>
        /// <param name="cookies">Cookie container attached to the request; ignored when null.</param>
        /// <param name="proxy">Web proxy used for the request; ignored when null.</param>
        /// <param name="referer">Value assigned to the Referer header; ignored when null or empty.</param>
        /// <param name="requestTimeout">Assigned to the request's Timeout when greater than 0.</param>
        /// <param name="iOTimeout">Assigned to the request's ReadWriteTimeout when greater than 0.</param>
        /// <param name="allowRedirect">Redirect setting: a value greater than 0 allows that many automatic redirects; any other value disables automatic redirects.</param>
        /// <returns>
        /// The content, or null when <paramref name="url"/> is null, the URL does not produce an HTTP request,
        /// sending the request fails, no HTTP response is obtained, or no Content can be created for the response.
        /// </returns>
        public static Content GetContent(Url url, Encoding requestEncoding, string userAgent, CookieContainer cookies, IWebProxy proxy, string referer, int requestTimeout, int iOTimeout, int allowRedirect)
        {
            if (null == url)
            {
                return(null);
            }

            HttpWebRequest request = WebRequest.Create(url.GetUrl()) as HttpWebRequest;
            if (null == request)
            {
                // WebRequest.Create produced a non-HTTP request, so the "as" cast yielded null.
                return(null);
            }

            request.UserAgent = userAgent;
            if (null != cookies)
            {
                request.CookieContainer = cookies;
            }
            if (null != proxy)
            {
                request.Proxy = proxy;
            }
            if (!String.IsNullOrEmpty(referer))
            {
                request.Referer = referer;
            }
            if (requestTimeout > 0)
            {
                request.Timeout = requestTimeout;
            }
            if (iOTimeout > 0)
            {
                request.ReadWriteTimeout = iOTimeout;
            }
            if (allowRedirect > 0)
            {
                request.AllowAutoRedirect            = true;
                request.MaximumAutomaticRedirections = allowRedirect;
            }
            else
            {
                // HttpWebRequest follows redirects by default, so they must be switched off explicitly
                // to honor the documented "otherwise redirects are not allowed" contract.
                request.AllowAutoRedirect = false;
            }

            HttpWebResponse response = null;
            Content         content  = null;

            try
            {
                // POST request with parameters. Writing the body opens the connection and can fail,
                // so it is guarded by the same handler as GetResponse.
                if (url.HttpMethod == "POST" && null != url.AppendParams && url.AppendParams.Count > 0)
                {
                    request.Method      = "POST";
                    request.ContentType = String.Format("application/x-www-form-urlencoded;charset={0}", requestEncoding.WebName);
                    byte[] data = GetRequestData(url.AppendParams, requestEncoding);
                    request.ContentLength = data.Length;
                    using (Stream reqStream = request.GetRequestStream())
                    {
                        reqStream.Write(data, 0, data.Length);
                    }
                }

                response = request.GetResponse() as HttpWebResponse;
            }
            catch (Exception e)
            {
                // A WebException may carry the error response; close it so it does not stay open.
                WebException we = e as WebException;
                if (null != we && null != we.Response)
                {
                    we.Response.Close();
                }

                if (Environment.UserInteractive)
                {
                    Console.WriteLine(e);
                }
                return(null);
            }

            if (null == response)
            {
                return(null);
            }

            url.ClearError();

            using (Stream stream = response.GetResponseStream())
            {
                // No settings object is available in this static variant, hence the null first argument.
                RequestContext context = new RequestContext(null, url, response);

                content = Content.Create(context);

                if (null != content)
                {
                    content.Read(stream);
                }
            }

            return(content);
        }
예제 #2
0
        /// <summary>
        /// Fetches the content of the given URL over HTTP using this instance's settings;
        /// returns null when the request fails.
        /// </summary>
        /// <remarks>
        /// On a WebException the message is recorded on the URL, and for Timeout, ReceiveFailure or
        /// UnknownError the URL is put back on the queue when the speed mode is Normal or above and
        /// the URL still has tries left. Every attempt that reaches the send stage increments
        /// <c>runtime.urlTotal</c>, whether it succeeds or fails.
        /// </remarks>
        /// <param name="url">Url instance; a null value yields a null result.</param>
        /// <returns>
        /// A Content instance, or null when <paramref name="url"/> is null, the URL does not produce an
        /// HTTP request, sending the request fails, no HTTP response is obtained, or no Content can be
        /// created for the response.
        /// </returns>
        private Content GetContent(Url url)
        {
            if (null == url)
            {
                return(null);
            }
            string         address = url.GetUrl();
            HttpWebRequest request = WebRequest.Create(address) as HttpWebRequest;
            if (null == request)
            {
                // WebRequest.Create produced a non-HTTP request, so the "as" cast yielded null.
                return(null);
            }

            this.settings.PrepareRequest(request);

            HttpWebResponse response = null;
            Content         content  = null;

            try
            {
                // POST request with parameters. Writing the body opens the connection and can fail,
                // so it is guarded by the same handlers (and retry logic) as GetResponse.
                if (url.HttpMethod == "POST" && null != url.AppendParams && url.AppendParams.Count > 0)
                {
                    request.Method      = "POST";
                    request.ContentType = String.Format("application/x-www-form-urlencoded;charset={0}", this.settings.RequestEncoding.WebName);
                    byte[] data = GetRequestData(url.AppendParams, this.settings.RequestEncoding);
                    request.ContentLength = data.Length;
                    using (Stream reqStream = request.GetRequestStream())
                    {
                        reqStream.Write(data, 0, data.Length);
                    }
                    this.runtime.bytesSent += data.Length;
                }

                response = request.GetResponse() as HttpWebResponse;
            }
            catch (WebException we)
            {
                // The exception may carry the error response; close it so it does not stay open.
                if (null != we.Response)
                {
                    we.Response.Close();
                }

                if (Environment.UserInteractive)
                {
                    Console.WriteLine(String.Format("WebException状态:{0}, 消息:{1}", we.Status, we.Message));
                }

                url.SetError(we.Message);

                if (we.Status == WebExceptionStatus.Timeout || we.Status == WebExceptionStatus.ReceiveFailure || we.Status == WebExceptionStatus.UnknownError)
                {
                    // In normal or slow speed mode, retry the failed URL.
                    if (((int)this.settings.SpeedMode) >= ((int)SpiderSetting.SpeedModes.Normal) && url.MaxTryTimes > 0 && url.CanTryAgain)
                    {
                        url.AddTryTime();
                        this.urlQueue.Add(url);
                    }
                }

                return(null);
            }
            catch (Exception e)
            {
                if (Environment.UserInteractive)
                {
                    this.WriteLog(e.Message);
                }
                return(null);
            }
            finally
            {
                // Counted for successful and failed attempts alike.
                this.runtime.urlTotal++;
            }

            if (null == response)
            {
                return(null);
            }

            url.ClearError();

            using (Stream stream = response.GetResponseStream())
            {
                RequestContext context = new RequestContext(this.settings, url, response);

                content = Content.Create(context);

                if (null != content)
                {
                    content.Read(stream);
                    this.runtime.bytesLoaded += content.ContentLength;
                }
            }

            return(content);
        }