Пример #1
0
        /// <summary>
        /// Downloads the page for <paramref name="request"/> with a web driver borrowed from <c>Pool</c>,
        /// running the optional login callback first and handing the driver back to the pool afterwards.
        /// </summary>
        /// <param name="request">Request whose <c>Url</c> is navigated to.</param>
        /// <param name="spider">
        /// Spider whose <c>Site.ContentType</c> is used to build the page and which is passed to <c>ValidatePage</c>.
        /// </param>
        /// <returns>A page whose content is taken from <c>_fiddlerClient.ResponseBodyString</c>.</returns>
        /// <exception cref="SpiderExceptoin">The <c>Login</c> callback returned <c>false</c>.</exception>
        public override Page Download(Request request, ISpider spider)
        {
            WebDriverItem driverService = null;

            try
            {
                driverService = Pool.Get();

                // NOTE(review): locking on `this` shares the monitor with any outside code that locks
                // this instance; a private lock object would be safer if the class can be changed.
                lock (this)
                {
                    if (!_isLogined && Login != null)
                    {
                        _isLogined = Login.Invoke(driverService.WebDriver as RemoteWebDriver);
                        if (!_isLogined)
                        {
                            throw new SpiderExceptoin("Login failed. Please check your login codes.");
                        }
                    }
                }

                // Rebuild the URL with an encoded query string so that Chinese characters are not garbled.
                Uri    uri     = request.Url;
                string query   = uri.Query;
                string realUrl = uri.Scheme + "://" + uri.DnsSafeHost + ":" + uri.Port + uri.AbsolutePath
                                 + (string.IsNullOrEmpty(query) ? "" : ("?" + HttpUtility.UrlPathEncode(query.Substring(1))));

                if (UrlFormat != null)
                {
                    realUrl = UrlFormat(realUrl);
                }

                RedialManagerUtils.Execute("webdriverdownloader-download", () =>
                {
                    driverService.WebDriver.Navigate().GoToUrl(realUrl);
                });

                // Wait the configured time after navigation before reading the result.
                Thread.Sleep(_webDriverWaitTime);

                AfterNavigate?.Invoke((RemoteWebDriver)driverService.WebDriver);

                Page page = new Page(request, spider.Site.ContentType);
                page.Content = _fiddlerClient.ResponseBodyString;
                _fiddlerClient.Clear();
                page.Url       = request.Url.ToString();
                page.TargetUrl = driverService.WebDriver.Url;
                page.Title     = driverService.WebDriver.Title;

                ValidatePage(page, spider);

                // Must be cleared when finished: if this value is stored in Redis it causes
                // a single task to be run in an endless loop.
                request.PutExtra(Request.CycleTriedTimes, null);

                return page;
            }
            finally
            {
                // Pool.Get() may have thrown before a driver was obtained; never hand null back to the pool.
                if (driverService != null)
                {
                    Pool.ReturnToPool(driverService);
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Navigates the <c>_webDriver</c> browser (opened as Chrome on first use) to the request's URL
        /// and wraps the response captured by <c>_fiddlerClient</c> in a <see cref="Page"/>.
        /// </summary>
        /// <param name="request">Request whose <c>Url</c> is navigated to.</param>
        /// <param name="spider">Spider whose <c>Site</c> supplies the content type and outbound-link settings.</param>
        /// <returns>
        /// The downloaded page; if anything other than a <c>DownloadException</c> is thrown, a page whose
        /// <c>Exception</c> property carries that failure instead.
        /// </returns>
        protected override Page DowloadContent(Request request, ISpider spider)
        {
            Site site = spider.Site;

            try
            {
                lock (this)
                {
                    // Open the browser the first time it is needed.
                    if (_webDriver == null)
                    {
                        _webDriver = WebDriverUtil.Open(Browser.Chrome, _option);
                    }

                    // Run the optional login callback until it has succeeded once.
                    bool needsLogin = !_isLogined && Login != null;
                    if (needsLogin)
                    {
                        bool loggedIn = Login.Invoke(_webDriver as RemoteWebDriver);
                        _isLogined = loggedIn;
                        if (!loggedIn)
                        {
                            throw new SpiderException("Login failed. Please check your login codes.");
                        }
                    }

                    // Rebuild the URL with an encoded query string so that Chinese characters are not garbled.
                    Uri target = request.Url;
                    string rawQuery = target.Query;
                    string querySuffix = "";
                    if (!string.IsNullOrEmpty(rawQuery))
                    {
                        // Strip the leading '?' before encoding, then put it back.
                        querySuffix = "?" + HttpUtility.UrlPathEncode(rawQuery.Substring(1));
                    }

                    string navigateUrl = $"{target.Scheme}://{target.DnsSafeHost}:{target.Port}{target.AbsolutePath}{querySuffix}";

                    if (UrlFormat != null)
                    {
                        navigateUrl = UrlFormat(navigateUrl);
                    }

                    NetworkCenter.Current.Execute("fid-d", () => _webDriver.Navigate().GoToUrl(navigateUrl));

                    // Wait the configured time after navigation before reading the result.
                    Thread.Sleep(_webDriverWaitTime);

                    AfterNavigate?.Invoke((RemoteWebDriver)_webDriver);

                    var result = new Page(request, spider.Site.ContentType, site.RemoveOutboundLinks ? site.Domains : null);
                    result.Content = _fiddlerClient.ResponseBodyString;
                    _fiddlerClient.Clear();
                    result.TargetUrl = _webDriver.Url;
                    result.Title = _webDriver.Title;

                    // Must be cleared when finished: if this value is stored in Redis it causes
                    // a single task to be run in an endless loop.
                    request.PutExtra(Request.CycleTriedTimes, null);

                    return result;
                }
            }
            catch (Exception error) when (!(error is DownloadException))
            {
                // DownloadException propagates to the caller; every other failure is reported on the page.
                return new Page(request, site.ContentType, null)
                {
                    Exception = error
                };
            }
        }