/// <summary>
/// Loads the requested URL in the shared web driver and builds a <see cref="Page"/> from the
/// response body captured by the Fiddler client.
/// </summary>
/// <param name="request">Request whose URL is navigated to.</param>
/// <param name="spider">Spider issuing the request; its Site supplies the content type and outbound-link settings.</param>
/// <returns>
/// The downloaded page, or a page whose Exception property is set when anything other than a
/// DownloadException is thrown. A DownloadException propagates to the caller.
/// </returns>
protected override Page DowloadContent(Request request, ISpider spider)
{
    Site site = spider.Site;

    try
    {
        lock (this)
        {
            // Open the browser lazily on first use.
            if (_webDriver == null)
            {
                _webDriver = WebDriverUtil.Open(Browser.Chrome, _option);
            }

            // Run the login callback until it has succeeded once.
            if (!_isLogined && Login != null)
            {
                _isLogined = Login.Invoke(_webDriver as RemoteWebDriver);
                if (!_isLogined)
                {
                    throw new SpiderException("Login failed. Please check your login codes.");
                }
            }

            // Rebuild the URL with an encoded query string (original note: URLs containing
            // Chinese characters come out garbled otherwise).
            Uri target = request.Url;
            string encodedQuery = string.Empty;
            if (!string.IsNullOrEmpty(target.Query))
            {
                // Drop the leading '?' before encoding, then put it back.
                encodedQuery = "?" + HttpUtility.UrlPathEncode(target.Query.Substring(1));
            }

            string navigateUrl = $"{target.Scheme}://{target.DnsSafeHost}:{target.Port}{target.AbsolutePath}{encodedQuery}";
            if (UrlFormat != null)
            {
                navigateUrl = UrlFormat(navigateUrl);
            }

            NetworkCenter.Current.Execute("fid-d", () => _webDriver.Navigate().GoToUrl(navigateUrl));

            Thread.Sleep(_webDriverWaitTime);

            var afterNavigate = AfterNavigate;
            if (afterNavigate != null)
            {
                afterNavigate((RemoteWebDriver)_webDriver);
            }

            Page result = new Page(request, spider.Site.ContentType, site.RemoveOutboundLinks ? site.Domain : null);

            // Take the captured body first, then reset the capture before touching the driver again.
            result.Content = _fiddlerClient.ResponseBodyString;
            _fiddlerClient.Clear();
            result.TargetUrl = _webDriver.Url;
            result.Title = _webDriver.Title;

            // Must be cleared when finished: if this value ends up stored in Redis,
            // a single task would be re-run in an endless loop.
            request.PutExtra(Request.CycleTriedTimes, null);

            return result;
        }
    }
    catch (Exception error) when (!(error is DownloadException))
    {
        // DownloadException is deliberately not handled here and reaches the caller;
        // every other failure is reported through the returned page.
        return new Page(request, site.ContentType, null) { Exception = error };
    }
}
/// <summary>
/// Concrete implementation of the download step: navigates the shared web driver to the
/// request URL and returns the rendered page source.
/// </summary>
/// <remarks>
/// Driver creation, cookie seeding and cookie injection run under <c>_locker</c>.
/// Navigation itself runs outside that lock.
/// </remarks>
/// <param name="request">Request information; its Url is navigated to as-is.</param>
/// <param name="spider">Spider issuing the request; supplies the Site and the Logger.</param>
/// <returns>
/// A completed task holding the page. On failure the error is logged and the page carries the
/// exception, except that for a DownloadException with Site.CycleRetryTimes &gt; 0 the page
/// returned by Site.AddToCycleRetry is used instead.
/// </returns>
protected override Task<Page> DowloadContent(Request request, ISpider spider)
{
    Site site = spider.Site;
    try
    {
        lock (_locker)
        {
            if (_webDriver == null)
            {
                _webDriver = WebDriverUtil.Open(_browser, _option);

                // Seed the freshly opened driver with the cookies stored for each configured domain.
                if (_domains != null)
                {
                    foreach (var domain in _domains)
                    {
                        var cookies = CookieContainer.GetCookies(new Uri(domain));
                        foreach (System.Net.Cookie cookie in cookies)
                        {
                            AddCookieToDownloadClient(cookie);
                        }
                    }
                }
            }

            // Deliberately NOT nested in the driver-creation branch above: if Inject throws on
            // the first request the driver already exists, and a nested check would never be
            // reached again, leaving every later request without injected cookies.
            // Keyed on _isLogined alone, the injection is retried until it succeeds once.
            if (!_isLogined && CookieInjector != null)
            {
                if (CookieInjector is WebDriverLoginHandler webdriverLoginHandler)
                {
                    webdriverLoginHandler.Driver = _webDriver as RemoteWebDriver;
                }

                CookieInjector.Inject(this, spider);
                _isLogined = true;
            }
        }

        // The request URL is used verbatim; no query-string re-encoding is applied here.
        string realUrl = request.Url;

        NetworkCenter.Current.Execute("webdriver-download", () =>
        {
            _webDriver.Navigate().GoToUrl(realUrl);

            if (WebDriverHandlers != null)
            {
                foreach (var handler in WebDriverHandlers)
                {
                    handler.Handle((RemoteWebDriver)_webDriver);
                }
            }
        });

        // Fixed wait after navigation and handlers, before the page source is read.
        Thread.Sleep(_webDriverWaitTime);

        Page page = new Page(request)
        {
            Content = _webDriver.PageSource,
            TargetUrl = _webDriver.Url
        };

        return Task.FromResult(page);
    }
    catch (DownloadException de)
    {
        Page page = new Page(request) { Exception = de };
        if (site.CycleRetryTimes > 0)
        {
            // Replaces the error page with whatever the site returns for a cycle retry.
            page = site.AddToCycleRetry(request);
        }

        spider.Logger.Error($"下载 {request.Url} 失败: {de.Message}.");
        return Task.FromResult(page);
    }
    catch (Exception e)
    {
        spider.Logger.Error($"下载 {request.Url} 失败: {e.Message}.");
        Page page = new Page(request) { Exception = e };
        return Task.FromResult(page);
    }
}