protected override Page DowloadContent(Request request, ISpider spider) { Site site = spider.Site; try { lock (_locker) { _webDriver = _webDriver ?? WebDriverExtensions.Open(_browser, _option); if (!_isLogined && Login != null) { _isLogined = Login.Handle(_webDriver as RemoteWebDriver); if (!_isLogined) { throw new DownloadException("Login failed. Please check your login codes."); } } } Uri uri = request.Url; //#if NET_CORE // string query = string.IsNullOrEmpty(uri.Query) ? "" : $"?{WebUtility.UrlEncode(uri.Query.Substring(1, uri.Query.Length - 1))}"; //#else // string query = string.IsNullOrEmpty(uri.Query) ? "" : $"?{HttpUtility.UrlPathEncode(uri.Query.Substring(1, uri.Query.Length - 1))}"; //#endif // string realUrl = $"{uri.Scheme}://{uri.DnsSafeHost}{(uri.Port == 80 ? "" : ":" + uri.Port)}{uri.AbsolutePath}{query}"; //var domainUrl = $"{uri.Scheme}://{uri.DnsSafeHost}{(uri.Port == 80 ? "" : ":" + uri.Port)}"; //var options = _webDriver.Manage(); //if (options.Cookies.AllCookies.Count == 0 && spider.Site.Cookies?.PairPart.Count > 0) //{ // _webDriver.Url = domainUrl; // options.Cookies.DeleteAllCookies(); // if (spider.Site.Cookies != null) // { // foreach (var c in spider.Site.Cookies.PairPart) // { // options.Cookies.AddCookie(new Cookie(c.Key, c.Value)); // } // } //} string realUrl = request.Url.ToString(); NetworkCenter.Current.Execute("webdriver-download", () => { _webDriver.Navigate().GoToUrl(realUrl); NavigateCompeleted?.Invoke((RemoteWebDriver)_webDriver); }); Thread.Sleep(_webDriverWaitTime); #region [WDY] #region [WDY] 表单填充+提交 if (FormSubmit != null) { FormSubmit.Handle(_webDriver as RemoteWebDriver); } #endregion #region [WDY] 弹出页面 if (_returnPopupPage) { var currentWindowHandle = _webDriver.CurrentWindowHandle; var popupWindowHandle = string.Empty; foreach (var handle in _webDriver.WindowHandles) { if (handle != currentWindowHandle) { popupWindowHandle = handle; break; } } if (!string.IsNullOrEmpty(popupWindowHandle)) { _webDriver.SwitchTo().Window(popupWindowHandle); } } #endregion // [WDY] 页面内容, 禁止移动位置 string content = _webDriver.PageSource; #region [WDY] iframe /* * [WDY] Iframe 设置 * ReturnIframe = true 时, 返回 Iframe 的 html */ string iframeContent = null; if (_option.IframeOption != null && _option.IframeOption.ReturnIframe) { if (!string.IsNullOrEmpty(_option.IframeOption.IframeUrl) || !string.IsNullOrEmpty(_option.IframeOption.IframeName)) { var iframeElement = !string.IsNullOrEmpty(_option.IframeOption.IframeUrl) ? _webDriver.FindElement(By.XPath($"//iframe[contains(@src,'{_option.IframeOption.IframeUrl}')]")) : _webDriver.FindElement(By.XPath($"//iframe[contains(@name,'{_option.IframeOption.IframeName}')]")); _webDriver.SwitchTo().Frame(iframeElement); // 此处 switchTo iframe 后, 下边 _webDriver.PageSource 则会返回iframe的html iframeContent = _webDriver.PageSource; //_webDriver.SwitchTo().ParentFrame(); } } #endregion #endregion Page page = new Page(request, site.RemoveOutboundLinks ? site.Domains : null) { Content = _webDriver.PageSource, IframeContent = iframeContent, //[WDY] TargetUrl = _webDriver.Url, Title = _webDriver.Title }; // 结束后要置空, 这个值存到Redis会导置无限循环跑单个任务 //request.PutExtra(Request.CycleTriedTimes, null); return(page); } catch (DownloadException de) { Page page = new Page(request, null) { Exception = de }; if (site.CycleRetryTimes > 0) { page = Spider.AddToCycleRetry(request, site); } Logger.AllLog(spider.Identity, $"下载 {request.Url} 失败: {de.Message}.", NLog.LogLevel.Warn); return(page); } catch (Exception e) { Logger.AllLog(spider.Identity, $"下载 {request.Url} 失败: {e.Message}.", NLog.LogLevel.Warn); Page page = new Page(request, null) { Exception = e }; return(page); } }
protected override Page DowloadContent(Request request, ISpider spider) { Site site = spider.Site; try { lock (this) { _webDriver = _webDriver ?? WebDriverExtensions.Open(_browser, _option); if (!_isLogined && Login != null) { _isLogined = Login.Handle(_webDriver as RemoteWebDriver); if (!_isLogined) { throw new DownloadException("Login failed. Please check your login codes."); } } } Uri uri = request.Url; //#if NET_CORE // string query = string.IsNullOrEmpty(uri.Query) ? "" : $"?{WebUtility.UrlEncode(uri.Query.Substring(1, uri.Query.Length - 1))}"; //#else // string query = string.IsNullOrEmpty(uri.Query) ? "" : $"?{HttpUtility.UrlPathEncode(uri.Query.Substring(1, uri.Query.Length - 1))}"; //#endif // string realUrl = $"{uri.Scheme}://{uri.DnsSafeHost}{(uri.Port == 80 ? "" : ":" + uri.Port)}{uri.AbsolutePath}{query}"; var domainUrl = $"{uri.Scheme}://{uri.DnsSafeHost}{(uri.Port == 80 ? "" : ":" + uri.Port)}"; var options = _webDriver.Manage(); if (options.Cookies.AllCookies.Count == 0 && spider.Site.Cookies.PairPart.Count > 0) { _webDriver.Url = domainUrl; options.Cookies.DeleteAllCookies(); foreach (var c in spider.Site.Cookies.PairPart) { options.Cookies.AddCookie(new OpenQA.Selenium.Cookie(c.Key, c.Value)); } } string realUrl = request.Url.ToString(); NetworkCenter.Current.Execute("wdd", () => { _webDriver.Navigate().GoToUrl(realUrl); NavigateCompeleted?.Invoke((RemoteWebDriver)_webDriver); }); Thread.Sleep(_webDriverWaitTime); Page page = new Page(request, site.RemoveOutboundLinks ? site.Domains : null) { Content = _webDriver.PageSource, TargetUrl = _webDriver.Url, Title = _webDriver.Title }; // 结束后要置空, 这个值存到Redis会导置无限循环跑单个任务 request.PutExtra(Request.CycleTriedTimes, null); return(page); } catch (DownloadException de) { Page page = new Page(request, null) { Exception = de }; if (site.CycleRetryTimes > 0) { page = Spider.AddToCycleRetry(request, site); } Logger.MyLog(spider.Identity, $"下载 {request.Url} 失败: {de.Message}.", NLog.LogLevel.Warn); return(page); } catch (Exception e) { Logger.MyLog(spider.Identity, $"下载 {request.Url} 失败: {e.Message}.", NLog.LogLevel.Warn); Page page = new Page(request, null) { Exception = e }; return(page); } }
protected override Page DowloadContent(Request request, ISpider spider) { Site site = spider.Site; try { lock (_locker) { _webDriver = _webDriver ?? WebDriverExtensions.Open(_browser, _option); foreach (var domain in _domains) { var cookies = _cookieContainer.GetCookies(new Uri(domain)); foreach (System.Net.Cookie cookie in cookies) { AddCookieToDownloadClient(cookie); } } if (!_isLogined && CookieInjector != null) { var webdriverLoginHandler = CookieInjector as WebDriverLoginHandler; if (webdriverLoginHandler != null) { webdriverLoginHandler.Driver = _webDriver as RemoteWebDriver; } CookieInjector.Inject(this, spider); _isLogined = true; } } //#if NET_CORE // string query = string.IsNullOrEmpty(uri.Query) ? "" : $"?{WebUtility.UrlEncode(uri.Query.Substring(1, uri.Query.Length - 1))}"; //#else // string query = string.IsNullOrEmpty(uri.Query) ? "" : $"?{HttpUtility.UrlPathEncode(uri.Query.Substring(1, uri.Query.Length - 1))}"; //#endif // string realUrl = $"{uri.Scheme}://{uri.DnsSafeHost}{(uri.Port == 80 ? "" : ":" + uri.Port)}{uri.AbsolutePath}{query}"; var domainUrl = $"{request.Uri.Scheme}://{request.Uri.DnsSafeHost}{(request.Uri.Port == 80 ? "" : ":" + request.Uri.Port)}"; // TODO:重新实现WebDriverDownloader设置Cookie //var options = _webDriver.Manage(); //if (options.Cookies.AllCookies.Count == 0 && spider.Site.Cookies?.PairPart.Count > 0) //{ // _webDriver.Url = domainUrl; // options.Cookies.DeleteAllCookies(); // if (spider.Site.Cookies != null) // { // foreach (var c in spider.Site.Cookies.PairPart) // { // options.Cookies.AddCookie(new Cookie(c.Key, c.Value)); // } // } //} string realUrl = request.Url.ToString(); NetworkCenter.Current.Execute("webdriver-download", () => { _webDriver.Navigate().GoToUrl(realUrl); NavigateCompeleted?.Invoke((RemoteWebDriver)_webDriver); }); Thread.Sleep(_webDriverWaitTime); Page page = new Page(request) { Content = _webDriver.PageSource, TargetUrl = _webDriver.Url }; // 结束后要置空, 这个值存到Redis会导置无限循环跑单个任务 //request.PutExtra(Request.CycleTriedTimes, null); return(page); } catch (DownloadException de) { Page page = new Page(request) { Exception = de }; if (site.CycleRetryTimes > 0) { page = Spider.AddToCycleRetry(request, site); } Logger.Log(spider.Identity, $"下载 {request.Url} 失败: {de.Message}.", Level.Warn); return(page); } catch (Exception e) { Logger.Log(spider.Identity, $"下载 {request.Url} 失败: {e.Message}.", Level.Warn); Page page = new Page(request) { Exception = e }; return(page); } }