/// <summary>
/// Post-load hook: verifies that the loaded page mentions every part of the
/// row's last name, then scrolls the page, expands the "more" links and
/// pauses for a random number of seconds.
/// </summary>
/// <param name="pageUrl">URL that was requested; used only in the error message.</param>
/// <param name="listRow">Current list row; must contain a "LastName" entry.</param>
/// <param name="webBrowser">Browser instance holding the loaded page.</param>
/// <exception cref="Exception">
/// Thrown when any whitespace-separated part of the last name is missing from the page HTML.
/// </exception>
public override void WebBrowserHtml_AfterPageLoaded(string pageUrl, Dictionary<string, string> listRow, IWebBrowser webBrowser)
{
    // Fixed wait before the page is inspected.
    Thread.Sleep(2000);

    string loweredLastName = listRow["LastName"].ToLower();
    string currentUrl = this.RunPage.InvokeGetWebBrowserPageUrl(webBrowser);
    string[] nameParts = loweredLastName.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    string loweredHtml = this.RunPage.InvokeGetPageHtml(webBrowser).ToLower();

    // Every part of the last name has to appear somewhere in the page (case-insensitively).
    for (int i = 0; i < nameParts.Length; i++)
    {
        if (loweredHtml.Contains(nameParts[i]))
        {
            continue;
        }

        string message = "页面加载地址错误, webBrowserUrl=" + currentUrl + ", pageUrl=" + pageUrl;
        throw new Exception(message);
    }

    ProcessWebBrowser.AutoScroll(this.RunPage, webBrowser, 3000, 500, 1000, 2000);
    this.ClickAllMoreLinks(webBrowser);

    // Random pause of 0-9 whole seconds before returning.
    int pauseSeconds = new Random(DateTime.Now.Millisecond).Next(10);
    Thread.Sleep(pauseSeconds * 1000);
}
/// <summary>
/// Post-load hook: scrolls the page and then fails if the browser content
/// contains the "abnormal traffic detected" notice.
/// </summary>
/// <param name="pageUrl">URL that was requested (not used here).</param>
/// <param name="listRow">Current list row (not used here).</param>
/// <param name="webBrowser">Browser instance holding the loaded page.</param>
/// <exception cref="Exception">Thrown when the abnormal-traffic notice is found.</exception>
public override void WebBrowserHtml_AfterPageLoaded(string pageUrl, Dictionary<string, string> listRow, IWebBrowser webBrowser)
{
    ProcessWebBrowser.AutoScroll(this.RunPage, webBrowser, 3000, 500, 1000, 2000);

    // Text of the notice that signals the request was flagged as abnormal traffic.
    string[] blockedMarkers = new string[] { "系统检测到您的计算机网络中存在异常流量" };
    bool blocked = this.RunPage.InvokeCheckWebBrowserContains(webBrowser, blockedMarkers, true);
    if (!blocked)
    {
        return;
    }

    throw new Exception("Google系统检测到您的计算机网络中存在异常流量");
}
/// <summary>
/// Post-load hook: verifies that the page HTML contains the expected marker
/// class string, then scrolls the page, expands the "more" links and pauses
/// for a random number of seconds.
/// </summary>
/// <param name="pageUrl">URL that was requested; used only in the error message.</param>
/// <param name="listRow">Current list row (not used here).</param>
/// <param name="webBrowser">Browser instance holding the loaded page.</param>
/// <exception cref="GrabRequestException">
/// Thrown when the marker string is missing from the page HTML.
/// </exception>
public override void WebBrowserHtml_AfterPageLoaded(string pageUrl, Dictionary<string, string> listRow, IWebBrowser webBrowser)
{
    // Marker looked up in the lower-cased HTML; it is itself all lower-case.
    const string expectedMarker = "pv-top-card-v2-section__info mr5";

    // Fixed wait before the page is inspected.
    Thread.Sleep(2000);

    string currentUrl = this.RunPage.InvokeGetWebBrowserPageUrl(webBrowser);
    string loweredHtml = this.RunPage.InvokeGetPageHtml(webBrowser).ToLower();

    bool markerFound = loweredHtml.Contains(expectedMarker);
    if (!markerFound)
    {
        string message = "页面加载地址错误, webBrowserUrl=" + currentUrl + ", pageUrl=" + pageUrl;
        throw new GrabRequestException(message);
    }

    ProcessWebBrowser.AutoScroll(this.RunPage, webBrowser, 3000, 500, 1000, 2000);
    this.ClickAllMoreLinks(webBrowser);

    // Random pause of 0-9 whole seconds before returning.
    int pauseSeconds = new Random(DateTime.Now.Millisecond).Next(10);
    Thread.Sleep(pauseSeconds * 1000);
}
/// <summary>
/// Records the given list page in <paramref name="allListPageUrls"/> and reports
/// whether a following page should be expected. When the page has no local copy
/// yet, it is opened in an IE browser tab, checked for <paramref name="keyWords"/>,
/// saved to disk, and the page's "pnnext" element is queried for a next-page link.
/// </summary>
/// <param name="seedPageUrl">Seed URL of the crawl (not used by this method).</param>
/// <param name="keyWords">Text the loaded page is checked for to consider it complete.</param>
/// <param name="listPageUrl">URL of the list page to process.</param>
/// <param name="allListPageUrls">Collection that receives <paramref name="listPageUrl"/>.</param>
/// <returns>
/// <c>true</c> when a local copy of the page already exists, or when the page's
/// "pnnext" element yields a non-empty href; otherwise <c>false</c>.
/// </returns>
/// <exception cref="Exception">
/// Thrown with a "Google..." message when the page shows the abnormal-traffic
/// notice; any other failure of the completeness check is rethrown unchanged.
/// </exception>
private bool GetCurrentPageAndNextPageUrl(string seedPageUrl, string keyWords, string listPageUrl, List<string> allListPageUrls)
{
    VisitRandomPage();

    string localFilePath = this.RunPage.GetFilePath(listPageUrl, this.RunPage.GetDetailSourceFileDir());

    // Already saved locally: just record the URL and report that more pages may follow.
    if (File.Exists(localFilePath))
    {
        allListPageUrls.Add(listPageUrl);
        return true;
    }

    string tabName = "ListPage";
    IeRunWebBrowser webBrowser = (IeRunWebBrowser)this.RunPage.ShowWebPage(listPageUrl, tabName, SysConfig.WebPageRequestTimeout, false, WebBrowserType.IE);

    try
    {
        this.RunPage.CheckWebBrowserContainsForComplete(webBrowser, new string[] { keyWords }, SysConfig.WebPageRequestTimeout, true);
    }
    catch (Exception)
    {
        string limitAlert = "计算机网络中存在异常流量";
        string errorPageHtml = this.RunPage.InvokeGetPageHtml(tabName);
        if (errorPageHtml.Contains(limitAlert))
        {
            // The abnormal-traffic notice is showing: clear browser state and signal it explicitly.
            ProcessWebBrowser.ClearWebBrowserTracks();
            ProcessWebBrowser.ClearWebBrowserCookie();
            this.RunPage.InvokeAppendLogText(limitAlert + ". 正在清理缓存, 并等待重新启动爬取.", LogLevelType.System, true);
            throw new Exception("Google" + limitAlert);
        }

        // Rethrow with "throw;" so the original stack trace is preserved
        // ("throw ex;" would reset it to this line).
        throw;
    }

    // NOTE(review): the HTML snapshot is taken before the auto-scroll, so anything the
    // scroll causes to load is not part of the saved file - confirm this is intentional.
    string listPageHtml = this.RunPage.InvokeGetPageHtml(tabName);
    ProcessWebBrowser.AutoScroll(this.RunPage, webBrowser, 2000, 1000, 1000, 2000);
    this.RunPage.SaveFile(listPageHtml, localFilePath, Encoding.UTF8);
    allListPageUrls.Add(listPageUrl);

    // Inject a script helper that returns the href of the "pnnext" element, or '' when absent.
    string scriptMethodCode = "function myGetNextPageUrl(){"
                              + "var nextA = document.getElementById('pnnext');"
                              + "if(nextA == null){"
                              + "return '';"
                              + "}"
                              + "else{"
                              + "return nextA.getAttribute('href');"
                              + "}"
                              + "}";
    this.RunPage.InvokeAddScriptMethod(webBrowser, scriptMethodCode);

    string nextPageUrl = CommonUtil.UrlDecodeSymbolAnd((string)this.RunPage.InvokeDoScriptMethod(webBrowser, "myGetNextPageUrl", null));

    // Only the presence of a next-page link is reported; the URL itself is not stored here.
    return !string.IsNullOrEmpty(nextPageUrl);
}
/// <summary>
/// Post-load hook: auto-scrolls the loaded page.
/// </summary>
/// <param name="pageUrl">URL that was requested (not used here).</param>
/// <param name="listRow">Current list row (not used here).</param>
/// <param name="webBrowser">Browser instance; it is cast to <c>IeRunWebBrowser</c>.</param>
public override void WebBrowserHtml_AfterPageLoaded(string pageUrl, Dictionary<string, string> listRow, IWebBrowser webBrowser)
{
    IeRunWebBrowser ieBrowser = (IeRunWebBrowser)webBrowser;

    // Scroll to the very bottom of the page.
    ProcessWebBrowser.AutoScroll(this.RunPage, ieBrowser, 0, 5000, 1000, 1000, 1000);
}