Пример #1
0
        /// <summary>
        /// Post-load hook: checks that the loaded page HTML contains every word of the row's
        /// "LastName" value (case-insensitively, via lower-casing both sides), then scrolls the
        /// page, calls <c>ClickAllMoreLinks</c> and sleeps for a random 0-9 seconds.
        /// </summary>
        /// <param name="pageUrl">Requested page URL; only used in the error message.</param>
        /// <param name="listRow">Current data row; its "LastName" entry is read.</param>
        /// <param name="webBrowser">Browser instance whose page is inspected and scrolled.</param>
        /// <exception cref="Exception">A word of the last name is missing from the page HTML.</exception>
        public override void WebBrowserHtml_AfterPageLoaded(string pageUrl, Dictionary <string, string> listRow, IWebBrowser webBrowser)
        {
            // Fixed two-second pause before the page is inspected.
            Thread.Sleep(2000);

            string   loweredLastName = listRow["LastName"].ToLower();
            string   currentUrl      = this.RunPage.InvokeGetWebBrowserPageUrl(webBrowser);
            string[] nameWords       = loweredLastName.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            string   loweredHtml     = this.RunPage.InvokeGetPageHtml(webBrowser).ToLower();

            // Every space-separated word of the last name has to occur somewhere in the HTML.
            for (int i = 0; i < nameWords.Length; i++)
            {
                if (loweredHtml.Contains(nameWords[i]))
                {
                    continue;
                }

                throw new Exception("页面加载地址错误, webBrowserUrl=" + currentUrl + ", pageUrl=" + pageUrl);
            }

            ProcessWebBrowser.AutoScroll(this.RunPage, webBrowser, 3000, 500, 1000, 2000);
            this.ClickAllMoreLinks(webBrowser);

            // Random whole-second pause in [0, 9]; the generator is seeded from the current millisecond.
            Random jitter       = new Random(DateTime.Now.Millisecond);
            int    pauseSeconds = jitter.Next(10);
            Thread.Sleep(pauseSeconds * 1000);
        }
Пример #2
0
 /// <summary>
 /// Post-load hook: scrolls the page and then fails if the browser content contains the
 /// "abnormal traffic detected" notice text.
 /// </summary>
 /// <param name="pageUrl">Requested page URL (not used here).</param>
 /// <param name="listRow">Current data row (not used here).</param>
 /// <param name="webBrowser">Browser instance that is scrolled and checked.</param>
 /// <exception cref="Exception">The notice text was found in the browser content.</exception>
 public override void WebBrowserHtml_AfterPageLoaded(string pageUrl, Dictionary <string, string> listRow, IWebBrowser webBrowser)
 {
     ProcessWebBrowser.AutoScroll(this.RunPage, webBrowser, 3000, 500, 1000, 2000);

     // Text of the notice that signals the request was flagged as abnormal traffic.
     string[] trafficNotice = new string[] { "系统检测到您的计算机网络中存在异常流量" };
     bool     noticeFound   = this.RunPage.InvokeCheckWebBrowserContains(webBrowser, trafficNotice, true);

     if (noticeFound)
     {
         throw new Exception("Google系统检测到您的计算机网络中存在异常流量");
     }
 }
Пример #3
0
        /// <summary>
        /// Post-load hook: checks that the lower-cased page HTML contains a fixed marker string,
        /// then scrolls the page, calls <c>ClickAllMoreLinks</c> and sleeps for a random 0-9 seconds.
        /// </summary>
        /// <param name="pageUrl">Requested page URL; only used in the error message.</param>
        /// <param name="listRow">Current data row (not used here).</param>
        /// <param name="webBrowser">Browser instance whose page is inspected and scrolled.</param>
        /// <exception cref="GrabRequestException">The marker string is missing from the page HTML.</exception>
        public override void WebBrowserHtml_AfterPageLoaded(string pageUrl, Dictionary <string, string> listRow, IWebBrowser webBrowser)
        {
            // Marker text expected in the HTML of a correctly loaded page.
            const string expectedMarker = "pv-top-card-v2-section__info mr5";

            // Fixed two-second pause before the page is inspected.
            Thread.Sleep(2000);

            string currentUrl  = this.RunPage.InvokeGetWebBrowserPageUrl(webBrowser);
            string loweredHtml = this.RunPage.InvokeGetPageHtml(webBrowser).ToLower();

            bool markerPresent = loweredHtml.Contains(expectedMarker);
            if (!markerPresent)
            {
                throw new GrabRequestException("页面加载地址错误, webBrowserUrl=" + currentUrl + ", pageUrl=" + pageUrl);
            }

            ProcessWebBrowser.AutoScroll(this.RunPage, webBrowser, 3000, 500, 1000, 2000);
            this.ClickAllMoreLinks(webBrowser);

            // Random whole-second pause in [0, 9]; the generator is seeded from the current millisecond.
            Random jitter       = new Random(DateTime.Now.Millisecond);
            int    pauseSeconds = jitter.Next(10);
            Thread.Sleep(pauseSeconds * 1000);
        }
Пример #4
0
        /// <summary>
        /// Ensures the given list page is saved locally, records its URL in
        /// <paramref name="allListPageUrls"/>, and reports whether a following page exists.
        /// </summary>
        /// <param name="seedPageUrl">Seed page URL (not used by this method's body).</param>
        /// <param name="keyWords">Text passed to the page-completion check as the content the page must contain.</param>
        /// <param name="listPageUrl">URL of the list page to load and save.</param>
        /// <param name="allListPageUrls">Collection that receives <paramref name="listPageUrl"/>.</param>
        /// <returns>
        /// <c>true</c> when a local file for the page already exists, or when the freshly loaded
        /// page has an element with id <c>pnnext</c> whose <c>href</c> is non-empty;
        /// otherwise <c>false</c>.
        /// </returns>
        /// <exception cref="Exception">
        /// The completion check failed. When the page shows the abnormal-traffic notice, browser
        /// tracks and cookies are cleared and a new exception (carrying the original as
        /// <see cref="Exception.InnerException"/>) is thrown; any other failure is rethrown unchanged.
        /// </exception>
        private bool GetCurrentPageAndNextPageUrl(string seedPageUrl, string keyWords, string listPageUrl, List <string> allListPageUrls)
        {
            VisitRandomPage();

            string localFilePath = this.RunPage.GetFilePath(listPageUrl, this.RunPage.GetDetailSourceFileDir());

            // A previously saved page is not reloaded; it is recorded and a next page is assumed.
            if (File.Exists(localFilePath))
            {
                allListPageUrls.Add(listPageUrl);
                return(true);
            }

            string          tabName    = "ListPage";
            IeRunWebBrowser webBrowser = (IeRunWebBrowser)this.RunPage.ShowWebPage(listPageUrl, tabName, SysConfig.WebPageRequestTimeout, false, WebBrowserType.IE);
            try
            {
                this.RunPage.CheckWebBrowserContainsForComplete(webBrowser, new string[] { keyWords }, SysConfig.WebPageRequestTimeout, true);
            }
            catch (Exception ex)
            {
                string limitAlert    = "计算机网络中存在异常流量";
                string errorPageHtml = this.RunPage.InvokeGetPageHtml(tabName);
                if (errorPageHtml.Contains(limitAlert))
                {
                    ProcessWebBrowser.ClearWebBrowserTracks();
                    ProcessWebBrowser.ClearWebBrowserCookie();
                    this.RunPage.InvokeAppendLogText(limitAlert + ". 正在清理缓存, 并等待重新启动爬取.", LogLevelType.System, true);

                    // Keep the original failure attached so the root cause is not lost.
                    throw new Exception("Google" + limitAlert, ex);
                }

                // Bare rethrow preserves the original stack trace ("throw ex;" would reset it).
                throw;
            }

            // NOTE(review): the HTML is captured before AutoScroll runs, so the saved file holds
            // the pre-scroll markup - confirm this ordering is intended.
            string listPageHtml = this.RunPage.InvokeGetPageHtml(tabName);
            ProcessWebBrowser.AutoScroll(this.RunPage, webBrowser, 2000, 1000, 1000, 2000);
            this.RunPage.SaveFile(listPageHtml, localFilePath, Encoding.UTF8);

            allListPageUrls.Add(listPageUrl);

            // Script injected into the page: returns the href of the element with id 'pnnext',
            // or an empty string when no such element exists.
            string scriptMethodCode = "function myGetNextPageUrl(){"
                                      + "var nextA = document.getElementById('pnnext');"
                                      + "if(nextA == null){"
                                      + "return '';"
                                      + "}"
                                      + "else{"
                                      + "return nextA.getAttribute('href');"
                                      + "}"
                                      + "}";

            this.RunPage.InvokeAddScriptMethod(webBrowser, scriptMethodCode);
            string nextPageUrl = CommonUtil.UrlDecodeSymbolAnd((string)this.RunPage.InvokeDoScriptMethod(webBrowser, "myGetNextPageUrl", null));

            // Only the existence of a next page is reported; the URL itself is not stored here.
            return(!string.IsNullOrEmpty(nextPageUrl));
        }
Пример #5
0
 /// <summary>
 /// Post-load hook: scrolls the loaded page down to its very bottom.
 /// </summary>
 /// <param name="pageUrl">Requested page URL (not used here).</param>
 /// <param name="listRow">Current data row (not used here).</param>
 /// <param name="webBrowser">Browser instance; cast to <c>IeRunWebBrowser</c> before scrolling.</param>
 public override void WebBrowserHtml_AfterPageLoaded(string pageUrl, Dictionary <string, string> listRow, IWebBrowser webBrowser)
 {
     // The cast throws InvalidCastException if a non-IE browser instance is passed in.
     IeRunWebBrowser ieBrowser = (IeRunWebBrowser)webBrowser;

     // Scroll all the way to the bottom of the page.
     ProcessWebBrowser.AutoScroll(this.RunPage, ieBrowser, 0, 5000, 1000, 1000, 1000);
 }