/// <summary>
/// Runs the next task: reads the grid row at position <c>index</c>, maps its cells to a
/// <c>TastMast</c> plus the element list deserialized from the <c>infoStr</c> cell, and
/// passes both to <c>StarReptile</c>. Does nothing when <c>index</c> is past the last row
/// or points at the grid's uncommitted new-row placeholder.
/// </summary>
/// <remarks>
/// NOTE(review): StarReptile's Onfinally handler calls this method from inside the crawler's
/// Task.Run callback, so the grid is presumably read off the UI thread here — confirm and
/// marshal if needed.
/// </remarks>
private void DoNextTastStarReptile()
{
    if (index >= this.dataGridView1.Rows.Count)
    {
        return;
    }

    DataGridViewRow row = this.dataGridView1.Rows[index];
    if (row.IsNewRow)
    {
        // The placeholder row has null cell values and is not a task.
        return;
    }

    // Convert.ToString maps null (and DBNull) to "" instead of throwing like Value.ToString().
    Func<string, string> cell = name => Convert.ToString(row.Cells[name].Value);

    TastMast mast = new TastMast();
    mast.ID = cell("ID");
    mast.Url = cell("url");
    mast.TastNm = cell("TastNm");
    mast.LoginUrl = cell("LoginUrl");
    mast.UserID = cell("UserID");
    mast.UserInputId = cell("UserInputId");
    mast.Pwd = cell("Pwd");
    mast.PwdInputId = cell("PwdInputId");
    mast.LoginBtnId = cell("LoginBtnId");
    mast.isAMZPRO = ConvertoBool(cell("isAMZPRO"));
    mast.SearchKey = cell("SearchKey");
    mast.SearchInputID = cell("SearchInputID");
    mast.SearchBtnID = cell("SearchBtnID");
    mast.AMZxpath = cell("AMZxpath");

    List<ElementObject> elemlist = JsonConvert.DeserializeObject<List<ElementObject>>(cell("infoStr"));
    StarReptile(mast, elemlist);
}
/// <summary>
/// Timer callback that (re)starts the crawl from the first task.
/// When <c>Constvariable.Timing</c> is not -1 the crawl only starts during that hour of the day
/// and at most once per day-of-month (tracked in <c>Constvariable.day</c>).
/// The task list is reloaded from the database, the crawler cache is cleared, and the first
/// grid row is mapped to a <c>TastMast</c> and handed to <c>StarReptile</c>.
/// </summary>
/// <param name="sender">Timer raising the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void timer1_Tick(object sender, EventArgs e)
{
    if (Constvariable.Timing != -1) // scheduled crawl
    {
        if (System.DateTime.Now.Hour != Constvariable.Timing || DateTime.Now.Day == Constvariable.day)
        {
            return;
        }
    }

    index = 0;

    #region Reload data
    DBHelp dbhelp = new DBHelp();
    dataSet1.TastMast.FillData(dbhelp.GetDataTable("select *From TastMast a,TastDetail b where a.ID=b.ID and a.IsUse=1"));
    contains.Clear();
    #endregion

    // Guard against an empty task list: indexing Rows[0] on an empty grid throws.
    if (index >= this.dataGridView1.Rows.Count)
    {
        return;
    }

    DataGridViewRow row = this.dataGridView1.Rows[index];
    if (row.IsNewRow)
    {
        // Only the uncommitted placeholder row exists, so there is nothing to crawl.
        return;
    }

    // Convert.ToString maps null (and DBNull) to "" instead of throwing like Value.ToString().
    Func<string, string> cell = name => Convert.ToString(row.Cells[name].Value);

    TastMast mast = new TastMast();
    mast.ID = cell("ID");
    mast.Url = cell("url");
    mast.TastNm = cell("TastNm");
    mast.LoginUrl = cell("LoginUrl");
    mast.UserID = cell("UserID");
    mast.UserInputId = cell("UserInputId");
    mast.Pwd = cell("Pwd");
    mast.PwdInputId = cell("PwdInputId");
    mast.LoginBtnId = cell("LoginBtnId");
    mast.isAMZPRO = ConvertoBool(cell("isAMZPRO"));
    mast.SearchKey = cell("SearchKey");
    mast.SearchInputID = cell("SearchInputID");
    mast.SearchBtnID = cell("SearchBtnID");
    mast.AMZxpath = cell("AMZxpath");

    List<ElementObject> elemlist = JsonConvert.DeserializeObject<List<ElementObject>>(cell("infoStr"));
    StarReptile(mast, elemlist);

    // Record the day only once a crawl has actually been started.
    Constvariable.day = DateTime.Now.Day;
}
/// <summary>
/// Advanced crawler: on a background task, loads the task's page in the Chrome driver, optionally
/// executes a script and a page operation, and reports progress through the
/// OnStart / OnCompleted / OnError / Onfinally events.
/// </summary>
/// <param name="mast">Task definition; <c>mast.Url</c> is the address to crawl.</param>
/// <param name="script">JavaScript to execute on the loaded page, or null to skip.</param>
/// <param name="operation">Page operation to run: optional action, optional wait condition, timeout in milliseconds.</param>
/// <returns>A task that completes when the background crawl has finished.</returns>
public async Task Start(TastMast mast, Script script, Operation operation)
{
    Uri uri = new Uri(mast.Url);
    await Task.Run(() =>
    {
        // Null-conditional invoke, matching how the other events are raised below.
        this.OnStart?.Invoke(this, new OnStartEventArgs(uri));

        // NOTE(review): these options are never handed to a driver inside this method — confirm
        // whether the AddExtensions call below is still needed.
        ChromeOptions options = new ChromeOptions();
        if (mast.isAMZPRO)
        {
            // presumably consumed when ChDriver is created — TODO confirm
            this._Extension = new string[1] { string.Format("{0}/njopapoodmifmcogpingplfphojnfeea.crx", Environment.CurrentDirectory) };

            // NOTE(review): this navigation runs outside the try block, so a failure here faults
            // the returned task without raising OnError or Onfinally.
            ChDriver.Navigate().GoToUrl("https://www.amazon.com");
            try
            {
                var watch = System.Diagnostics.Stopwatch.StartNew();
                ChDriver.Navigate().GoToUrl(mast.Url);

                SearchProduct(mast);
                OpenAmzScoutPro(mast);
                LoginAmzScoutPlugin(mast);

                if (script != null)
                {
                    ChDriver.ExecuteScript(script.Code, script.Args); // execute the JavaScript code
                }
                if (operation.Action != null)
                {
                    operation.Action.Invoke(ChDriver);
                }
                var driverWait = new WebDriverWait(ChDriver, TimeSpan.FromMilliseconds(operation.Timeout)); // timeout in milliseconds
                if (operation.Condition != null)
                {
                    driverWait.Until(operation.Condition);
                }
                var threadId = System.Threading.Thread.CurrentThread.ManagedThreadId; // id of the thread running this crawl
                // Total elapsed time; TimeSpan.Milliseconds would only give the 0-999 ms component.
                var milliseconds = (int)watch.ElapsedMilliseconds;
                var pageSource = ChDriver.PageSource; // DOM of the loaded page
                this.OnCompleted?.Invoke(this, new OnCompletedEventArgs(uri, threadId, milliseconds, pageSource, ChDriver));
            }
            catch (Exception exc)
            {
                this.OnError?.Invoke(this, new OnErrorEventArgs(uri, exc));
            }
            finally
            {
                this.Onfinally?.Invoke(this, new OnfinallEventArgs());
            }
        }
        else
        {
            options.AddExtensions(string.Format("{0}/zhushou_v2.3.0.crx", Environment.CurrentDirectory));
            var driver = ChDriver;
            try
            {
                #region Log in first when the task defines a login page
                if (!string.IsNullOrEmpty(mast.LoginUrl))
                {
                    driver.Navigate().GoToUrl(mast.LoginUrl);

                    // Each login control is looked up by id first, then by name as a fallback.
                    IWebElement userElem = null;
                    try
                    {
                        userElem = driver.FindElementById(mast.UserInputId);
                    }
                    catch (Exception)
                    {
                        userElem = driver.FindElementByName(mast.UserInputId);
                    }
                    if (userElem != null)
                    {
                        userElem.SendKeys(mast.UserID);
                    }

                    IWebElement pwd = null;
                    try
                    {
                        pwd = driver.FindElementById(mast.PwdInputId);
                    }
                    catch (Exception)
                    {
                        pwd = driver.FindElementByName(mast.PwdInputId);
                    }
                    if (pwd != null)
                    {
                        pwd.SendKeys(mast.Pwd);
                    }

                    IWebElement loginbtn = null;
                    try
                    {
                        loginbtn = driver.FindElementById(mast.LoginBtnId);
                    }
                    catch (Exception)
                    {
                        loginbtn = driver.FindElementByName(mast.LoginBtnId);
                    }
                    if (loginbtn != null)
                    {
                        loginbtn.Click();
                    }
                }
                #endregion

                var watch = System.Diagnostics.Stopwatch.StartNew();
                driver.Navigate().GoToUrl(uri.ToString()); // request the target URL
                if (script != null)
                {
                    driver.ExecuteScript(script.Code, script.Args); // execute the JavaScript code
                }
                if (operation.Action != null)
                {
                    operation.Action.Invoke(driver);
                }
                var driverWait = new WebDriverWait(driver, TimeSpan.FromMilliseconds(operation.Timeout)); // timeout in milliseconds
                if (operation.Condition != null)
                {
                    driverWait.Until(operation.Condition);
                }
                var threadId = System.Threading.Thread.CurrentThread.ManagedThreadId; // id of the thread running this crawl
                // Total elapsed time; TimeSpan.Milliseconds would only give the 0-999 ms component.
                var milliseconds = (int)watch.ElapsedMilliseconds;
                var pageSource = driver.PageSource; // DOM of the loaded page
                this.OnCompleted?.Invoke(this, new OnCompletedEventArgs(uri, threadId, milliseconds, pageSource, driver));
            }
            catch (Exception ex)
            {
                this.OnError?.Invoke(this, new OnErrorEventArgs(uri, ex));
            }
            finally
            {
                this.Onfinally?.Invoke(this, new OnfinallEventArgs());
            }
        }
    });
}

// Types the task's search key into the configured search box and clicks the configured search
// button, then sleeps (8 s after a click, 30 s when no search button is configured).
private void SearchProduct(TastMast mast)
{
    if (!string.IsNullOrEmpty(mast.SearchInputID))
    {
        IWebElement search = ChDriver.FindElementById(mast.SearchInputID);
        if (search != null)
        {
            search.SendKeys(mast.SearchKey);
        }
    }

    if (!string.IsNullOrEmpty(mast.SearchBtnID))
    {
        // SearchBtnID is used as an XPath expression here, not as an element id.
        var searchbtn = ChDriver.FindElementByXPath(mast.SearchBtnID);
        if (searchbtn != null)
        {
            searchbtn.Click();
        }
        System.Threading.Thread.Sleep(8000);
    }
    else
    {
        System.Threading.Thread.Sleep(30000);
    }
}

// Clicks the element located by the task's AMZxpath (or by the "AmazcoutPro" app setting when the
// task has none) and then sleeps 30 s.
private void OpenAmzScoutPro(TastMast mast)
{
    IWebElement amz = null;
    if (!string.IsNullOrEmpty(mast.AMZxpath))
    {
        amz = ChDriver.FindElementByXPath(mast.AMZxpath);
    }
    else
    {
        amz = ChDriver.FindElementByXPath(ConfigurationManager.AppSettings["AmazcoutPro"].ToString());
    }
    if (amz != null)
    {
        amz.Click();
    }
    System.Threading.Thread.Sleep(30000);
}

// Best-effort sign-in through the element configured as "googlelogin": fills in the account,
// password and (optionally) phone number from app settings, then reloads the task page and
// repeats the search / open steps. Any exception is swallowed so the crawl continues.
private void LoginAmzScoutPlugin(TastMast mast)
{
    try
    {
        var google = ChDriver.FindElementByXPath(ConfigurationManager.AppSettings["googlelogin"].ToString());
        if (google != null)
        {
            string oldwin = ChDriver.CurrentWindowHandle;
            google.Click();
            System.Threading.Thread.Sleep(3000);

            // Switch to the most recently opened window.
            var pag = ChDriver.SwitchTo().Window(ChDriver.WindowHandles[ChDriver.WindowHandles.Count - 1]);
            var accoutinput = pag.FindElement(By.Id(ConfigurationManager.AppSettings["accoutinput"].ToString()));
            if (accoutinput != null)
            {
                accoutinput.SendKeys(ConfigurationManager.AppSettings["accout"].ToString());
                System.Threading.Thread.Sleep(1000);
                var next = pag.FindElement(By.XPath(ConfigurationManager.AppSettings["next"].ToString()));
                if (next != null)
                {
                    next.Click();
                    System.Threading.Thread.Sleep(3000);
                    var pwd = pag.FindElement(By.XPath(ConfigurationManager.AppSettings["pwdelem"].ToString()));
                    if (pwd != null)
                    {
                        pwd.SendKeys(ConfigurationManager.AppSettings["pwd"].ToString());
                        System.Threading.Thread.Sleep(1000);
                        var next2 = pag.FindElement(By.XPath(ConfigurationManager.AppSettings["next2"].ToString()));
                        if (next2 != null)
                        {
                            next2.Click();
                            ChDriver.SwitchTo().Window(oldwin);
                            System.Threading.Thread.Sleep(3000);
                            try
                            {
                                // NOTE(review): the driver was just switched back to the original window,
                                // so this lookup presumably no longer targets the login window — confirm.
                                var telep = pag.FindElement(By.XPath(ConfigurationManager.AppSettings["telepelem"].ToString()));
                                if (telep != null)
                                {
                                    telep.SendKeys(ConfigurationManager.AppSettings["telep"].ToString());
                                    System.Threading.Thread.Sleep(1000);
                                    var next3 = pag.FindElement(By.XPath(ConfigurationManager.AppSettings["next3"].ToString()));
                                    if (next3 != null)
                                    {
                                        next3.Click();
                                        System.Threading.Thread.Sleep(3000);
                                    }
                                }
                            }
                            catch (Exception)
                            {
                                // The phone step is optional; failures are ignored.
                            }
                        }
                    }
                }
            }

            // Reload the task page and repeat the search / open steps after the login attempt.
            ChDriver.Navigate().GoToUrl(mast.Url);
            SearchProduct(mast);
            OpenAmzScoutPro(mast);
        }
    }
    catch (Exception)
    {
        // Best effort: a failed lookup or login step must not abort the crawl.
    }
}
/// <summary>
/// Starts a crawl for one task. Reuses the crawler cached in <c>contains</c> under the task ID
/// (creating and caching one when absent), resets its <c>Data</c> list, wires up the event
/// handlers, builds the page operation that extracts the configured fields, and calls
/// <c>Start</c> without waiting for it.
/// </summary>
/// <param name="mast">Task definition (URL, ID, AMZPRO flag).</param>
/// <param name="elemlist">Field definitions describing which page elements to read.</param>
private void StarReptile(TastMast mast, List<ElementObject> elemlist)
{
    StrongCrawler hotelCrawler = null;
    if (!contains.TryGetValue(mast.ID, out hotelCrawler))
    {
        hotelCrawler = new StrongCrawler();
        contains.Add(mast.ID, hotelCrawler);

        // These two handlers are never unsubscribed, so they are attached only once per crawler.
        // Attaching them on every call would make a reused crawler advance `index` and start the
        // next task several times per run.
        hotelCrawler.OnError += (s, starargvs) =>
        {
            SetCtrlValues("爬虫抓取出现错误:" + starargvs.Uri.ToString() + ",异常消息:" + starargvs.Exception.Message + starargvs.Exception.StackTrace);
            // NOTE(review): the driver reference is dropped here; whether the browser gets closed
            // depends on the ChDriver setter, which is not visible — confirm.
            hotelCrawler.ChDriver = null;
        };
        hotelCrawler.Onfinally += (s, finallyobj) =>
        {
            hotelCrawler.OnStart -= HotelCrawler_OnStart;
            hotelCrawler.OnCompleted -= HotelCrawler_OnCompleted;
            index++;
            System.Threading.Thread.Sleep(5000);
            DoNextTastStarReptile();
        };
    }

    hotelCrawler.Data = new List<ReptileInfo>();

    // Per-run handlers; both are removed again in the Onfinally handler above.
    hotelCrawler.OnStart += HotelCrawler_OnStart;
    hotelCrawler.OnCompleted += HotelCrawler_OnCompleted;

    var operation = new Operation
    {
        Action = (x) =>
        {
            if (mast.isAMZPRO) // read the data from the AMZPRO plugin
            {
                string amzdata = ConfigurationManager.AppSettings["amzpro"].ToString();
                var webelements = x.FindElements(By.XPath(amzdata));

                // The element XPaths are format strings taking the 1-based result row number.
                for (int i = 1; i <= webelements.Count; i++)
                {
                    string commid = DateTime.Now.ToString("yyyyMMddHHmmssffff");

                    // Reset per row so a row whose product link fails does not inherit the
                    // previous row's ASIN.
                    string asin = null;

                    foreach (ElementObject elem in elemlist)
                    {
                        try
                        {
                            ReptileInfo info = new ReptileInfo();
                            info.ID = mast.ID;
                            info.FieldID = elem.ElemID;
                            info.FieldName = elem.FieldNm;
                            info.ReptileDT = System.DateTime.Now;
                            info.CommId = commid;

                            IWebElement webelement = null;
                            if (!string.IsNullOrEmpty(elem.Xpath))
                            {
                                webelement = x.FindElement(By.XPath(string.Format(elem.Xpath, i)));
                            }
                            if (webelement != null)
                            {
                                info.FieldValue = elem.IsGetnum ? GetNums(webelement.Text) : webelement.Text;
                                if (elem.ElemID == "Product Name")
                                {
                                    // The ASIN is taken as the last path segment of the product link.
                                    string href = webelement.GetAttribute("href");
                                    if (!string.IsNullOrEmpty(href))
                                    {
                                        string[] segments = href.Split('/');
                                        asin = segments[segments.Length - 1];
                                    }
                                }
                            }
                            hotelCrawler.Data.Add(info);
                        }
                        catch (Exception ex)
                        {
                            SetCtrlValues(string.Format("{0}{1}", mast.Url, ex.Message));
                        }
                    }

                    #region Add the ASIN field record for this row
                    ReptileInfo asinInfo = new ReptileInfo();
                    asinInfo.ID = mast.ID;
                    asinInfo.CommId = commid;
                    asinInfo.FieldID = "ASIN";
                    asinInfo.FieldName = ConfigurationManager.AppSettings["asinname"].ToString();
                    asinInfo.FieldValue = asin;
                    asinInfo.ReptileDT = System.DateTime.Now;
                    hotelCrawler.Data.Add(asinInfo);
                    #endregion
                }
            }
            else
            {
                foreach (ElementObject elem in elemlist)
                {
                    try
                    {
                        ReptileInfo info = new ReptileInfo();
                        info.ID = mast.ID;
                        info.FieldName = elem.FieldNm;
                        info.ReptileDT = System.DateTime.Now;

                        // Each configured locator narrows the search: id, then name, class, XPath.
                        // Later locators search inside the element found so far.
                        IWebElement webelement = null;
                        if (!string.IsNullOrEmpty(elem.ElemID))
                        {
                            info.FieldID = elem.ElemID;
                            webelement = x.FindElement(By.Id(elem.ElemID));
                        }
                        if (!string.IsNullOrEmpty(elem.ElemNm))
                        {
                            if (webelement != null)
                            {
                                webelement = webelement.FindElement(By.Name(elem.ElemNm));
                            }
                            else
                            {
                                webelement = x.FindElement(By.Name(elem.ElemNm));
                            }
                        }
                        if (!string.IsNullOrEmpty(elem.ElemClass))
                        {
                            try
                            {
                                if (webelement != null)
                                {
                                    webelement = webelement.FindElement(By.ClassName(elem.ElemClass));
                                }
                                else
                                {
                                    webelement = x.FindElement(By.ClassName(elem.ElemClass));
                                }
                            }
                            catch (Exception)
                            {
                                // Fallback: treat the ElemClass value as an XPath expression.
                                webelement = x.FindElement(By.XPath(elem.ElemClass));
                            }
                        }
                        if (!string.IsNullOrEmpty(elem.Xpath))
                        {
                            if (webelement != null)
                            {
                                webelement = webelement.FindElement(By.XPath(elem.Xpath));
                            }
                            else
                            {
                                webelement = x.FindElement(By.XPath(elem.Xpath));
                            }
                        }
                        if (webelement != null)
                        {
                            info.FieldValue = elem.IsGetnum ? GetNums(webelement.Text) : webelement.Text;
                        }
                        hotelCrawler.Data.Add(info);
                    }
                    catch (Exception ex)
                    {
                        SetCtrlValues(string.Format("{0}{1}", mast.Url, ex.Message));
                    }
                }
            }
        },
        Condition = (x) =>
        {
            // No wait condition: the page is treated as ready immediately.
            return true;
        },
        Timeout = 500
    };

    // Fire and forget; no script is executed, so the script argument is null.
    hotelCrawler.Start(mast, null, operation);
}
/// <summary>
/// Save handler: builds a <c>TastMast</c> from the form fields and a <c>TastDetail</c> whose
/// <c>InfoStr</c> is the JSON-serialized list of element definitions from the grid, inserts or
/// updates both (depending on <c>_status</c>) inside a transaction, and closes the window.
/// </summary>
/// <param name="sender">Button raising the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    TastMast mast = new TastMast();
    // "MM" is the month; the previous "mm" repeated the minutes, so the month was never in the ID.
    mast.ID = string.IsNullOrEmpty(this.txtID.Text) ? System.DateTime.Now.ToString("yyyyMMddHHmmss") : this.txtID.Text;
    mast.Url = this.txturl.Text;
    mast.TastNm = this.txtTastNm.Text;
    mast.LoginUrl = this.txtloginurl.Text.Trim();
    mast.UserID = this.txtuserId.Text.Trim();
    mast.UserInputId = this.txtuseridinput.Text.Trim();
    mast.Pwd = this.txtpwd.Text.Trim();
    mast.PwdInputId = this.txtpwdinput.Text.Trim();
    mast.LoginBtnId = this.txtloginbtn.Text.Trim();
    mast.isAMZPRO = this.checkBox1.Checked;
    mast.SearchKey = this.txbsearchkey.Text.Trim();
    mast.SearchInputID = this.txtInputID.Text.Trim();
    mast.SearchBtnID = this.txtSearchBtnID.Text.Trim();
    mast.AMZxpath = this.txtamzxpath.Text.Trim();

    TastDetail detail = new TastDetail();
    detail.RowNo = 1;
    detail.ID = mast.ID;

    // Null cells become "" / false; DBNull is treated the same way instead of failing the cast.
    Func<DataGridViewRow, string, string> text = (r, column) => Convert.ToString(r.Cells[column].Value);
    Func<DataGridViewRow, string, bool> flag = (r, column) =>
    {
        object value = r.Cells[column].Value;
        return value != null && !(value is DBNull) && Convert.ToBoolean(value);
    };

    List<ElementObject> elemlist = new List<ElementObject>();
    // The last row is skipped — presumably the grid's empty new-row placeholder; confirm.
    for (int i = 0; i < this.dataGridView1.Rows.Count - 1; i++)
    {
        DataGridViewRow row = this.dataGridView1.Rows[i];
        ElementObject elem = new ElementObject();
        elem.ElemID = text(row, "ElemID");
        elem.ElemNm = text(row, "ElemNm");
        elem.ElemClass = text(row, "ElemClass");
        elem.ElemTagNm = text(row, "ElemTagNm");
        elem.Xpath = text(row, "XPath");
        elem.FieldNm = text(row, "FieldNm");
        elem.IsGetnum = flag(row, "IsGetnum");
        elem.PostEmail = flag(row, "PostEmail");
        elemlist.Add(elem);
    }
    detail.InfoStr = JsonConvert.SerializeObject(elemlist);

    // NOTE(review): neither transaction is rolled back when an insert/update throws — confirm
    // how DBHelp handles an abandoned transaction.
    if (_status == WindowStatus.Add)
    {
        DBHelp dbhelp = new DBHelp();
        dbhelp.BeginTrans();
        dbhelp.InsertData(mast);
        dbhelp.InsertData(detail);
        dbhelp.Commit();
    }
    if (_status == WindowStatus.Edit)
    {
        DBHelp dbhelp = new DBHelp();
        dbhelp.BeginTrans();
        dbhelp.Update(mast, new Dictionary<string, object> { { "ID", mast.ID } });
        dbhelp.Update(detail, new Dictionary<string, object> { { "ID", mast.ID } });
        dbhelp.Commit();
    }

    this.Close();
}