/// <summary>
/// Completion handler for a crawl: writes every <c>ReptileInfo</c> collected on the
/// sending <c>StrongCrawler</c> to the database through <c>DBHelp.InsertData</c>.
/// </summary>
/// <param name="sender">The crawler that raised the event; expected to be a <c>StrongCrawler</c>.</param>
/// <param name="e">Completion event data (not used by this handler).</param>
private void HotelCrawler_OnCompleted(object sender, OnCompletedEventArgs e)
{
    StrongCrawler crawler = sender as StrongCrawler;

    // Nothing to persist when the sender is not a StrongCrawler or it never collected data;
    // without this guard the handler would fail with a NullReferenceException.
    if (crawler == null || crawler.Data == null)
    {
        return;
    }

    DBHelp help = new DBHelp();

    // Fallback batch id shared by every record of this crawl that does not already carry one.
    string commid = DateTime.Now.ToString("yyyyMMddHHmmss");

    foreach (ReptileInfo item in crawler.Data)
    {
        if (string.IsNullOrEmpty(item.CommId))
        {
            item.CommId = commid;
        }

        help.InsertData(item);
    }
}
/// <summary>
/// Creates a <c>StrongCrawler</c>, wires status-reporting handlers onto it, starts it on
/// <paramref name="url"/> and returns the status text accumulated so far as the response content.
/// </summary>
/// <param name="url">Address to crawl; it is parsed with <see cref="Uri"/>.</param>
/// <returns>
/// A content result carrying the status message, or the exception message when
/// anything inside the try block throws.
/// </returns>
/// <remarks>
/// The value returned by <c>crawler.Start</c> is not inspected or waited on here, so the
/// returned text only contains what the handlers appended before this method returned.
/// </remarks>
public ContentResult Spider(string url)
{
    // Both the completion and the error handler append to the same log file.
    const string logPath = @"G:\test\WidgetDemo\WidgetDemo.Web\App_Data\123.txt";

    string message = string.Empty;

    try
    {
        Uri target = new Uri(url);
        ICrawler crawler = new StrongCrawler();

        crawler.OnStart += (source, args) =>
        {
            message += "开始抓取#";
        };

        crawler.OnCompleted += (source, args) =>
        {
            message += string.Format("抓取完成,耗时:{0},线程Id:{1},内容:{2}", args.MilliSeconds, args.ThreadId, args.PageSource);
            Write(message, logPath);
        };

        crawler.OnError += (source, args) =>
        {
            message += string.Format("抓取异常,异常信息:{0}", args.Exception.Message);
            Write(message, logPath);
        };

        crawler.Start(target);
    }
    catch (Exception ex)
    {
        // Any failure replaces the status text with the exception message.
        message = ex.Message;
    }

    return Content(message);
}
/// <summary>
/// Gets (or creates and caches in <c>contains</c>) the crawler for <paramref name="mast"/>,
/// wires its event handlers, and starts a crawl whose page action extracts one
/// <c>ReptileInfo</c> per configured element into the crawler's <c>Data</c> list.
/// </summary>
/// <param name="mast">
/// Task definition. <c>ID</c> keys the crawler cache and is stamped on every record,
/// <c>Url</c> prefixes extraction error messages, and <c>isAMZPRO</c> selects the extraction mode.
/// </param>
/// <param name="elemlist">Descriptors of the page elements to read.</param>
private void StarReptile(TastMast mast, List<ElementObject> elemlist)
{
    StrongCrawler hotelCrawler = null;
    if (!contains.TryGetValue(mast.ID, out hotelCrawler))
    {
        hotelCrawler = new StrongCrawler();
        contains.Add(mast.ID, hotelCrawler);
    }

    // Every run starts with an empty result list; the OnCompleted handler persists it.
    hotelCrawler.Data = new List<ReptileInfo>();

    hotelCrawler.OnStart += HotelCrawler_OnStart;

    // NOTE(review): the two anonymous handlers below are never unsubscribed, so a crawler that is
    // reused from the cache appears to accumulate one extra OnError/Onfinally handler per run
    // (which would advance index and call DoNextTastStarReptile more than once) - confirm and fix
    // where the delegate types are visible.
    hotelCrawler.OnError += (s, starargvs) =>
    {
        SetCtrlValues("爬虫抓取出现错误:" + starargvs.Uri.ToString() + ",异常消息:" + starargvs.Exception.Message + starargvs.Exception.StackTrace);

        // Drop the driver reference after a failure.
        hotelCrawler.ChDriver = null;
    };

    hotelCrawler.OnCompleted += HotelCrawler_OnCompleted;

    hotelCrawler.Onfinally += (s, finallyobj) =>
    {
        // Detach the named handlers so the next run on this crawler does not fire them twice.
        hotelCrawler.OnStart -= HotelCrawler_OnStart;
        hotelCrawler.OnCompleted -= HotelCrawler_OnCompleted;

        // Advance to the next task after a fixed pause.
        index++;
        System.Threading.Thread.Sleep(5000);
        DoNextTastStarReptile();
    };

    var operation = new Operation
    {
        Action = (x) =>
        {
            if (mast.isAMZPRO)
            {
                // Read the data from the AMZPRO plugin: one batch of records per matched row.
                string amzdata = ConfigurationManager.AppSettings["amzpro"].ToString();
                var webelements = x.FindElements(By.XPath(amzdata));

                // Rows are addressed 1-based because the index is substituted into each XPath template.
                for (int i = 1; i <= webelements.Count; i++)
                {
                    string commid = DateTime.Now.ToString("yyyyMMddHHmmssffff");

                    // Reset per row so a row whose product link cannot be read does not
                    // inherit the ASIN of the previous row.
                    string asin = null;

                    foreach (ElementObject elem in elemlist)
                    {
                        try
                        {
                            ReptileInfo info = new ReptileInfo();
                            info.ID = mast.ID;
                            info.FieldID = elem.ElemID;
                            info.FieldName = elem.FieldNm;
                            info.ReptileDT = System.DateTime.Now;
                            info.CommId = commid;

                            IWebElement webelement = null;
                            if (!string.IsNullOrEmpty(elem.Xpath))
                            {
                                webelement = x.FindElement(By.XPath(string.Format(elem.Xpath, i)));
                            }

                            if (webelement != null)
                            {
                                info.FieldValue = elem.IsGetnum ? GetNums(webelement.Text) : webelement.Text;

                                if (elem.ElemID == "Product Name")
                                {
                                    // The ASIN is taken as the last path segment of the product link.
                                    // A missing href must not discard the record that was just filled in.
                                    string href = webelement.GetAttribute("href");
                                    if (href != null)
                                    {
                                        string[] segments = href.Split('/');
                                        if (segments.Length > 0)
                                        {
                                            asin = segments[segments.Length - 1];
                                        }
                                    }
                                }
                            }

                            hotelCrawler.Data.Add(info);
                        }
                        catch (Exception ex)
                        {
                            // Best effort: report the failure and continue with the next element.
                            SetCtrlValues(string.Format("{0}{1}", mast.Url, ex.Message));
                        }
                    }

                    // Add the ASIN field record for this row.
                    ReptileInfo asinInfo = new ReptileInfo();
                    asinInfo.ID = mast.ID;
                    asinInfo.CommId = commid;
                    asinInfo.FieldID = "ASIN";
                    asinInfo.FieldName = ConfigurationManager.AppSettings["asinname"].ToString();
                    asinInfo.FieldValue = asin;
                    asinInfo.ReptileDT = System.DateTime.Now;
                    hotelCrawler.Data.Add(asinInfo);
                }
            }
            else
            {
                foreach (ElementObject elem in elemlist)
                {
                    try
                    {
                        ReptileInfo info = new ReptileInfo();
                        info.ID = mast.ID;
                        info.FieldName = elem.FieldNm;
                        info.ReptileDT = System.DateTime.Now;

                        // Each configured locator narrows the search inside the element found by
                        // the previous one, or searches the whole page when nothing was found yet.
                        IWebElement webelement = null;

                        if (!string.IsNullOrEmpty(elem.ElemID))
                        {
                            info.FieldID = elem.ElemID;
                            webelement = x.FindElement(By.Id(elem.ElemID));
                        }

                        if (!string.IsNullOrEmpty(elem.ElemNm))
                        {
                            if (webelement != null)
                            {
                                webelement = webelement.FindElement(By.Name(elem.ElemNm));
                            }
                            else
                            {
                                webelement = x.FindElement(By.Name(elem.ElemNm));
                            }
                        }

                        if (!string.IsNullOrEmpty(elem.ElemClass))
                        {
                            try
                            {
                                if (webelement != null)
                                {
                                    webelement = webelement.FindElement(By.ClassName(elem.ElemClass));
                                }
                                else
                                {
                                    webelement = x.FindElement(By.ClassName(elem.ElemClass));
                                }
                            }
                            catch (Exception)
                            {
                                // Fallback: retry with the ElemClass value used as an XPath over the whole page.
                                webelement = x.FindElement(By.XPath(elem.ElemClass));
                            }
                        }

                        if (!string.IsNullOrEmpty(elem.Xpath))
                        {
                            if (webelement != null)
                            {
                                webelement = webelement.FindElement(By.XPath(elem.Xpath));
                            }
                            else
                            {
                                webelement = x.FindElement(By.XPath(elem.Xpath));
                            }
                        }

                        if (webelement != null)
                        {
                            info.FieldValue = elem.IsGetnum ? GetNums(webelement.Text) : webelement.Text;
                        }

                        hotelCrawler.Data.Add(info);
                    }
                    catch (Exception ex)
                    {
                        // Best effort: report the failure and continue with the next element.
                        SetCtrlValues(string.Format("{0}{1}", mast.Url, ex.Message));
                    }
                }
            }
        },
        Condition = (x) =>
        {
            // No readiness check is performed (the former Ajax "comments loaded" check was removed).
            return true;
        },
        Timeout = 500
    };

    // No JS operation is performed, so null is passed for that parameter for now.
    hotelCrawler.Start(mast, null, operation);
}