Beispiel #1
0
        /// <summary>
        /// Completion handler: writes every record held in the raising crawler's
        /// <c>Data</c> list to the database through <c>DBHelp.InsertData</c>.
        /// Records that have no <c>CommId</c> yet all receive the same
        /// timestamp-based id generated when the handler starts.
        /// </summary>
        /// <param name="sender">The object that raised the event; the handler does nothing unless it is a <c>StrongCrawler</c>.</param>
        /// <param name="e">Completion details; not used by this handler.</param>
        private void HotelCrawler_OnCompleted(object sender, OnCompletedEventArgs e)
        {
            StrongCrawler crawler = sender as StrongCrawler;

            // Nothing to persist when the sender is not a crawler or it collected no data.
            if (crawler == null || crawler.Data == null)
            {
                return;
            }

            DBHelp help           = new DBHelp();
            string fallbackCommId = DateTime.Now.ToString("yyyyMMddHHmmss");

            foreach (ReptileInfo item in crawler.Data)
            {
                // Keep an id assigned during scraping; otherwise use the shared fallback.
                if (string.IsNullOrEmpty(item.CommId))
                {
                    item.CommId = fallbackCommId;
                }
                help.InsertData(item);
            }
        }
        /// <summary>
        /// Starts a <c>StrongCrawler</c> for the given address and returns the status
        /// text accumulated so far as the response content.
        /// </summary>
        /// <param name="url">Absolute address to crawl; it is parsed with <see cref="Uri"/>.</param>
        /// <returns>
        /// The message built by the crawler event handlers, or the exception message
        /// when the address cannot be parsed or starting the crawler throws.
        /// </returns>
        /// <remarks>
        /// NOTE(review): the value returned by <c>Start</c> is not waited on, so if the
        /// crawl runs asynchronously the response may only contain what the handlers
        /// had appended by the time this method returns. The completed/error handlers
        /// also write the full message to a log file. Confirm this is intended.
        /// </remarks>
        public ContentResult Spider(string url)
        {
            // Single definition of the log file the completed/error handlers write to.
            const string logPath = @"G:\test\WidgetDemo\WidgetDemo.Web\App_Data\123.txt";

            var message = string.Empty;

            try
            {
                var      uri     = new Uri(url);
                ICrawler crawler = new StrongCrawler();

                crawler.OnStart += (s, e) =>
                {
                    message += "开始抓取#";
                };
                crawler.OnCompleted += (s, e) =>
                {
                    message += string.Format("抓取完成,耗时:{0},线程Id:{1},内容:{2}", e.MilliSeconds, e.ThreadId, e.PageSource);
                    Write(message, logPath);
                };
                crawler.OnError += (s, e) =>
                {
                    message += string.Format("抓取异常,异常信息:{0}", e.Exception.Message);
                    Write(message, logPath);
                };

                // Fire and forget: the result of Start is intentionally not observed here.
                crawler.Start(uri);
            }
            catch (Exception ex)
            {
                message = ex.Message;
            }

            return Content(message);
        }
Beispiel #3
0
        // Crawlers that already carry the OnError/Onfinally lambdas attached by StarReptile.
        // Lambdas cannot be unsubscribed afterwards, so they must be attached only once
        // per crawler instance.
        // NOTE(review): accessed without locking, like the existing "contains" cache — confirm
        // StarReptile is never entered concurrently.
        private readonly HashSet<StrongCrawler> lifecycleWiredCrawlers = new HashSet<StrongCrawler>();

        /// <summary>
        /// Configures the crawler cached for <paramref name="mast"/> (creating and caching it
        /// on first use) and starts it with an operation that scrapes the configured
        /// elements into the crawler's <c>Data</c> list.
        /// </summary>
        /// <param name="mast">Task definition; its <c>ID</c> keys the crawler cache and its <c>isAMZPRO</c> flag selects the scraping mode.</param>
        /// <param name="elemlist">Element descriptors (id / name / class / XPath) describing which values to read.</param>
        private void StarReptile(TastMast mast, List<ElementObject> elemlist)
        {
            StrongCrawler hotelCrawler = null;

            if (!contains.TryGetValue(mast.ID, out hotelCrawler))
            {
                hotelCrawler = new StrongCrawler();
                contains.Add(mast.ID, hotelCrawler);
            }

            // Every run starts with an empty result list.
            hotelCrawler.Data = new List<ReptileInfo>();

            // Named handlers are attached per run and detached again in Onfinally below.
            hotelCrawler.OnStart     += HotelCrawler_OnStart;
            hotelCrawler.OnCompleted += HotelCrawler_OnCompleted;

            // Anonymous handlers are attached only the first time this crawler is seen.
            // Re-attaching them on each reuse would make Onfinally run once per previous
            // run, advancing "index" and launching the next task several times.
            if (lifecycleWiredCrawlers.Add(hotelCrawler))
            {
                hotelCrawler.OnError += (s, starargvs) =>
                {
                    SetCtrlValues("爬虫抓取出现错误:" + starargvs.Uri.ToString() + ",异常消息:" + starargvs.Exception.Message + starargvs.Exception.StackTrace);
                    // Drop the driver reference after a failure.
                    hotelCrawler.ChDriver = null;
                };
                hotelCrawler.Onfinally += (s, finallyobj) =>
                {
                    hotelCrawler.OnStart     -= HotelCrawler_OnStart;
                    hotelCrawler.OnCompleted -= HotelCrawler_OnCompleted;
                    index++;
                    System.Threading.Thread.Sleep(5000);
                    DoNextTastStarReptile();
                };
            }

            var operation = new Operation
            {
                Action = (x) =>
                {
                    if (mast.isAMZPRO) // Read the data from the AMZPRO plugin.
                    {
                        string amzdata     = ConfigurationManager.AppSettings["amzpro"].ToString();
                        var    webelements = x.FindElements(By.XPath(amzdata));

                        // Row indexes are 1-based because they are substituted into XPath expressions.
                        for (int i = 1; i <= webelements.Count; i++)
                        {
                            // All records of one row share the same id.
                            string commid = DateTime.Now.ToString("yyyyMMddHHmmssffff");
                            // Reset per row so a row without a product link cannot inherit
                            // the ASIN of the previous row.
                            string asin = null;

                            foreach (ElementObject elem in elemlist)
                            {
                                try
                                {
                                    ReptileInfo info = new ReptileInfo();
                                    info.ID        = mast.ID;
                                    info.FieldID   = elem.ElemID;
                                    info.FieldName = elem.FieldNm;
                                    info.ReptileDT = System.DateTime.Now;
                                    info.CommId    = commid;

                                    IWebElement webelement = null;
                                    if (!string.IsNullOrEmpty(elem.Xpath))
                                    {
                                        // elem.Xpath is a format string; {0} receives the row index.
                                        webelement = x.FindElement(By.XPath(string.Format(elem.Xpath, i)));
                                    }
                                    if (webelement != null)
                                    {
                                        info.FieldValue = elem.IsGetnum ? GetNums(webelement.Text) : webelement.Text;
                                        if (elem.ElemID == "Product Name")
                                        {
                                            // The ASIN is taken from the last path segment of the product link.
                                            // A missing href must not discard the record read above.
                                            string href = webelement.GetAttribute("href");
                                            if (!string.IsNullOrEmpty(href))
                                            {
                                                string[] segments = href.Split('/');
                                                asin = segments[segments.Length - 1];
                                            }
                                        }
                                    }
                                    hotelCrawler.Data.Add(info);
                                }
                                catch (Exception ex)
                                {
                                    // Best effort: report the failure and continue with the next element.
                                    SetCtrlValues(string.Format("{0}{1}", mast.Url, ex.Message));
                                }
                            }

                            #region Append the ASIN field record
                            ReptileInfo asinInfo = new ReptileInfo();
                            asinInfo.ID         = mast.ID;
                            asinInfo.CommId     = commid;
                            asinInfo.FieldID    = "ASIN";
                            asinInfo.FieldName  = ConfigurationManager.AppSettings["asinname"].ToString();
                            asinInfo.FieldValue = asin;
                            asinInfo.ReptileDT  = System.DateTime.Now;
                            hotelCrawler.Data.Add(asinInfo);
                            #endregion
                        }
                    }
                    else
                    {
                        foreach (ElementObject elem in elemlist)
                        {
                            try
                            {
                                ReptileInfo info = new ReptileInfo();
                                info.ID        = mast.ID;
                                info.FieldName = elem.FieldNm;
                                info.ReptileDT = System.DateTime.Now;

                                // Each configured locator narrows the search: the first one is
                                // resolved against the page, later ones against the element
                                // found so far.
                                IWebElement webelement = null;

                                if (!string.IsNullOrEmpty(elem.ElemID))
                                {
                                    info.FieldID = elem.ElemID;
                                    webelement   = x.FindElement(By.Id(elem.ElemID));
                                }
                                if (!string.IsNullOrEmpty(elem.ElemNm))
                                {
                                    webelement = webelement != null
                                        ? webelement.FindElement(By.Name(elem.ElemNm))
                                        : x.FindElement(By.Name(elem.ElemNm));
                                }
                                if (!string.IsNullOrEmpty(elem.ElemClass))
                                {
                                    try
                                    {
                                        webelement = webelement != null
                                            ? webelement.FindElement(By.ClassName(elem.ElemClass))
                                            : x.FindElement(By.ClassName(elem.ElemClass));
                                    }
                                    catch (Exception)
                                    {
                                        // Fallback: treat the configured class value as an XPath
                                        // evaluated against the whole page.
                                        webelement = x.FindElement(By.XPath(elem.ElemClass));
                                    }
                                }
                                if (!string.IsNullOrEmpty(elem.Xpath))
                                {
                                    webelement = webelement != null
                                        ? webelement.FindElement(By.XPath(elem.Xpath))
                                        : x.FindElement(By.XPath(elem.Xpath));
                                }

                                if (webelement != null)
                                {
                                    info.FieldValue = elem.IsGetnum ? GetNums(webelement.Text) : webelement.Text;
                                }
                                hotelCrawler.Data.Add(info);
                            }
                            catch (Exception ex)
                            {
                                // Best effort: report the failure and continue with the next element.
                                SetCtrlValues(string.Format("{0}{1}", mast.Url, ex.Message));
                            }
                        }
                    }
                },
                Condition = (x) =>
                {
                    // Previously checked whether the Ajax comment list had finished loading;
                    // the condition is now always satisfied.
                    return true;
                },
                Timeout = 500
            };

            // No JavaScript is executed for now, so the script argument is passed as null.
            hotelCrawler.Start(mast, null, operation);
        }