Beispiel #1
0
        /// <summary>
        /// Console entry point: builds a <c>StrongCrawler</c>, wires console logging to its
        /// start/error events, forwards the completion event to <c>HotelCrawler</c>, and starts
        /// it on a single Ctrip hotel page with a click-then-wait <c>Operation</c>.
        /// Blocks on a key press afterwards so the console window stays open.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args)
        {
            const string pageAddress = "http://hotels.ctrip.com/hotel/434938.html";
            // Comment list addressed through its parent panel; looked up twice in the condition below.
            const string nestedCommentList = "//*[@id='hotel_info_comment']/div[@id='commentList']";

            var crawler = new StrongCrawler();

            // Report crawl start and failures on the console.
            crawler.OnStart += (sender, evt) =>
                Console.WriteLine("爬虫开始抓取地址:" + evt.Uri.ToString());
            crawler.OnError += (sender, evt) =>
                Console.WriteLine("爬虫抓取出现错误:" + evt.Uri.ToString() + ",异常消息:" + evt.Exception.ToString());

            // Hand the completion event arguments to HotelCrawler (defined elsewhere in this file).
            crawler.OnCompleted += (sender, evt) => HotelCrawler(evt);

            var pageSteps = new Operation
            {
                // Click the element with id "commentTab" through the Selenium driver.
                Action = driver => driver.FindElement(By.XPath("//*[@id='commentTab']")).Click(),

                // True only when both comment-list lookups report Displayed and the nested
                // list's text no longer contains the "点评载入中" placeholder.
                Condition = driver =>
                {
                    if (!driver.FindElement(By.XPath("//*[@id='commentList']")).Displayed)
                    {
                        return false;
                    }

                    if (!driver.FindElement(By.XPath(nestedCommentList)).Displayed)
                    {
                        return false;
                    }

                    var listText = driver.FindElement(By.XPath(nestedCommentList)).Text;
                    return !listText.Contains("点评载入中");
                },

                // NOTE(review): presumably milliseconds — confirm against Operation.
                Timeout = 5000
            };

            // The second argument is passed as null; the original note says no JS is executed
            // (presumably this is the script parameter — confirm against StrongCrawler.Start).
            crawler.Start(new Uri(pageAddress), null, pageSteps);

            Console.ReadKey();
        }
Beispiel #2
0
        /// <summary>
        /// Builds a <c>StrongCrawler</c>, wires console logging to its start/error events,
        /// forwards the completion event to <c>HotelCrawler</c>, and starts it on the 12306
        /// mobile query home page with a click-then-wait <c>Operation</c>.
        /// Blocks on a key press afterwards.
        /// </summary>
        public static void GetHotel()
        {
            // Query home page that the crawler opens.
            const string startAddress = "http://mobile.12306.cn/weixin/wxcore/init";

            var crawler = new StrongCrawler();

            // Report crawl start and failures on the console.
            crawler.OnStart += (sender, evt) =>
                Console.WriteLine("爬虫开始抓取地址:" + evt.Uri.ToString());
            crawler.OnError += (sender, evt) =>
                Console.WriteLine("爬虫抓取出现错误:" + evt.Uri.ToString() + ",异常消息:" + evt.Exception.ToString());

            // Hand the completion event arguments to HotelCrawler (defined elsewhere in this file).
            crawler.OnCompleted += (sender, evt) => HotelCrawler(evt);

            var pageSteps = new Operation
            {
                // Click the element with id "J_depart_name" through the Selenium driver.
                Action = driver => driver.FindElement(By.XPath("//*[@id='J_depart_name']")).Click(),

                // True once the element with id "his_citybox" reports Displayed.
                Condition = driver => driver.FindElement(By.XPath("//*[@id='his_citybox']")).Displayed,

                // NOTE(review): presumably milliseconds — confirm against Operation.
                Timeout = 5000
            };

            // The second argument is passed as null; the original note says no JS is executed
            // (presumably this is the script parameter — confirm against StrongCrawler.Start).
            crawler.Start(new Uri(startAddress), null, pageSteps);

            Console.ReadKey();
        }
Beispiel #3
0
        /// <summary>
        /// Console entry point for an Amazon product-page experiment: builds a
        /// <c>StrongCrawler</c>, wires console logging to its start/error events, and prepares
        /// an <c>Operation</c> that prints the text of the "priceblock_ourprice" element.
        /// The call that would start the crawl is currently commented out, so this method
        /// only performs the setup and then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args)
        {
            // Only referenced by the disabled Start call at the bottom of this method.
            var productAddress = "https://www.amazon.com/dp/B00AM5XB5O";
            var crawler = new StrongCrawler();

            // Report crawl start and failures on the console.
            crawler.OnStart += (sender, evt) =>
                Console.WriteLine("爬虫开始抓取地址:" + evt.Uri.ToString());
            crawler.OnError += (sender, evt) =>
                Console.WriteLine("爬虫抓取出现错误:" + evt.Uri.ToString() + ",异常消息:" + evt.Exception.ToString());

            // Completion is subscribed but intentionally does nothing here.
            crawler.OnCompleted += (sender, evt) => { };

            var pageSteps = new Operation
            {
                // Read the text of the element with id "priceblock_ourprice" and print it.
                Action = driver => Console.WriteLine(driver.FindElement(By.Id("priceblock_ourprice")).Text),

                // Unconditionally true: no page state is checked.
                Condition = driver => true,

                // NOTE(review): presumably milliseconds — confirm against Operation.
                Timeout = 5000
            };

            // Crawl is disabled. To run it, restore:
            //   crawler.Start(new Uri(productAddress), null, pageSteps);
            // (second argument null: the original note says no JS is executed)

            Console.ReadKey();
        }