Beispiel #1
0
        /// <summary>
        /// Starts a paged crawl of the hard-coded Taobao listing URL, wiring the spider's
        /// events to the form: exceptions are shown in <c>textBox2</c>, each completed page
        /// is handed to <c>DealWithData</c>, and a completion marker is appended to
        /// <c>textBox2</c> when the whole run finishes.
        /// </summary>
        /// <remarks>
        /// The method is <c>async void</c>, so an exception escaping it could not be observed
        /// by the caller and would be raised on the synchronization context instead. The body
        /// is therefore guarded and any failure is reported in <c>textBox2</c>.
        /// </remarks>
        private async void GetMM()
        {
            const string startUrl        = "https://www.taobao.com/markets/mm/mmku?spm=5679.126488.640763.1.KmoNZE";
            const string outputDirectory = @"D:\图片3";
            const string nextLinkXPath   = "//div[@class='paginations']/a[contains(@class,'next')]";

            textBox1.Text = startUrl;
            textBox2.Clear();
            textBox3.Text = outputDirectory;
            NumberCount   = 0;

            try
            {
                // CreateDirectory does nothing when the directory already exists,
                // so no separate Exists check is needed.
                System.IO.Directory.CreateDirectory(outputDirectory);

                AdvancedWebSpider spider = new AdvancedWebSpider();
                spider.SleepTimeWait = 500;

                // The spider's handlers marshal to the UI thread via Invoke before touching controls.
                spider.OnExceptionEvent += (s, e) =>
                {
                    Invoke(new Action(() =>
                    {
                        textBox2.Text = e.Exception.Message + Environment.NewLine + e.Exception.StackTrace;
                    }));
                };

                spider.OnTotleCompleted += (s, e) =>
                {
                    Invoke(new Action(() =>
                    {
                        textBox2.AppendText("完成!");
                    }));
                };

                spider.OnCompletedEvent += DealWithData;

                // Paging step: click the "next" link once the listing and the pagination bar are displayed.
                Operation nextPageOperation = new Operation()
                {
                    Action    = (x) => x.FindElement(By.XPath(nextLinkXPath)).Click(),
                    Timeout   = 5000,
                    Condition = (x) =>
                    {
                        return(x.FindElement(By.XPath("//div[@id='fn_page']")).Displayed&&
                               x.FindElement(By.XPath("//div[@class='fn_listing']")).Displayed&&
                               x.FindElement(By.XPath("//div[@class='paginations']")).Displayed&&
                               x.FindElement(By.XPath(nextLinkXPath)).Displayed);
                    }
                };

                // The predicate compares the text of the "current" pagination span with the text of the
                // skip-wrap <em>; presumably StartPages keeps paging while they differ - confirm against
                // AdvancedWebSpider.
                await spider.StartPages(new Uri(startUrl), null, nextPageOperation, (x) =>
                {
                    string currentPage = x.FindElement(By.XPath("//div[@class='paginations']/span[contains(@class,'current')]")).Text;
                    string lastPage    = x.FindElement(By.XPath("//div[@class='paginations']/span[@class='skip-wrap']/em")).Text;
                    return currentPage != lastPage;
                });
            }
            catch (Exception ex)
            {
                // Top-level boundary of an async void method: report instead of letting the
                // exception propagate to the synchronization context. Same format as the
                // OnExceptionEvent handler above.
                textBox2.Text = ex.Message + Environment.NewLine + ex.StackTrace;
            }
        }
Beispiel #2
0
        /// <summary>
        /// Starts a paged crawl of the hard-coded Ctrip Hangzhou hotel list. For every completed
        /// page the hotel names are extracted from the page source with a regular expression and
        /// appended to <c>textBox2</c>; start, exception and timing messages go to <c>textBox3</c>,
        /// and the running total of matches is shown in <c>label1</c>.
        /// </summary>
        /// <remarks>
        /// The method is <c>async void</c>, so an exception escaping it could not be observed
        /// by the caller and would be raised on the synchronization context instead. The awaited
        /// crawl is therefore guarded and any failure is appended to <c>textBox3</c>.
        /// </remarks>
        private async void AdvancedSelectHangzhou()
        {
            const string startUrl      = "http://hotels.ctrip.com/hotel/hangzhou17";
            const string nextLinkXPath = "//div[@id='page_info']/a[@id='downHerf']";

            textBox1.Text = startUrl;

            try
            {
                AdvancedWebSpider spider = new AdvancedWebSpider();

                // The spider's handlers marshal to the UI thread via Invoke before touching controls.
                spider.OnStartEvent += (s, e) =>
                {
                    Invoke(new Action(() =>
                    {
                        textBox3.AppendText(e.Uri.ToString() + " 开始" + Environment.NewLine);
                    }));
                };
                spider.OnExceptionEvent += (s, e) =>
                {
                    Invoke(new Action(() =>
                    {
                        textBox3.AppendText(e.Uri.ToString() + " 异常:" + e.Exception.Message + Environment.NewLine);
                    }));
                };

                var operation = new Operation
                {
                    // Clicks the anchor with id 'downHerf' inside the page_info block
                    // (presumably the "next page" link - confirm against the page markup).
                    Action = (x) =>
                    {
                        x.FindElement(By.XPath(nextLinkXPath)).Click();
                    },
                    // Proceed only once the hotel list, the pager and the clicked link are all displayed.
                    Condition = (x) =>
                    {
                        return(x.FindElement(By.XPath("//div[@id='hotel_list']")).Displayed&&
                               x.FindElement(By.XPath("//div[@id='page_info']")).Displayed&&
                               x.FindElement(By.XPath(nextLinkXPath)).Displayed);
                    },
                    Timeout = 5000
                };

                spider.OnCompletedEvent += (s, e) =>
                {
                    // Extract the hotel names. Each match looks like
                    //   <span class="hotel_num">1</span>Hotel name
                    // and the name is the text after the last '>' of the match.
                    StringBuilder sb = new StringBuilder();

                    string          pattern = "<span class=\"hotel_num\">[0-9]+</span>[^<]+";
                    MatchCollection mc      = Regex.Matches(e.PageSource, pattern);
                    foreach (Match m in mc)
                    {
                        sb.AppendLine(m.Value.Substring(m.Value.LastIndexOf('>') + 1));
                    }

                    // NOTE(review): this update runs on whatever thread raises OnCompletedEvent;
                    // confirm the spider raises page completions sequentially.
                    CountTotle += mc.Count;

                    Invoke(new Action(() =>
                    {
                        label1.Text = "共计数据:" + CountTotle;
                        textBox2.AppendText(sb.ToString());
                        textBox3.AppendText(e.Uri.ToString() + " 耗时:" + e.MilliSeconds + Environment.NewLine);
                    }));
                };

                // The predicate compares the text of the pager's "current" link with the text of its
                // last link; presumably StartPages keeps paging while they differ - confirm against
                // AdvancedWebSpider.
                await spider.StartPages(new Uri(startUrl), null, operation, (m) =>
                {
                    string currentPage = m.FindElement(By.XPath("//div[@id='page_info']/div/a[@class='current']")).Text;
                    string lastPage    = m.FindElement(By.XPath("//div[@id='page_info']/div/a[last()]")).Text;
                    return currentPage != lastPage;
                });
            }
            catch (Exception ex)
            {
                // Top-level boundary of an async void method: report instead of letting the
                // exception propagate to the synchronization context. Same format as the
                // OnExceptionEvent handler above.
                textBox3.AppendText(startUrl + " 异常:" + ex.Message + Environment.NewLine);
            }
        }