/// <summary>
/// Crawls the Taobao "mmku" listing page by page using an <c>AdvancedWebSpider</c>.
/// </summary>
/// <remarks>
/// Resets the form state (start URL in <c>textBox1</c>, log in <c>textBox2</c>, output
/// folder in <c>textBox3</c>, <c>NumberCount</c> to zero), makes sure the output folder
/// exists, wires the spider events and then starts the paged crawl. Each completed page
/// is handed to <c>DealWithData</c>.
/// The method is <c>async void</c>, so no caller can observe its task; failures of the
/// crawl itself are therefore caught here and written to <c>textBox2</c>.
/// </remarks>
private async void GetMM()
{
    textBox1.Text = "https://www.taobao.com/markets/mm/mmku?spm=5679.126488.640763.1.KmoNZE";
    textBox2.Clear();
    textBox3.Text = @"D:\图片3";
    NumberCount = 0;

    // CreateDirectory does nothing when the directory already exists,
    // so no separate Exists check is required.
    System.IO.Directory.CreateDirectory(textBox3.Text);

    var spider = new AdvancedWebSpider();
    spider.SleepTimeWait = 500;

    // The handlers marshal to the UI thread with Invoke before touching controls.
    spider.OnExceptionEvent += (sender, args) =>
    {
        Invoke(new Action(() =>
        {
            textBox2.Text = args.Exception.Message + Environment.NewLine + args.Exception.StackTrace;
        }));
    };
    spider.OnTotleCompleted += (sender, args) =>
    {
        Invoke(new Action(() =>
        {
            textBox2.AppendText("完成!");
        }));
    };
    spider.OnCompletedEvent += DealWithData;

    // Paging operation: click the "next" link of the paginator.
    // The condition requires the page container, the listing, the paginator and
    // the "next" link to all be displayed.
    var nextPage = new Operation()
    {
        Action = (driver) => driver.FindElement(By.XPath("//div[@class='paginations']/a[contains(@class,'next')]")).Click(),
        Timeout = 5000,
        Condition = (driver) =>
        {
            return driver.FindElement(By.XPath("//div[@id='fn_page']")).Displayed
                && driver.FindElement(By.XPath("//div[@class='fn_listing']")).Displayed
                && driver.FindElement(By.XPath("//div[@class='paginations']")).Displayed
                && driver.FindElement(By.XPath("//div[@class='paginations']/a[contains(@class,'next')]")).Displayed;
        }
    };

    try
    {
        // The last argument is true while the current page label differs from the
        // value in the skip-wrap <em> element — presumably the "keep paging"
        // predicate with <em> holding the total page count; confirm against
        // AdvancedWebSpider.StartPages.
        await spider.StartPages(new Uri(textBox1.Text), null, nextPage, (driver) =>
        {
            string currentPage = driver.FindElement(By.XPath("//div[@class='paginations']/span[contains(@class,'current')]")).Text;
            string skipWrapText = driver.FindElement(By.XPath("//div[@class='paginations']/span[@class='skip-wrap']/em")).Text;
            return currentPage != skipWrapText;
        });
    }
    catch (Exception ex)
    {
        // An exception escaping an async void method cannot be observed by any
        // caller, so report it in the same format as OnExceptionEvent instead.
        Invoke(new Action(() =>
        {
            textBox2.Text = ex.Message + Environment.NewLine + ex.StackTrace;
        }));
    }
}
/// <summary>
/// Crawls the Ctrip Hangzhou hotel listing page by page and collects hotel names.
/// </summary>
/// <remarks>
/// Sets the start URL in <c>textBox1</c>, wires the spider events and starts the paged
/// crawl. For every completed page the hotel names are extracted from the page source
/// with a regular expression, added to <c>textBox2</c>, counted into <c>CountTotle</c>
/// (shown in <c>label1</c>), and the page timing is logged to <c>textBox3</c>.
/// The method is <c>async void</c>, so no caller can observe its task; failures of the
/// crawl itself are therefore caught here and logged to <c>textBox3</c>.
/// </remarks>
private async void AdvancedSelectHangzhou()
{
    textBox1.Text = "http://hotels.ctrip.com/hotel/hangzhou17";

    var spider = new AdvancedWebSpider();

    // The handlers marshal to the UI thread with Invoke before touching controls.
    spider.OnStartEvent += (sender, args) =>
    {
        Invoke(new Action(() =>
        {
            textBox3.AppendText(args.Uri.ToString() + " 开始" + Environment.NewLine);
        }));
    };
    spider.OnExceptionEvent += (sender, args) =>
    {
        Invoke(new Action(() =>
        {
            textBox3.AppendText(args.Uri.ToString() + " 异常:" + args.Exception.Message + Environment.NewLine);
        }));
    };

    var operation = new Operation
    {
        Action = (driver) =>
        {
            // Click the a#downHerf link inside the pager via the Selenium driver
            // (presumably the "next page" link — confirm against the page markup).
            driver.FindElement(By.XPath("//div[@id='page_info']/a[@id='downHerf']")).Click();
        },
        // The hotel list, the pager and the pager link must all be displayed.
        Condition = (driver) =>
        {
            return driver.FindElement(By.XPath("//div[@id='hotel_list']")).Displayed
                && driver.FindElement(By.XPath("//div[@id='page_info']")).Displayed
                && driver.FindElement(By.XPath("//div[@id='page_info']/a[@id='downHerf']")).Displayed;
        },
        Timeout = 5000
    };

    spider.OnCompletedEvent += (sender, args) =>
    {
        // Extract the hotel names. Each match looks like
        //   <span class="hotel_num">1</span>Hotel name
        // and the name is whatever follows the last '>' of the match.
        var names = new StringBuilder();
        string pattern = "<span class=\"hotel_num\">[0-9]+</span>[^<]+";
        MatchCollection matches = Regex.Matches(args.PageSource, pattern);
        foreach (Match match in matches)
        {
            string matched = match.Value;
            names.Append(matched.Substring(matched.LastIndexOf('>') + 1) + Environment.NewLine);
        }

        CountTotle += matches.Count;

        Invoke(new Action(() =>
        {
            label1.Text = "共计数据:" + CountTotle;
            textBox2.AppendText(names.ToString());
            textBox3.AppendText(args.Uri.ToString() + " 耗时:" + args.MilliSeconds + Environment.NewLine);
        }));
    };

    try
    {
        // The last argument is true while the pager's "current" link text differs
        // from the text of the pager's last link — presumably the "keep paging"
        // predicate; confirm against AdvancedWebSpider.StartPages.
        await spider.StartPages(new Uri(textBox1.Text), null, operation, (driver) =>
        {
            string currentPage = driver.FindElement(By.XPath("//div[@id='page_info']/div/a[@class='current']")).Text;
            string lastPage = driver.FindElement(By.XPath("//div[@id='page_info']/div/a[last()]")).Text;
            return currentPage != lastPage;
        });
    }
    catch (Exception ex)
    {
        // An exception escaping an async void method cannot be observed by any
        // caller, so log it in the same format as OnExceptionEvent instead.
        Invoke(new Action(() =>
        {
            textBox3.AppendText(textBox1.Text + " 异常:" + ex.Message + Environment.NewLine);
        }));
    }
}