/// <summary>
/// Fetches the Ctrip hotel listing page for Hangzhou and displays each hotel name found on it
/// together with the address matched at the same position.
/// </summary>
/// <remarks>
/// Only the first page of results is crawled.
/// The target URL is written to <c>textBox1</c>, progress and error lines are appended to
/// <c>textBox3</c>, the extracted "name 地址[address]" lines are written to <c>textBox2</c>,
/// and the number of hotel-name matches is shown in <c>label1</c>.
/// All UI updates go through <c>Invoke</c> — presumably because the spider raises its events
/// off the UI thread (confirm against <c>WebSpider</c>).
/// </remarks>
private async void SelectHangzhou()
{
    textBox1.Text = "http://hotels.ctrip.com/hotel/hangzhou17";
    WebSpider Spider = new WebSpider();

    // Log the URL when the spider reports that it has started.
    Spider.OnStartEvent += (s, e) =>
    {
        Invoke(new Action(() =>
        {
            textBox3.AppendText(e.Uri.ToString() + " 开始" + Environment.NewLine);
        }));
    };

    // Log the URL and the exception message when the spider reports a failure.
    Spider.OnExceptionEvent += (s, e) =>
    {
        Invoke(new Action(() =>
        {
            textBox3.AppendText(e.Uri.ToString() + " 异常:" + e.Exception.Message + Environment.NewLine);
        }));
    };

    Spider.OnCompletedEvent += (s, e) =>
    {
        // Hotel names are matched from markup shaped like:
        //   <span class="hotel_num">1</span>HOTEL NAME</a>
        string pattern = "<span class=\"hotel_num\">[0-9]+</span>[^<]+";
        string addr = "<p class=\"searchresult_htladdress\">[\\S]+";
        MatchCollection mc = Regex.Matches(e.PageSource, pattern, RegexOptions.IgnoreCase);
        MatchCollection mcaddr = Regex.Matches(e.PageSource, addr, RegexOptions.IgnoreCase);

        // Both patterns end right after a tag, so the wanted text is whatever follows the last '>'.
        Func<string, string> textAfterLastTag = html => html.Substring(html.LastIndexOf('>') + 1);

        StringBuilder sb = new StringBuilder();
        for (int index = 0; index < mc.Count; index++)
        {
            string name = textAfterLastTag(mc[index].Value);

            // Names and addresses are paired purely by position. The page may yield fewer
            // address matches than name matches, so guard the index instead of letting
            // MatchCollection throw ArgumentOutOfRangeException; a missing address is left empty.
            string address = index < mcaddr.Count
                ? textAfterLastTag(mcaddr[index].Value)
                : string.Empty;

            sb.Append(name + " 地址[" + address + "]" + Environment.NewLine);
        }

        Invoke(new Action(() =>
        {
            label1.Text = "共计数据:" + mc.Count;
            textBox2.Text = sb.ToString();
            textBox3.AppendText(e.Uri.ToString() + " 耗时:" + e.MilliSeconds + Environment.NewLine);
        }));
    };

    await Spider.Start(new Uri(textBox1.Text));
}
/// <summary>
/// Fetches the Ctrip city list page and displays the link text of every
/// <c>/hotel/...</c> anchor found in it, one per line.
/// </summary>
/// <remarks>
/// The target URL is written to <c>textBox1</c>, progress and error lines are appended to
/// <c>textBox3</c>, the extracted link texts are written to <c>textBox2</c>, and the number
/// of matched anchors is shown in <c>label1</c>. Every UI update is routed through <c>Invoke</c>.
/// </remarks>
private async void SelectCities()
{
    textBox1.Text = "http://hotels.ctrip.com/citylist";

    var crawler = new WebSpider();

    // Started: log the URL being fetched.
    crawler.OnStartEvent += (sender, args) =>
    {
        Action reportStart = () =>
            textBox3.AppendText(args.Uri.ToString() + " 开始" + Environment.NewLine);
        Invoke(reportStart);
    };

    // Failed: log the URL together with the exception message.
    crawler.OnExceptionEvent += (sender, args) =>
    {
        Action reportFailure = () =>
            textBox3.AppendText(args.Uri.ToString() + " 异常:" + args.Exception.Message + Environment.NewLine);
        Invoke(reportFailure);
    };

    // Completed: pull the anchor texts out of the page source and publish them to the form.
    crawler.OnCompletedEvent += (sender, args) =>
    {
        string anchorPattern = "<a href=\"/hotel/[a-z0-9]+\" title=\"[^\"]+\">[^<]+</a>";
        MatchCollection anchors = Regex.Matches(args.PageSource, anchorPattern, RegexOptions.IgnoreCase);

        var lines = new StringBuilder();
        for (int i = 0; i < anchors.Count; i++)
        {
            // The inner match is ">" followed by the link text; drop the leading '>'.
            string taggedText = Regex.Match(anchors[i].Value, ">[^<]+").Value;
            lines.AppendLine(taggedText.Substring(1));
        }

        Action publishResults = () =>
        {
            label1.Text = "共计数据:" + anchors.Count;
            textBox2.Text = lines.ToString();
            textBox3.AppendText(args.Uri.ToString() + " 耗时:" + args.MilliSeconds + Environment.NewLine);
        };
        Invoke(publishResults);
    };

    var target = new Uri(textBox1.Text);
    await crawler.Start(target);
}