Пример #1
0
        /// <summary>
        /// Crawls the Hangzhou hotel listing page and displays every hotel name
        /// found on it together with its address.
        /// </summary>
        /// <remarks>
        /// Only the first page of results is crawled at the moment.
        /// Progress and errors are appended to <c>textBox3</c>; the extracted list
        /// goes to <c>textBox2</c> and the number of hotels to <c>label1</c>.
        /// </remarks>
        private async void SelectHangzhou()
        {
            textBox1.Text = "http://hotels.ctrip.com/hotel/hangzhou17";
            WebSpider Spider = new WebSpider();

            // NOTE(review): every handler goes through Invoke, presumably because the
            // spider raises its events off the UI thread — confirm against WebSpider.
            Spider.OnStartEvent += (s, e) =>
            {
                Invoke(new Action(() =>
                {
                    textBox3.AppendText(e.Uri.ToString() + " 开始" + Environment.NewLine);
                }));
            };
            Spider.OnExceptionEvent += (s, e) =>
            {
                Invoke(new Action(() =>
                {
                    textBox3.AppendText(e.Uri.ToString() + " 异常:" + e.Exception.Message + Environment.NewLine);
                }));
            };
            Spider.OnCompletedEvent += (s, e) =>
            {
                // Sample of the markup matched by the name pattern:
                //   <span class="hotel_num">1</span>Hotel Name</a>
                string          pattern = "<span class=\"hotel_num\">[0-9]+</span>[^<]+";
                string          addr    = "<p class=\"searchresult_htladdress\">[\\S]+";
                MatchCollection mc      = Regex.Matches(e.PageSource, pattern, RegexOptions.IgnoreCase);
                MatchCollection mcaddr  = Regex.Matches(e.PageSource, addr, RegexOptions.IgnoreCase);
                StringBuilder   sb      = new StringBuilder();

                int index = 0;
                foreach (Match m in mc)
                {
                    // Everything after the last '>' of the match is the visible text.
                    string name = m.Value.Substring(m.Value.LastIndexOf('>') + 1);

                    // Names and addresses are matched independently, so the page may
                    // yield fewer addresses than names; indexing past the end of the
                    // address collection would throw. Fall back to an empty address.
                    string address = string.Empty;
                    if (index < mcaddr.Count)
                    {
                        string rawAddress = mcaddr[index].Value;
                        address = rawAddress.Substring(rawAddress.LastIndexOf('>') + 1);
                    }

                    sb.Append(name + "  地址[" + address + "]" + Environment.NewLine);
                    index++;
                }

                Invoke(new Action(() =>
                {
                    label1.Text   = "共计数据:" + mc.Count;
                    textBox2.Text = sb.ToString();
                    textBox3.AppendText(e.Uri.ToString() + " 耗时:" + e.MilliSeconds + Environment.NewLine);
                }));
            };
            await Spider.Start(new Uri(textBox1.Text));
        }
Пример #2
0
        /// <summary>
        /// Crawls the city list page and displays the link text of every
        /// <c>/hotel/...</c> anchor found on it, one per line.
        /// </summary>
        /// <remarks>
        /// Progress and errors are appended to <c>textBox3</c>; the extracted list
        /// goes to <c>textBox2</c> and the number of matches to <c>label1</c>.
        /// </remarks>
        private async void SelectCities()
        {
            textBox1.Text = "http://hotels.ctrip.com/citylist";
            WebSpider crawler = new WebSpider();

            // Log the start of the request.
            crawler.OnStartEvent += (sender, args) =>
            {
                Invoke(new Action(() =>
                {
                    string startedLine = args.Uri.ToString() + " 开始" + Environment.NewLine;
                    textBox3.AppendText(startedLine);
                }));
            };

            // Log any failure reported by the spider.
            crawler.OnExceptionEvent += (sender, args) =>
            {
                Invoke(new Action(() =>
                {
                    string failedLine = args.Uri.ToString() + " 异常:" + args.Exception.Message + Environment.NewLine;
                    textBox3.AppendText(failedLine);
                }));
            };

            // Extract the anchors from the downloaded page and show the result.
            crawler.OnCompletedEvent += (sender, args) =>
            {
                MatchCollection anchors = Regex.Matches(
                    args.PageSource,
                    "<a href=\"/hotel/[a-z0-9]+\" title=\"[^\"]+\">[^<]+</a>",
                    RegexOptions.IgnoreCase);

                StringBuilder cityNames = new StringBuilder();
                for (int i = 0; i < anchors.Count; i++)
                {
                    // The inner match starts at a '>' and runs up to the next '<';
                    // Substring(1) strips that leading '>'.
                    string linkText = Regex.Match(anchors[i].Value, ">[^<]+").Value;
                    cityNames.Append(linkText.Substring(1)).Append(Environment.NewLine);
                }

                Invoke(new Action(() =>
                {
                    label1.Text   = "共计数据:" + anchors.Count;
                    textBox2.Text = cityNames.ToString();

                    string elapsedLine = args.Uri.ToString() + " 耗时:" + args.MilliSeconds + Environment.NewLine;
                    textBox3.AppendText(elapsedLine);
                }));
            };

            Uri target = new Uri(textBox1.Text);
            await crawler.Start(target);
        }