/// <summary>
/// Downloads every lecturer search-result page of www.jiangshi99.com, from page 1 up to
/// and including <c>totalCount</c>, and builds one <see cref="Lecturer"/> per listed entry.
/// </summary>
/// <remarks>
/// Besides the list page itself, an entry can trigger two further requests: the lecturer's
/// detail page (only when the list page shows no price) and the <c>contact_showv2</c>
/// endpoint that supplies the phone numbers. Download failures are not caught here and
/// propagate to the caller.
/// </remarks>
/// <returns>The scraped lecturers, numbered consecutively from 1 in page order.</returns>
private List<Lecturer> GetAllLecturer()
{
    const string siteRoot = "http://www.jiangshi99.com";

    // Built once instead of once per entry. "[\s\S]*?" matches exactly what the former
    // "(.|\s)*?" matched, without the overlapping alternation that forces the regex
    // engine into heavy backtracking on large pages.
    Regex entryRegex = new Regex(@"<li class=""jl_right"">[\s\S]*?</li>");
    Regex nameBlockRegex = new Regex(@"<div class=""jl_jname"">[\s\S]*?</div>");
    Regex hrefRegex = new Regex("href=\"(.[^\"]*)\"");
    Regex titleRegex = new Regex("title=\"(.[^\"]*)\"");
    Regex priceRegex = new Regex(@"<span class=""kPrice"">(.*?)</span>");
    Regex areaRegex = new Regex(@"<b>擅长领域:</b>(.*?)</div>");
    Regex detailPriceRegex = new Regex(@"<span class=""Red"">(.*?)</span>");

    List<Lecturer> lecturers = new List<Lecturer>();
    int no = 1;

    // One client serves all list and detail pages; it is disposed when scraping ends.
    using (WebClient pageClient = new WebClient())
    {
        pageClient.Encoding = Encoding.UTF8;

        for (int page = 1; page <= totalCount; page++)
        {
            string listHtml = pageClient.DownloadString(
                siteRoot + "/Search/jiangshi/%E8%AE%B2%E5%B8%88/p/" + page + ".html");

            foreach (Match entry in entryRegex.Matches(listHtml))
            {
                string entryHtml = entry.Value;
                string nameHtml = nameBlockRegex.Match(entryHtml).Value;
                string link = hrefRegex.Match(nameHtml).Groups[1].Value;
                string title = titleRegex.Match(nameHtml).Groups[1].Value;
                string name = title.Split('-')[0];
                string price = priceRegex.Match(entryHtml).Groups[1].Value;
                string area = areaRegex.Match(entryHtml).Groups[1].Value;

                // The list page does not always carry a price; fall back to the detail page.
                if (string.IsNullOrEmpty(price))
                {
                    string detailHtml = pageClient.DownloadString(siteRoot + link);
                    price = detailPriceRegex.Match(detailHtml).Groups[1].Value;
                }

                string phone1 = "";
                string phone2 = "";

                // The third '/'-separated segment of the link identifies the lecturer to the
                // contact endpoint. Skip the lookup when the link does not have that shape
                // rather than failing the whole scrape with an IndexOutOfRangeException.
                string[] linkParts = link.Split('/');
                if (linkParts.Length > 2)
                {
                    string contactPayload;
                    // Separate client because this request carries its own Host/Referer headers.
                    using (WebClient contactClient = new WebClient())
                    {
                        contactClient.Encoding = Encoding.UTF8;
                        contactClient.Headers.Add(HttpRequestHeader.Host, "www.jiangshi99.com");
                        contactClient.Headers.Add(HttpRequestHeader.Referer, siteRoot);
                        contactPayload = contactClient.DownloadString(
                            siteRoot + "/jiangshis/contact_showv2/jshiname/" + linkParts[2]);
                    }

                    // The payload is read as comma-separated "key:value" fields: the third
                    // field holds a status, the first two hold the phone values.
                    string[] fields = contactPayload.Split(',');
                    if (fields.Length > 2)
                    {
                        string[] firstPair = fields[0].Split(':');
                        string[] secondPair = fields[1].Split(':');
                        string[] statusPair = fields[2].Split(':');
                        string status = statusPair.Length > 1 ? statusPair[1] : "";

                        if (status == "3}" && secondPair.Length > 1)
                        {
                            phone2 = secondPair[1].Replace("\"", "");
                        }
                        else if (status == "1}" && firstPair.Length > 1 && secondPair.Length > 1)
                        {
                            phone1 = secondPair[1].Replace("\"", "");
                            phone2 = firstPair[1].Replace("\"", "");
                        }
                    }
                }

                Lecturer lecturer = new Lecturer()
                {
                    No = no,
                    Name = name,
                    DetialLink = siteRoot + link,
                    Title = title,
                    From = "中国讲师网",
                    Price = price,
                    // A literal "null" from the endpoint means "no number"; normalized the
                    // same way GetLecturerFromZhongGuoJiangShi does.
                    Phone1 = phone1 == "null" ? "" : phone1,
                    Phone2 = phone2 == "null" ? "" : phone2,
                    // NOTE(review): the previous Replace(" ", " ") was a no-op as written;
                    // presumably the intent was to turn non-breaking spaces into plain
                    // spaces, so both the entity and the character are handled. Confirm.
                    Area = area.Replace("&nbsp;", " ").Replace('\u00A0', ' ')
                };
                lecturers.Add(lecturer);
                no++;
            }
        }
    }

    return lecturers;
}
/// <summary>
/// Fills <c>lecturerList</c> with the lecturers of one display page scraped from
/// www.jiangshi99.com and updates <c>lblPage1</c> with the page number.
/// </summary>
/// <remarks>
/// One display page covers four consecutive site result pages. Row numbering starts at
/// <c>(pageIndex - 1) * 56 + 1</c>. Besides each list page, an entry can trigger a
/// detail-page request (only when no price is listed) and a request to the
/// <c>contact_showv2</c> endpoint for phone numbers. Download failures are not caught
/// here and propagate to the caller; the list view's update lock is released regardless.
/// </remarks>
/// <param name="pageIndex">1-based display page to load.</param>
private void GetLecturerFromZhongGuoJiangShi(int pageIndex)
{
    const string siteRoot = "http://www.jiangshi99.com";
    const int sitePagesPerDisplayPage = 4;
    const int rowsPerDisplayPage = 56;

    // Built once instead of once per entry. "[\s\S]*?" matches exactly what the former
    // "(.|\s)*?" matched, without the overlapping alternation that forces the regex
    // engine into heavy backtracking on large pages.
    Regex entryRegex = new Regex(@"<li class=""jl_right"">[\s\S]*?</li>");
    Regex nameBlockRegex = new Regex(@"<div class=""jl_jname"">[\s\S]*?</div>");
    Regex hrefRegex = new Regex("href=\"(.[^\"]*)\"");
    Regex titleRegex = new Regex("title=\"(.[^\"]*)\"");
    Regex priceRegex = new Regex(@"<span class=""kPrice"">(.*?)</span>");
    Regex areaRegex = new Regex(@"<b>擅长领域:</b>(.*?)</div>");
    Regex detailPriceRegex = new Regex(@"<span class=""Red"">(.*?)</span>");

    lecturerList.Items.Clear();
    lecturerList.BeginUpdate();
    try
    {
        lblPage1.Text = "第" + pageIndex + "页";

        int firstSitePage = (pageIndex - 1) * sitePagesPerDisplayPage + 1;
        int lastSitePage = firstSitePage + sitePagesPerDisplayPage - 1;
        int no = (pageIndex - 1) * rowsPerDisplayPage + 1;

        // One client serves all list and detail pages; it is disposed when loading ends.
        using (WebClient pageClient = new WebClient())
        {
            pageClient.Encoding = Encoding.UTF8;

            for (int sitePage = firstSitePage; sitePage <= lastSitePage; sitePage++)
            {
                string listHtml = pageClient.DownloadString(
                    siteRoot + "/Search/jiangshi/%E8%AE%B2%E5%B8%88/p/" + sitePage + ".html");

                foreach (Match entry in entryRegex.Matches(listHtml))
                {
                    string entryHtml = entry.Value;
                    string nameHtml = nameBlockRegex.Match(entryHtml).Value;
                    string link = hrefRegex.Match(nameHtml).Groups[1].Value;
                    string title = titleRegex.Match(nameHtml).Groups[1].Value;
                    string name = title.Split('-')[0];
                    string price = priceRegex.Match(entryHtml).Groups[1].Value;
                    string area = areaRegex.Match(entryHtml).Groups[1].Value;

                    // The list page does not always carry a price; fall back to the detail page.
                    if (string.IsNullOrEmpty(price))
                    {
                        string detailHtml = pageClient.DownloadString(siteRoot + link);
                        price = detailPriceRegex.Match(detailHtml).Groups[1].Value;
                    }

                    string phone1 = "";
                    string phone2 = "";

                    // The third '/'-separated segment of the link identifies the lecturer to
                    // the contact endpoint. Skip the lookup when the link does not have that
                    // shape rather than aborting the page with an IndexOutOfRangeException.
                    string[] linkParts = link.Split('/');
                    if (linkParts.Length > 2)
                    {
                        string contactPayload;
                        // Separate client because this request carries its own Host/Referer headers.
                        using (WebClient contactClient = new WebClient())
                        {
                            contactClient.Encoding = Encoding.UTF8;
                            contactClient.Headers.Add(HttpRequestHeader.Host, "www.jiangshi99.com");
                            contactClient.Headers.Add(HttpRequestHeader.Referer, siteRoot);
                            contactPayload = contactClient.DownloadString(
                                siteRoot + "/jiangshis/contact_showv2/jshiname/" + linkParts[2]);
                        }

                        // The payload is read as comma-separated "key:value" fields: the third
                        // field holds a status, the first two hold the phone values.
                        string[] fields = contactPayload.Split(',');
                        if (fields.Length > 2)
                        {
                            string[] firstPair = fields[0].Split(':');
                            string[] secondPair = fields[1].Split(':');
                            string[] statusPair = fields[2].Split(':');
                            string status = statusPair.Length > 1 ? statusPair[1] : "";

                            if (status == "3}" && secondPair.Length > 1)
                            {
                                phone2 = secondPair[1].Replace("\"", "");
                            }
                            else if (status == "1}" && firstPair.Length > 1 && secondPair.Length > 1)
                            {
                                phone1 = secondPair[1].Replace("\"", "");
                                phone2 = firstPair[1].Replace("\"", "");
                            }
                        }
                    }

                    Lecturer lecturer = new Lecturer()
                    {
                        No = no,
                        Name = name,
                        DetialLink = siteRoot + link,
                        Title = title,
                        From = "中国讲师网",
                        Price = price,
                        // A literal "null" from the endpoint means "no number".
                        Phone1 = phone1 == "null" ? "" : phone1,
                        Phone2 = phone2 == "null" ? "" : phone2,
                        // NOTE(review): the previous Replace(" ", " ") was a no-op as written;
                        // presumably the intent was to turn non-breaking spaces into plain
                        // spaces, so both the entity and the character are handled. Confirm.
                        Area = area.Replace("&nbsp;", " ").Replace('\u00A0', ' ')
                    };

                    ListViewItem row = new ListViewItem();
                    row.Text = lecturer.No.ToString();
                    row.SubItems.Add(lecturer.Name);
                    row.SubItems.Add(lecturer.Title);
                    row.SubItems.Add(lecturer.Price);
                    row.SubItems.Add(lecturer.Phone1);
                    row.SubItems.Add(lecturer.Phone2);
                    row.SubItems.Add(lecturer.Area);
                    row.SubItems.Add(lecturer.From);
                    row.SubItems.Add(lecturer.DetialLink);
                    lecturerList.Items.Add(row);
                    no++;
                }
            }
        }
    }
    finally
    {
        // Always resume redrawing, even when a download throws part-way through.
        lecturerList.EndUpdate();
    }
}
/// <summary>
/// Fills <c>lecturerList</c> with the lecturers of one display page scraped from the
/// search results of www.jiangshi.org and updates <c>lblPage1</c> with the page number.
/// </summary>
/// <remarks>
/// One display page covers four consecutive site result pages. Row numbering starts at
/// <c>(pageIndex - 1) * 56 + 1</c>. All fields are read from the list page itself; no
/// per-lecturer requests are made. Download failures are not caught here and propagate
/// to the caller; the list view's update lock is released regardless.
/// </remarks>
/// <param name="pageIndex">1-based display page to load.</param>
private void GetLecturerFromZhongHuaJiangShi(int pageIndex)
{
    const int sitePagesPerDisplayPage = 4;
    const int rowsPerDisplayPage = 56;
    Uri siteRoot = new Uri("http://www.jiangshi.org/");

    // Built once instead of once per entry. "[\s\S]*?" matches exactly what the former
    // "(.|\s)*?" matched, without the overlapping alternation that forces the regex
    // engine into heavy backtracking on large pages.
    Regex entryRegex = new Regex(@"<div class=""fl_l"" style=""width: 566px;"">[\s\S]*?</div>");
    Regex anchorRegex = new Regex(@"<a[\s\S]*?</a>");
    Regex hrefRegex = new Regex("href=\"(.[^\"]*)\"");
    Regex anchorTextRegex = new Regex(@">(.*?)</a>");
    Regex priceRegex = new Regex(@"<span class=""kPrice"">(.*?)</span>");
    Regex phoneRegex = new Regex(@"<span class=""Cl"">(.*?)</span>");
    Regex altPhoneRegex = new Regex(@"<span class=""Cl Clz"">(.*?)</span>");
    Regex areaRegex = new Regex(@"<b>擅长领域:</b>(.*?)</div>");

    lecturerList.Items.Clear();
    lecturerList.BeginUpdate();
    try
    {
        lblPage1.Text = "第" + pageIndex + "页";

        int firstSitePage = (pageIndex - 1) * sitePagesPerDisplayPage + 1;
        int lastSitePage = firstSitePage + sitePagesPerDisplayPage - 1;
        int no = (pageIndex - 1) * rowsPerDisplayPage + 1;

        using (WebClient pageClient = new WebClient())
        {
            pageClient.Encoding = Encoding.UTF8;

            for (int sitePage = firstSitePage; sitePage <= lastSitePage; sitePage++)
            {
                // Set before every request so each download carries the browser user agent.
                pageClient.Headers["user-agent"] =
                    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705;)";
                string listHtml = pageClient.DownloadString(
                    "http://www.jiangshi.org/search/kw_NULL_order_1_costmin_0_costmax_0_area_0_page_"
                    + sitePage + ".html");

                foreach (Match entry in entryRegex.Matches(listHtml))
                {
                    string entryHtml = entry.Value;
                    string anchorHtml = anchorRegex.Match(entryHtml).Value;
                    string link = hrefRegex.Match(anchorHtml).Groups[1].Value;
                    string name = anchorTextRegex.Match(anchorHtml).Groups[1].Value;
                    string price = priceRegex.Match(entryHtml).Groups[1].Value;

                    // The phone appears under one of two span classes; prefer "Cl".
                    string phone = phoneRegex.Match(entryHtml).Groups[1].Value;
                    if (string.IsNullOrEmpty(phone))
                    {
                        phone = altPhoneRegex.Match(entryHtml).Groups[1].Value;
                    }

                    string area = areaRegex.Match(entryHtml).Groups[1].Value;

                    // Resolve the link against the site the page was fetched from. The code
                    // previously prefixed the jiangshi99.com host, which belongs to the other
                    // scraper. TryCreate also leaves absolute hrefs intact.
                    Uri resolved;
                    string detailLink = Uri.TryCreate(siteRoot, link, out resolved)
                        ? resolved.AbsoluteUri
                        : link;

                    Lecturer lecturer = new Lecturer()
                    {
                        No = no,
                        Name = name,
                        DetialLink = detailLink,
                        Title = "",
                        // NOTE(review): was "中国讲师网", the label the jiangshi99.com scraper
                        // uses; changed to match this method's source site. Confirm.
                        From = "中华讲师网",
                        Price = price,
                        Phone1 = phone,
                        Phone2 = "",
                        // NOTE(review): the previous Replace(" ", " ") was a no-op as written;
                        // presumably the intent was to turn non-breaking spaces into plain
                        // spaces, so both the entity and the character are handled. Confirm.
                        Area = area.Replace("&nbsp;", " ").Replace('\u00A0', ' ')
                    };

                    ListViewItem row = new ListViewItem();
                    row.Text = lecturer.No.ToString();
                    row.SubItems.Add(lecturer.Name);
                    row.SubItems.Add(lecturer.Title);
                    row.SubItems.Add(lecturer.Price);
                    row.SubItems.Add(lecturer.Phone1);
                    // NOTE(review): GetLecturerFromZhongGuoJiangShi adds a Phone2 cell at this
                    // position in the same list view; an empty cell is added here so the
                    // following columns line up. Assumes both share one column layout.
                    row.SubItems.Add(lecturer.Phone2);
                    row.SubItems.Add(lecturer.Area);
                    row.SubItems.Add(lecturer.From);
                    row.SubItems.Add(lecturer.DetialLink);
                    lecturerList.Items.Add(row);
                    no++;
                }
            }
        }
    }
    finally
    {
        // Always resume redrawing, even when a download throws part-way through.
        lecturerList.EndUpdate();
    }
}