/// <summary>
/// Gets the list of hot cities linked from the new-house portal home page.
/// </summary>
/// <param name="root">URL of the portal home page to download and scan.</param>
/// <returns>
/// A dictionary mapping each city's link text to its URL. Only anchors whose
/// <c>href</c> starts with <c>http://newhouse</c> are included. The dictionary is
/// empty when the page could not be fetched or no matching anchors were found.
/// </returns>
private static Dictionary<string, string> GetCitiesList(string root)
{
    var cities = new Dictionary<string, string>();
    var doc = new HtmlDocument();

    var request = new HttpHelper(root);
    // Dispose the response stream even if parsing throws.
    using (var response = request.GetResponseStream())
    {
        if (response == null)
        {
            return cities;
        }

        // The page is decoded as GB2312.
        doc.Load(response, Encoding.GetEncoding("GB2312"));
    }

    var cityNodes = doc.DocumentNode.SelectNodes(
        "/html[1]/body[1]/div[1]/div[1]/div[1]/div[2]/div[2]/div[2]/a");

    // HtmlAgilityPack returns null (not an empty collection) when nothing matches,
    // e.g. after a layout change on the page.
    if (cityNodes == null)
    {
        return cities;
    }

    foreach (var cityNode in cityNodes)
    {
        // Anchors without an href yield the empty string and are filtered out below.
        var href = cityNode.GetAttributeValue("href", string.Empty);
        if (!href.StartsWith("http://newhouse", StringComparison.Ordinal))
        {
            continue;
        }

        string city = cityNode.InnerText;

        // Keep the first link for a city; a repeated name would make Add throw.
        if (cities.ContainsKey(city))
        {
            continue;
        }

        cities.Add(city, href);
        Console.WriteLine(city + " " + href);
    }

    return cities;
}
/// <summary>
/// Downloads a property's home page, follows its "楼盘详情" (property details)
/// navigation link and returns the parsed details page.
/// </summary>
/// <param name="loupanPage">URL of the property's home page.</param>
/// <returns>
/// The node produced by <c>GetHtmlDoc</c> for the details page, or <c>null</c> when
/// either page could not be fetched, the navigation box or its third link is missing,
/// the third link is not labelled "楼盘详情", or that link has no <c>href</c>.
/// </returns>
private HtmlNode LoupanDetailPage(string loupanPage)
{
    HtmlNode doc;

    var request = new HttpHelper(loupanPage);
    // Dispose the stream even if parsing throws.
    using (var responseStream = request.GetResponseStream())
    {
        if (responseStream == null)
        {
            return null;
        }

        doc = GetHtmlDoc(responseStream);
    }

    if (doc == null || doc.OwnerDocument == null)
    {
        return null;
    }

    var naviBox = doc.OwnerDocument.GetElementbyId("orginalNaviBox");
    if (naviBox == null)
    {
        return null;
    }

    // SelectNodes returns null when nothing matches; the details link is expected
    // to be the third anchor in the navigation box.
    var naviLinks = naviBox.SelectNodes("./a");
    if (naviLinks == null || naviLinks.Count < 3)
    {
        return null;
    }

    HtmlNode loupanDetailNode = naviLinks[2];
    if (loupanDetailNode.InnerText != "楼盘详情")
    {
        return null;
    }

    string loupanDetailUrl = loupanDetailNode.GetAttributeValue("href", string.Empty);
    if (string.IsNullOrEmpty(loupanDetailUrl))
    {
        return null;
    }

    request = new HttpHelper(loupanDetailUrl);
    using (var detailStream = request.GetResponseStream())
    {
        if (detailStream == null)
        {
            return null;
        }

        return GetHtmlDoc(detailStream);
    }
}
/// <summary>
/// Crawls the paged property list of the current city (taken from
/// <c>currentUrl</c>), parsing each page and submitting the changes to
/// <c>db</c> after every page.
/// </summary>
/// <remarks>
/// Pages 1 through 99 are requested in order. The loop stops early once the
/// <c>finish</c> flag is set, which is expected to happen during list parsing.
/// A page whose response stream is <c>null</c> is logged and skipped.
/// </remarks>
private void CatchCityLoupanSummary()
{
    // Exclusive upper bound on the page index: pages 1..99 are requested.
    const int pageLimit = 100;

    finish = false;
    for (int page = 1; page < pageLimit; page++)
    {
        string currentPage = currentUrl + "house/s/b9" + page.ToString();
        Console.WriteLine(currentPage);

        var request = new HttpHelper(currentPage);
        var responseStream = request.GetResponseStream();
        if (responseStream == null)
        {
            // No stream to parse; skip this page instead of crashing on null.
            Console.WriteLine("No response for " + currentPage + ", skipping.");
            continue;
        }

        HtmlNode doc;
        try
        {
            doc = GetHtmlDoc(responseStream);
        }
        finally
        {
            // Release the connection even when parsing fails.
            responseStream.Close();
        }

        var loupanList = GetLoupanList(doc);
        LoupanListParse(loupanList);
        db.SubmitChanges();

        if (finish)
        {
            break;
        }
    }
}