/// <summary>
/// Crawls the "rent wanted" (求租) shop listings of one area and stores every
/// listing whose publish date falls within the last two months.
/// </summary>
/// <remarks>
/// Flow: download the first list page, read the page count from the pager,
/// then for each list page download it, collect the <c>div.list-info</c>
/// entries and process them in parallel. Each entry's detail page is
/// downloaded, parsed into a <c>ShopBegRent</c> and added to
/// <c>shopbegrepo</c>.
/// A failure on a single listing or a single list page is logged and recorded
/// in <c>errorUrlrepsitory</c> (as <c>UrlType.Item</c> / <c>UrlType.Page</c>)
/// without aborting the remaining work. Any other failure is logged and
/// written to the console; nothing is rethrown to the caller.
/// If the first list page contains no <c>div.pager</c>, no listing is crawled.
/// NOTE(review): the crawler, the parser and the repositories are shared by
/// the parallel iterations - confirm they are safe for concurrent use.
/// </remarks>
/// <param name="area">
/// Area to crawl. Its <c>Url</c> is the base of the list URL, its <c>Id</c> is
/// stored on every saved listing and its <c>Name</c> is used in console output.
/// </param>
public void CrawlDataBegRent(Area area)
{
    try
    {
        Crawler crawler = new Crawler();
        string listUrl = area.Url + ConstVar.求租 + "0/";
        string listHtml = crawler.Crawl(listUrl, Encoding.UTF8);
        var parser = new HtmlParser();
        IHtmlDocument listDocument = parser.Parse(listHtml);

        List<IElement> pagers = listDocument.QuerySelectorAll("div")
            .Where(p => p.ClassName == "pager")
            .ToList();

        if (pagers.Count > 0)
        {
            // Pagination: when the pager holds more than two <span> elements,
            // the second-to-last one carries the number of pages; otherwise
            // there is a single page.
            IHtmlDocument pagerDocument = parser.Parse(pagers[0].InnerHtml);
            List<IElement> pagerSpans = pagerDocument.QuerySelectorAll("span").ToList();
            int pageCount = 1;
            if (pagerSpans.Count > 2)
            {
                pageCount = int.Parse(pagerSpans[pagerSpans.Count - 2].InnerHtml);
            }

            for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++)
            {
                string pageUrl = listUrl + "pn" + pageNumber + "/";
                try
                {
                    // Crawl one list page. A missing "house-list-wrap" list makes
                    // the dereference below throw, which the page-level catch
                    // records as a failed page.
                    string pageHtml = crawler.Crawl(pageUrl, Encoding.UTF8);
                    IHtmlDocument pageDocument = parser.Parse(pageHtml);
                    IElement listWrap = pageDocument.QuerySelectorAll("ul")
                        .FirstOrDefault(p => p.ClassName == "house-list-wrap");
                    IHtmlDocument listWrapDocument = parser.Parse(listWrap.InnerHtml);
                    List<IElement> listings = listWrapDocument.QuerySelectorAll("div")
                        .Where(p => p.ClassName == "list-info")
                        .ToList();

                    // Crawl every listing of the page. The upper bound of
                    // Parallel.For is exclusive, so it must be listings.Count;
                    // Count + 1 indexed one element past the end and produced a
                    // spurious error record with an empty URL for every page.
                    Parallel.For(0, listings.Count, index =>
                    {
                        string itemUrl = string.Empty;
                        try
                        {
                            IHtmlDocument linkDocument = parser.Parse(listings[index].InnerHtml);
                            IElement link = linkDocument.QuerySelector("a");
                            itemUrl = link.GetAttribute("href");
                            string itemHtml = crawler.Crawl(itemUrl, Encoding.UTF8);

                            // Start parsing the detail page.
                            IHtmlDocument itemDocument = parser.Parse(itemHtml);

                            // Publish time: the text of div.other up to its first
                            // child tag, without the "发布时间:" label.
                            IElement timeElement = itemDocument.QuerySelectorAll("div")
                                .FirstOrDefault(o => o.ClassName == "other");
                            string timeHtml = timeElement.InnerHtml;
                            string publishedText = timeHtml.Substring(0, timeHtml.IndexOf("<"))
                                .Replace("发布时间:", "")
                                .Trim();
                            // Round-trip through the short date string, as the
                            // original code did (presumably to drop the time of
                            // day - confirm against ParseTool).
                            DateTime publishedOn = ParseTool.StringToDateTime(
                                ParseTool.StringToDateTime(publishedText).ToShortDateString());

                            // Only listings newer than two months are stored.
                            if (publishedOn > DateTime.Now.AddMonths(-2))
                            {
                                // Title
                                string title = itemDocument.QuerySelectorAll("h1")
                                    .FirstOrDefault().InnerHtml.Trim();
                                // Detailed content
                                string content = itemDocument.QuerySelectorAll("div")
                                    .FirstOrDefault(u => u.ClassName == "maincon").InnerHtml.Trim();
                                // Phone
                                string phone = itemDocument.QuerySelectorAll("span")
                                    .FirstOrDefault(u => u.ClassName == "phone").InnerHtml.Trim();
                                // Rent
                                string rentText = itemDocument.QuerySelectorAll("em")
                                    .FirstOrDefault(u => u.ClassName == "redfont").InnerHtml.Trim();

                                // The <li> items of ul.info, parsed once: item 2 holds
                                // the floor area, item 0 the district links.
                                List<IElement> infoItems = parser
                                    .Parse(itemDocument.QuerySelectorAll("ul")
                                        .FirstOrDefault(u => u.ClassName == "info").InnerHtml)
                                    .QuerySelectorAll("li")
                                    .ToList();
                                // Floor area
                                string areaText = infoItems[2].InnerHtml
                                    .Replace("面积:", "")
                                    .Replace("㎡", "")
                                    .Trim();
                                // Customer name: the second a.tx link on the page.
                                string customerName = itemDocument.QuerySelectorAll("a")
                                    .Where(u => u.ClassName == "tx").ToList()[1].InnerHtml.Trim();
                                // District name(s), comma separated
                                string areaName = string.Join(",",
                                    parser.Parse(infoItems[0].InnerHtml).QuerySelectorAll("a")
                                        .Select(p => p.InnerHtml.Trim()).ToList()).Trim();

                                // The page gives one rent figure; it is stored as a
                                // range of +/- 1000 around it, never below zero.
                                var rent = ParseTool.StringToDouble(rentText);

                                ShopBegRent shop = new ShopBegRent();
                                shop.AreaName = areaName;
                                shop.AreaId = area.Id.ToString();
                                shop.InfoContent = content;
                                shop.InfoTitle = title;
                                shop.Phone = phone;
                                shop.MaxRentMoney = rent + 1000;
                                shop.MinRentMoney = (rent - 1000) > 0 ? (rent - 1000) : 0;
                                shop.Customer = customerName;
                                shop.UpdateTime = publishedOn;

                                if (areaText.Contains("-"))
                                {
                                    // Explicit "min-max" range.
                                    string[] bounds = areaText.Split('-');
                                    shop.MinArea = ParseTool.StringToDouble(bounds[0]);
                                    shop.MaxArea = ParseTool.StringToDouble(bounds[1]);
                                }
                                else
                                {
                                    // Single figure: stored as +/- 10 around it, never
                                    // below zero. Uses ParseTool.StringToDouble for the
                                    // value as well as the test (was double.Parse).
                                    var size = ParseTool.StringToDouble(areaText);
                                    shop.MinArea = size - 10 > 0 ? size - 10 : 0;
                                    shop.MaxArea = size + 10;
                                }

                                shop.Id = Guid.NewGuid();
                                shopbegrepo.Add(shop);
                                Console.WriteLine(area.Name + "添加了一条商铺求租");
                            }
                        }
                        catch (Exception itemError)
                        {
                            // Record the failing listing and carry on with the others.
                            errorUrlrepsitory.Add(new ErrorUrl() { Url = itemUrl, UrlType = UrlType.Item });
                            log.Error(itemError.ToString());
                        }
                    });
                }
                catch (Exception pageError)
                {
                    // Record the failing list page and carry on with the next one.
                    errorUrlrepsitory.Add(new ErrorUrl() { Url = pageUrl, UrlType = UrlType.Page });
                    log.Error(pageError.ToString());
                }
            }
        }

        Console.WriteLine("抓取" + area.Name + "求租信息完成");
    }
    catch (Exception e)
    {
        log.Error(e.ToString());
        Console.WriteLine(e);
    }
}