/// <summary>
/// Crawls the rental ("出租") listings of an area and stores each recent listing
/// together with its pictures.
/// </summary>
/// <remarks>
/// The first list page is fetched from <c>area.Url + ConstVar.出租 + "0/"</c>. The page count is
/// read from the second-to-last <c>span</c> inside the <c>div.pager</c> element (1 when the pager
/// has two spans or fewer); nothing is crawled when no pager element exists. Each list page
/// <c>pn1/ .. pnN/</c> is then processed in turn.
/// Failures are never propagated to the caller: they are logged and recorded through
/// <c>errorUrlrepsitory</c> with <c>UrlType.All</c>, <c>UrlType.Page</c> or <c>UrlType.Item</c>
/// depending on the level at which they occurred.
/// </remarks>
/// <param name="area">Area to crawl; its <c>Url</c>, <c>Id</c> and <c>Name</c> are read.</param>
public void CrawlDataCz(Area area)
{
    string url = string.Empty;
    try
    {
        Crawler crawler = new Crawler();
        url = area.Url + ConstVar.出租 + "0/";
        string html = crawler.Crawl(url, Encoding.UTF8);
        var htmlParse = new HtmlParser();
        IHtmlDocument listDocument = htmlParse.Parse(html);

        IElement pager = listDocument.QuerySelectorAll("div")
            .FirstOrDefault(p => p.ClassName == "pager");
        if (pager != null)
        {
            List<IElement> pagerSpans = htmlParse.Parse(pager.InnerHtml)
                .QuerySelectorAll("span").ToList();

            // The second-to-last span of the pager carries the total page count.
            int page = pagerSpans.Count > 2
                ? int.Parse(pagerSpans[pagerSpans.Count - 2].InnerHtml)
                : 1;

            for (int i = 1; i <= page; i++)
            {
                CrawlListPageCz(area, crawler, htmlParse, url + "pn" + i + "/");
            }
        }

        Console.WriteLine(area.Name + "出租信息抓取完成");
    }
    catch (Exception e)
    {
        // Best-effort crawl: remember the area URL that failed and carry on.
        errorUrlrepsitory.Add(new ErrorUrl() { UrlType = UrlType.All, Url = url });
        log.Error(e.ToString());
    }
}

/// <summary>
/// Crawls one rental list page and processes every <c>div.pic</c> entry of its
/// <c>ul.house-list-wrap</c> list in parallel.
/// </summary>
/// <param name="area">Area the listings belong to.</param>
/// <param name="crawler">Crawler used for the detail pages and pictures of the entries.</param>
/// <param name="htmlParse">Parser used for every HTML fragment.</param>
/// <param name="pageUrl">URL of the list page; recorded as <c>UrlType.Page</c> on failure.</param>
private void CrawlListPageCz(Area area, Crawler crawler, HtmlParser htmlParse, string pageUrl)
{
    try
    {
        Crawler pageCrawler = new Crawler();
        string pageHtml = pageCrawler.Crawl(pageUrl, Encoding.UTF8);
        IDocument pageDocument = htmlParse.Parse(pageHtml);

        IElement houseList = pageDocument.QuerySelectorAll("ul")
            .FirstOrDefault(p => p.ClassName == "house-list-wrap");
        if (houseList == null)
        {
            // Fail with a meaningful message instead of a NullReferenceException;
            // the catch below still records the page URL.
            throw new InvalidOperationException("List page contains no 'house-list-wrap' element.");
        }

        List<IElement> entries = htmlParse.Parse(houseList.InnerHtml)
            .QuerySelectorAll("div")
            .Where(p => p.ClassName == "pic")
            .ToList();

        // NOTE(review): crawler, htmlParse and the repositories are shared by all parallel
        // workers - confirm that they are safe for concurrent use.
        Parallel.ForEach(entries, p => CrawlItemCz(area, crawler, htmlParse, p));
    }
    catch (Exception e)
    {
        errorUrlrepsitory.Add(new ErrorUrl() { UrlType = UrlType.Page, Url = pageUrl });
        log.Error(e.ToString());
    }
}

/// <summary>
/// Crawls the detail page linked from one list entry and, when the listing was updated within
/// the last two months, stores it through <c>shoprepo</c> and then stores its pictures.
/// </summary>
/// <param name="area">Area the listing belongs to.</param>
/// <param name="crawler">Crawler used for the detail page and the pictures.</param>
/// <param name="htmlParse">Parser used for every HTML fragment.</param>
/// <param name="entry">The <c>div.pic</c> element of the list entry.</param>
private void CrawlItemCz(Area area, Crawler crawler, HtmlParser htmlParse, IElement entry)
{
    string itemUrl = string.Empty;
    try
    {
        IElement anchor = htmlParse.Parse(entry.InnerHtml).QuerySelector("a");
        string href = anchor == null ? null : anchor.GetAttribute("href");
        if (string.IsNullOrEmpty(href))
        {
            throw new InvalidOperationException("List entry has no detail link.");
        }

        itemUrl = href;
        string detailHtml = crawler.Crawl(itemUrl, Encoding.UTF8);
        IDocument detail = htmlParse.Parse(detailHtml);

        IElement updated = detail.QuerySelectorAll("span")
            .FirstOrDefault(o => o.InnerHtml.StartsWith("更新于"));
        if (updated == null)
        {
            throw new InvalidOperationException("Detail page has no '更新于' (updated at) element.");
        }

        DateTime time = ParseTool.StringToDateTime(updated.InnerHtml.Replace("更新于", ""));
        if (time <= DateTime.Now.AddMonths(-2))
        {
            // Listings not updated within the last two months are skipped.
            return;
        }

        IElement title = detail.QuerySelectorAll("h1")
            .FirstOrDefault(o => o.ClassName == "c_000 f20");
        IElement money = detail.QuerySelectorAll("span")
            .FirstOrDefault(o => o.ClassName == "house_basic_title_money_num");
        IElement content = detail.QuerySelectorAll("div")
            .Where(o => o.ClassName == "general-item-wrap")
            .FirstOrDefault(u => u.ParentElement.ClassName == "general-item general-miaoshu");
        IElement customer = detail.QuerySelectorAll("span")
            .FirstOrDefault(o => o.ClassName == "f14 c_333 jjrsay");
        IElement phone = detail.QuerySelectorAll("p")
            .FirstOrDefault(o => o.ClassName == "phone-num");

        IElement infoList = detail.QuerySelectorAll("ul")
            .FirstOrDefault(o => o.ClassName == "house_basic_title_content");
        if (infoList == null)
        {
            throw new InvalidOperationException("Detail page has no 'house_basic_title_content' list.");
        }

        List<IElement> infoItems = htmlParse.Parse(infoList.InnerHtml)
            .QuerySelectorAll("li").ToList();
        if (infoItems.Count < 6)
        {
            // Items 0, 2 and 5 are read below.
            throw new InvalidOperationException(
                "Detail page info list has " + infoItems.Count + " items; at least 6 are required.");
        }

        // Shop area (size).
        IElement areaSize = htmlParse.Parse(infoItems[0].InnerHtml)
            .QuerySelectorAll("span")
            .FirstOrDefault(o => o.ClassName == "house_basic_title_content_item2");

        // Industry name.
        IElement industryName = htmlParse.Parse(infoItems[2].InnerHtml)
            .QuerySelectorAll("span")
            .FirstOrDefault(o => o.ClassName == "house_basic_title_content_item3");

        // Address link and detailed address share the sixth item; parse it once.
        IDocument addressDocument = htmlParse.Parse(infoItems[5].InnerHtml);
        IElement address = addressDocument.QuerySelectorAll("a")
            .FirstOrDefault(o => o.ClassName == "house_basic_title_content_item3 blue-link");
        IElement addressDetail = addressDocument.QuerySelectorAll("span")
            .FirstOrDefault(o => o.ClassName == "house_basic_title_content_item3 xxdz-des");

        var shoptransfer = new ShopRentOrTransfer()
        {
            Id = Guid.NewGuid(),
            ShopArea = areaSize?.InnerHtml ?? "",
            InfoTitle = title?.InnerHtml ?? "",
            TransFerMoney = money?.InnerHtml ?? "",
            Address = address?.InnerHtml ?? "",
            DetailAddress = addressDetail?.InnerHtml ?? "",
            InfoContent = content?.InnerHtml ?? "",
            InfoType = Model.BaseModel.InfoType.出租,
            IndustryName = industryName?.InnerHtml ?? "",
            Customer = customer?.InnerHtml ?? "",
            Phone = phone?.InnerHtml ?? "",
            AreaId = area.Id.ToString(),
            UpdateTime = time
        };

        IElement imgUl = detail.QuerySelectorAll("ul")
            .FirstOrDefault(o => o.ClassName == "general-pic-list");

        object addResult = shoprepo.Add(shoptransfer);
        bool added = (bool)addResult;

        // NOTE(review): this message is printed whether or not the add succeeded - confirm intent.
        Console.WriteLine(area.Name + "添加一条出租信息");

        if (imgUl != null && added)
        {
            SaveItemImagesCz(crawler, htmlParse, shoptransfer, imgUl);
        }
    }
    catch (Exception e)
    {
        // itemUrl is still empty when the failure happened before the link was resolved.
        errorUrlrepsitory.Add(new ErrorUrl() { UrlType = UrlType.Item, Url = itemUrl });
        log.Error(e.ToString());
    }
}

/// <summary>
/// Downloads every picture referenced by a <c>data-src</c> attribute inside the picture list,
/// saves it under <c>Imgs/&lt;listing id&gt;/</c> below the application base directory and
/// records the path (relative to the base directory) through <c>imgrepo</c>.
/// </summary>
/// <param name="crawler">Crawler used to download the pictures.</param>
/// <param name="htmlParse">Parser used for the picture list fragment.</param>
/// <param name="shoptransfer">Stored listing the pictures belong to.</param>
/// <param name="imgUl">The <c>ul.general-pic-list</c> element of the detail page.</param>
private void SaveItemImagesCz(Crawler crawler, HtmlParser htmlParse, ShopRentOrTransfer shoptransfer, IElement imgUl)
{
    IDocument gallery = htmlParse.Parse(imgUl.InnerHtml);
    List<string> sources = gallery.QuerySelectorAll("img")
        .Select(o => o.GetAttribute("data-src"))
        .ToList();

    foreach (string source in sources)
    {
        if (source == null)
        {
            continue;
        }

        // Dispose the bitmap once it has been written so its GDI+ handle is released promptly.
        using (Bitmap img = crawler.CrawlPic(source))
        {
            if (img == null)
            {
                continue;
            }

            string path = AppDomain.CurrentDomain.BaseDirectory + "Imgs/" + shoptransfer.Id + "/";

            // CreateDirectory does nothing when the directory already exists.
            Directory.CreateDirectory(path);

            string fullPath = path + Guid.NewGuid().ToString("N") + ".png";

            // NOTE(review): Save(string) keeps the image's own format, so the ".png" extension
            // may not match the file content - confirm whether an explicit PNG encode is wanted.
            img.Save(fullPath);

            string savePath = fullPath.Replace(AppDomain.CurrentDomain.BaseDirectory, "");
            imgrepo.Add(new Model.Image()
            {
                FkId = shoptransfer.Id,
                ImageUrl = savePath,
                InfoType = TableType.ShopRentOrTransfer,
            });
        }
    }
}