/// <summary>
/// Queues the 5i5j (hz.5i5j.com) community-list pages that seed the crawl.
/// Each request is tagged with its region name under the "diqu" property.
/// </summary>
private void InitPageWC()
{
    // One row per district: the list-page URL template ({0} = page number),
    // the last page number to request, and the region label for "diqu".
    // NOTE(review): "weilaikejicheng" is labelled "五常" here, not its own
    // region name - confirm that grouping is intended.
    var seeds = new[]
    {
        new { UrlFormat = "https://hz.5i5j.com/xiaoqu/wuchang/n{0}/", LastPage = 2, Region = "五常" },
        new { UrlFormat = "https://hz.5i5j.com/xiaoqu/weilaikejicheng/n{0}/", LastPage = 1, Region = "五常" },
        new { UrlFormat = "https://hz.5i5j.com/xiaoqu/xianlin/n{0}/", LastPage = 3, Region = "闲林" },
        new { UrlFormat = "https://hz.5i5j.com/xiaoqu/xixi/n{0}/", LastPage = 1, Region = "西溪" },
    };

    // Requests are added district by district, pages in ascending order.
    foreach (var seed in seeds)
    {
        for (int pageNo = 1; pageNo <= seed.LastPage; pageNo++)
        {
            var request = BaseFunction.CreateRequest(string.Format(seed.UrlFormat, pageNo));
            request.AddProperty("diqu", seed.Region);
            AddRequest(request);
        }
    }
}
/// <summary>
/// Queues the Lianjia (hz.lianjia.com) community-list pages that seed the crawl.
/// Each request carries its region name under the "diqu" property.
/// </summary>
private void InitPageWC()
{
    // Per district: list-page URL template ({0} = page number), the last page
    // number to request (pages start at 1), and the region label.
    var districts = new[]
    {
        new { UrlFormat = "https://hz.lianjia.com/xiaoqu/feicuicheng1/pg{0}/", LastPage = 2, Region = "翡翠城" },
        new { UrlFormat = "https://hz.lianjia.com/xiaoqu/weilaikejicheng/pg{0}/", LastPage = 6, Region = "未来科技城" },
        new { UrlFormat = "https://hz.lianjia.com/xiaoqu/xianlin1/pg{0}/", LastPage = 3, Region = "闲林" },
        new { UrlFormat = "https://hz.lianjia.com/xiaoqu/xixi/pg{0}/", LastPage = 4, Region = "西溪" },
    };

    foreach (var district in districts)
    {
        for (int pageNo = 1; pageNo <= district.LastPage; pageNo++)
        {
            var listRequest = BaseFunction.CreateRequest(string.Format(district.UrlFormat, pageNo));
            listRequest.AddProperty("diqu", district.Region);
            AddRequest(listRequest);
        }
    }
}
/// <summary>
/// Parses a downloaded hz.5i5j.com page.
/// For a community ("xiaoqu") list page it queues each community's listing link and
/// stores the community and community-price records under "Result" and "Result1".
/// For a community second-hand listing ("xq-ershoufang") page it queues the
/// pagination links (first page only) and stores the house and house-price
/// records under "Result2" and "Result3".
/// </summary>
/// <param name="page">The downloaded page; its TargetUrl selects which branch runs.</param>
protected override void Handle(Page page)
{
    // Read the clock once: three separate DateTime.Now reads can straddle
    // midnight and yield a mismatched year/month/day. Built via the
    // (year, month, day) constructor so the resulting value is unchanged.
    DateTime now = DateTime.Now;
    DateTime today = new DateTime(now.Year, now.Month, now.Day);

    // Community list page: follow the links to each community's listings.
    if (Regex.IsMatch(page.TargetUrl, "https://hz.5i5j.com/xiaoqu/[\\s\\S]+"))
    {
        // Collect each community's second-hand listing link and queue it.
        var communityLinks = page.Selectable().SelectList(Selectors.XPath("//div[@class='list-con-box']/ul[@class='pList']/li/div[@class='listCon']/div[@class='listX']/div[@class='jia']/a")).Links().GetValues().ToList();
        foreach (var communityLink in communityLinks)
        {
            var request = BaseFunction.CreateRequest(communityLink);
            page.AddTargetRequest(request);
        }
        Logger?.LogInformation($"{page.TargetUrl}页面获取到小区连接{communityLinks.Count}个");

        // Extract one community record and one price record per list item.
        var totalXiaoQuElements = page.Selectable().SelectList(Selectors.XPath("//div[@class='list-con-box']/ul[@class='pList']/li")).Nodes();
        var xiaoquList = new List<O5I5JXiaoQuEntity>();
        var xiaoquPriceList = new List<O5I5JXiaoQuPriceEntity>();
        foreach (var xiaoquElement in totalXiaoQuElements)
        {
            try
            {
                var xiaoqu = new O5I5JXiaoQuEntity();
                xiaoqu.Name = xiaoquElement.Select(Selectors.XPath(".//div[@class='listCon']/h3/a/text()")).GetValue();
                // Title link first: the Id is cut out of it below.
                xiaoqu.Url = xiaoquElement.Select(Selectors.XPath(".//div[@class='listCon']/h3/a/@href")).GetValue();
                xiaoqu.Describe = xiaoquElement.Select(Selectors.XPath(".//div[@class='listCon']/div/p[2]")).GetValue().Trim();
                // When the value splits into exactly three parts on '>', keep only the last one.
                string[] tempDesc = xiaoqu.Describe.Split('>');
                if (tempDesc.Length == 3)
                {
                    xiaoqu.Describe = tempDesc[2].Trim();
                }
                xiaoqu.Region = page.Request.Properties["diqu"];
                // Id = the text between the last '/' and the last '.' of the title link.
                xiaoqu.Id = xiaoqu.Url.Substring(xiaoqu.Url.LastIndexOf('/') + 1, xiaoqu.Url.LastIndexOf('.') - xiaoqu.Url.LastIndexOf('/') - 1);
                // The stored Url is then replaced by the link found next to the price block.
                xiaoqu.Url = xiaoquElement.Select(Selectors.XPath(".//div[@class='listCon']/div/div/a/@href")).GetValue();
                xiaoquList.Add(xiaoqu);

                var xiaoquPrice = new O5I5JXiaoQuPriceEntity();
                xiaoquPrice.Id = xiaoqu.Id;
                xiaoquPrice.Date1 = today;
                xiaoquPrice.Price = BaseFunction.TryParseDecimal(xiaoquElement.Select(Selectors.XPath(".//div[@class='listCon']/div/div/p[@class='redC']/strong/text()")).GetValue());
                xiaoquPrice.PriceRange = xiaoquElement.Select(Selectors.XPath(".//div[@class='listCon']/div/div/p[2]/text()")).GetValue();
                xiaoquPrice.OnSellCount = BaseFunction.TryParseInt(xiaoquElement.Select(Selectors.XPath(".//div[@class='listCon']/div/div/a/p/span/text()")).GetValue());
                xiaoquPriceList.Add(xiaoquPrice);
            }
            catch (Exception ex)
            {
                // Best effort: one malformed list item must not abort the whole page.
                Console.WriteLine(ex);
            }
        }
        Logger?.LogInformation($"{page.TargetUrl}页面获取到小区信息{xiaoquList.Count}个,获取到小区价格信息{xiaoquPriceList.Count}个");
        page.AddResultItem("Result", xiaoquList);
        page.AddResultItem("Result1", xiaoquPriceList);
    }
    // House list page of a single community.
    else if (Regex.IsMatch(page.TargetUrl, "https://hz.5i5j.com/xq-ershoufang/[\\s\\S]+"))
    {
        // On the first page, queue the other pages from the pagination bar.
        try
        {
            // Heuristic: the URL counts as a first page when its last 'n' is more
            // than 4 characters from the end - presumably paged URLs end in
            // "/n{page}/"; TODO confirm.
            if (page.TargetUrl.Length - page.TargetUrl.LastIndexOf('n') > 4)
            {
                var pageElements = page.Selectable().SelectList(Selectors.XPath("//div[@class='pageBox']/div[@class='pageSty rf']/a")).Nodes().ToList();
                int queuedPages = 0;
                // The first and last anchors are skipped (presumably the current
                // page and the "next" link - verify against the site markup).
                for (int i = 1; i < pageElements.Count - 1; ++i)
                {
                    page.AddTargetRequest(BaseFunction.CreateRequest(pageElements[i].Links().GetValue()));
                    ++queuedPages;
                }
                // Log what was actually queued; never a negative number.
                Logger?.LogInformation($"{page.TargetUrl}页面获取分页信息{queuedPages}个");
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex);
        }

        // Community id = the first run of more than five digits in the URL.
        string xiaoquID = "";
        Regex regex = new Regex("\\d+");
        foreach (Match item in regex.Matches(page.TargetUrl))
        {
            if (item.Value.Length > 5)
            {
                xiaoquID = item.Value;
                break;
            }
        }

        // Extract one house record and one price record per listing.
        var totalFangZiElements = page.Selectable().SelectList(Selectors.XPath("//div[@class='list-con-box'][1]/ul/li/div[@class='listCon']")).Nodes();
        var houseList = new List<O5I5JHouseEntity>();
        var housePriceList = new List<O5I5JHousePriceEntity>();
        foreach (var fangElement in totalFangZiElements)
        {
            try
            {
                var house = new O5I5JHouseEntity();
                house.Title = fangElement.Select(Selectors.XPath(".//h3[@class='listTit']/a/text()")).GetValue();
                house.Url = fangElement.Select(Selectors.XPath(".//h3[@class='listTit']/a/@href")).GetValue();
                // Id = the text between the last '/' and the last '.' of the listing link.
                house.Id = house.Url.Substring(house.Url.LastIndexOf('/') + 1, house.Url.LastIndexOf('.') - house.Url.LastIndexOf('/') - 1);
                house.XiaoQuId = xiaoquID;

                // First info paragraph, '·'-separated: layout in part 0, area in part 1.
                string huxmj = fangElement.Select(Selectors.XPath(".//div[@class='listX']/p[1]")).GetValue();
                string[] tt = huxmj.Split('·');
                if (tt.Length > 3)
                {
                    house.MianJi = BaseFunction.TryParseDecimal(tt[1].Replace("平", "").Replace("米", "").Trim());
                    string[] thx = tt[0].Split('>');
                    if (thx.Length == 3)
                    {
                        house.HuXing = thx[2];
                    }
                }

                // Third info paragraph: the publish time is its third '·'-separated part.
                huxmj = fangElement.Select(Selectors.XPath(".//div[@class='listX']/p[3]")).GetValue();
                tt = huxmj.Split('·');
                if (tt.Length > 2)
                {
                    house.PublicTime = BaseFunction.TryParseDateTime(tt[2].Replace("发", "").Replace("布", "").Trim());
                }
                houseList.Add(house);

                var housePrice = new O5I5JHousePriceEntity();
                housePrice.Id = house.Id;
                housePrice.Date1 = today;
                housePrice.Price = BaseFunction.TryParseDecimal(fangElement.Select(Selectors.XPath(".//div[@class='jia']/p[1]/strong/text()")).GetValue());
                // NOTE(review): this text has "单价" (unit price) stripped from it yet
                // is stored in SumPrice - confirm the field naming is intended.
                string ttxx = fangElement.Select(Selectors.XPath(".//div[@class='jia']/p[2]/text()")).GetValue();
                string[] ttxx2 = ttxx.Split('/');
                if (ttxx2.Length == 2)
                {
                    ttxx = ttxx2[0];
                }
                housePrice.SumPrice = BaseFunction.TryParseDecimal(ttxx.Replace("单价", "").Replace("元", ""));
                housePriceList.Add(housePrice);
            }
            catch (Exception ex)
            {
                // Best effort: one malformed listing must not abort the whole page.
                Console.WriteLine(ex);
            }
        }
        Logger?.LogInformation($"{page.TargetUrl}页面获取到二手房信息{houseList.Count}个,获取到二手房价格信息{housePriceList.Count}个");
        page.AddResultItem("Result2", houseList);
        page.AddResultItem("Result3", housePriceList);
    }
}
/// <summary>
/// Parses a downloaded hz.lianjia.com page.
/// For a community ("xiaoqu") list page it stores the community and community-price
/// records under "Result" and "Result1" and queues the second-hand listing pages of
/// every community that has houses on sale.
/// For a second-hand listing ("ershoufang") page it stores the house and
/// house-price records under "Result2" and "Result3".
/// </summary>
/// <param name="page">The downloaded page; its TargetUrl selects which branch runs.</param>
protected override void Handle(Page page)
{
    // Listings per result page, as assumed by the paging arithmetic below.
    const int ListingsPerPage = 30;

    // Read the clock once: three separate DateTime.Now reads can straddle
    // midnight and yield a mismatched year/month/day. Built via the
    // (year, month, day) constructor so the resulting value is unchanged.
    DateTime now = DateTime.Now;
    DateTime today = new DateTime(now.Year, now.Month, now.Day);

    // Community list page.
    if (Regex.IsMatch(page.TargetUrl, "https://hz.lianjia.com/xiaoqu/[\\s\\S]+"))
    {
        // Listing URLs are built from each community's Id further down rather
        // than harvested from the anchors on this page.
        var totalXiaoQuElements = page.Selectable().SelectList(Selectors.XPath("//ul[@class='listContent']/li")).Nodes();
        var xiaoquList = new List<LianJiaXiaoQuEntity>();
        var xiaoquPriceList = new List<LianJiaXiaoQuPriceEntity>();
        foreach (var xiaoquElement in totalXiaoQuElements)
        {
            try
            {
                var xiaoqu = new LianJiaXiaoQuEntity();
                xiaoqu.Name = xiaoquElement.Select(Selectors.XPath(".//div[@class='info']/div[@class='title']/a/text()")).GetValue();
                xiaoqu.Url = xiaoquElement.Select(Selectors.XPath(".//div[@class='xiaoquListItemSellCount']/a/@href")).GetValue();
                xiaoqu.Describe = xiaoquElement.Select(Selectors.XPath(".//div[@class='houseInfo']/a/text()")).GetValue().Trim();
                xiaoqu.Region = page.Request.Properties["diqu"];
                // Id = from the last 'c' in the URL up to (not including) the last '/'.
                xiaoqu.Id = xiaoqu.Url.Substring(xiaoqu.Url.LastIndexOf('c'), xiaoqu.Url.LastIndexOf('/') - xiaoqu.Url.LastIndexOf('c'));
                xiaoquList.Add(xiaoqu);

                var xiaoquPrice = new LianJiaXiaoQuPriceEntity();
                xiaoquPrice.Id = xiaoqu.Id;
                xiaoquPrice.Date1 = today;
                xiaoquPrice.Price = BaseFunction.TryParseDecimal(xiaoquElement.Select(Selectors.XPath(".//div[@class='xiaoquListItemPrice']/div[@class='totalPrice']/span/text()")).GetValue());
                // PriceRange is left unset in this handler.
                xiaoquPrice.OnSellCount = BaseFunction.TryParseInt(xiaoquElement.Select(Selectors.XPath(".//div[@class='xiaoquListItemSellCount']/a/span/text()")).GetValue());
                xiaoquPriceList.Add(xiaoquPrice);

                // First listing page, only when something is on sale.
                if (xiaoquPrice.OnSellCount > 0)
                {
                    page.AddTargetRequest(BaseFunction.CreateRequest(string.Format("https://hz.lianjia.com/ershoufang/{0}/", xiaoqu.Id)));
                }

                // Remaining pages. Ceiling division, so an exact multiple of the
                // page size (e.g. 60 listings) does not request an extra page.
                if (xiaoquPrice.OnSellCount > ListingsPerPage)
                {
                    int pageCount = (xiaoquPrice.OnSellCount + ListingsPerPage - 1) / ListingsPerPage;
                    Logger?.LogInformation($"{xiaoqu.Id}小区在售{xiaoquPrice.OnSellCount}个,分{pageCount}页");
                    for (int index = 2; index <= pageCount; ++index)
                    {
                        page.AddTargetRequest(BaseFunction.CreateRequest(string.Format("https://hz.lianjia.com/ershoufang/pg{0}{1}/", index, xiaoqu.Id)));
                    }
                }
            }
            catch (Exception ex)
            {
                // Best effort: one malformed list item must not abort the whole page.
                Console.WriteLine(ex);
            }
        }
        Logger?.LogInformation($"{page.TargetUrl}页面获取到小区信息{xiaoquList.Count}个,获取到小区价格信息{xiaoquPriceList.Count}个");
        page.AddResultItem("Result", xiaoquList);
        page.AddResultItem("Result1", xiaoquPriceList);
    }
    // House list page.
    else if (Regex.IsMatch(page.TargetUrl, "https://hz.lianjia.com/ershoufang/[\\s\\S]+"))
    {
        // Community id = the first "c<digits>" token longer than five characters in the URL.
        string xiaoquID = "";
        Regex regex = new Regex("c\\d+");
        foreach (Match item in regex.Matches(page.TargetUrl))
        {
            if (item.Value.Length > 5)
            {
                xiaoquID = item.Value;
                break;
            }
        }

        // Built once per page instead of once per house.
        // NOTE(review): "[\\.\\d+]*" and "[\\d+厅]*" are character classes, not
        // groups; they look like they were meant as "(\\.\\d+)?" and "(\\d+厅)?".
        // Patterns are left unchanged - confirm before tightening.
        Regex mianjiregex = new Regex("\\d+[\\.\\d+]*平米");
        Regex huxingregex = new Regex("\\d+室[\\d+厅]*");

        // Extract one house record and one price record per listing.
        var totalFangZiElements = page.Selectable().SelectList(Selectors.XPath("//ul[@class='sellListContent']/li/div[@class='info clear']")).Nodes();
        var houseList = new List<LianJiaHouseEntity>();
        var housePriceList = new List<LianJiaHousePriceEntity>();
        foreach (var fangElement in totalFangZiElements)
        {
            try
            {
                var house = new LianJiaHouseEntity();
                house.Title = fangElement.Select(Selectors.XPath(".//div[@class='title']/a/text()")).GetValue();
                house.Url = fangElement.Select(Selectors.XPath(".//div[@class='title']/a/@href")).GetValue();
                // Id = the text between the last '/' and the last '.' of the listing link.
                house.Id = house.Url.Substring(house.Url.LastIndexOf('/') + 1, house.Url.LastIndexOf('.') - house.Url.LastIndexOf('/') - 1);
                house.XiaoQuId = xiaoquID;

                string huxmj = fangElement.Select(Selectors.XPath(".//div[@class='address']/div[@class='houseInfo']")).GetValue();
                if (huxmj.Length > 3)
                {
                    // Regex.Match never returns null; a failed match has an empty
                    // Value, which is passed on exactly as before.
                    var mianjimatch = mianjiregex.Match(huxmj);
                    house.MianJi = BaseFunction.TryParseDecimal(mianjimatch.Value.Replace("平", "").Replace("米", "").Trim());

                    var huxingmatch = huxingregex.Match(huxmj);
                    house.HuXing = huxingmatch.Value;
                }
                // PublicTime is left unset in this handler.
                houseList.Add(house);

                var housePrice = new LianJiaHousePriceEntity();
                housePrice.Id = house.Id;
                housePrice.Date1 = today;
                housePrice.Price = BaseFunction.TryParseDecimal(fangElement.Select(Selectors.XPath(".//div[@class='priceInfo']/div[@class='totalPrice']/span/text()")).GetValue());
                // NOTE(review): the 'unitPrice' node feeds SumPrice while the
                // 'totalPrice' node feeds Price - confirm the field naming is intended.
                string ttxx = fangElement.Select(Selectors.XPath(".//div[@class='priceInfo']/div[@class='unitPrice']/span/text()")).GetValue();
                string[] ttxx2 = ttxx.Split('/');
                if (ttxx2.Length == 2)
                {
                    ttxx = ttxx2[0];
                }
                housePrice.SumPrice = BaseFunction.TryParseDecimal(ttxx.Replace("单价", "").Replace("元", ""));
                housePriceList.Add(housePrice);
            }
            catch (Exception ex)
            {
                // Best effort: one malformed listing must not abort the whole page.
                Console.WriteLine(ex);
            }
        }
        Logger?.LogInformation($"{page.TargetUrl}页面获取到二手房信息{houseList.Count}个,获取到二手房价格信息{housePriceList.Count}个");
        page.AddResultItem("Result2", houseList);
        page.AddResultItem("Result3", housePriceList);
    }
}