/// <summary>
/// Scans every <c>li</c> element carrying a <c>logr</c> attribute on the given page and
/// adds one <see cref="BizHouseInfo"/> per listing to <c>dataContent</c>, skipping listings
/// whose URL is already present in <c>dataContent.HouseInfos</c>.
/// </summary>
/// <param name="confInfo">
/// Dynamic configuration object; <c>shortcutname.Value</c> is used as the 58.com sub-domain and
/// <c>cityname.Value</c> is copied into <c>LocationCityName</c>.
/// </param>
/// <param name="page">Parsed HTML document of one listing page.</param>
private static void GetDataOnPageDoc(dynamic confInfo, AngleSharp.Dom.Html.IHtmlDocument page)
{
    // NOTE(review): All(...) below requires BOTH markers to appear among the title tokens before
    // the first token is used as the location - confirm that Any(...) was not intended.
    var locationMarkers = new[] { "公寓", "青年社区" };

    foreach (var element in page.QuerySelectorAll("li").Where(li => li.HasAttribute("logr")))
    {
        var titleNode = element.QuerySelector("h2");
        var linkNode = element.QuerySelector("a");
        if (titleNode == null || linkNode == null)
        {
            // Malformed list item (no heading or no link): skip it instead of aborting the page.
            continue;
        }

        var href = linkNode.GetAttribute("href");
        if (string.IsNullOrEmpty(href))
        {
            // Without a link the listing has no identity for the duplicate check.
            continue;
        }

        string onlineURL = $"http://{confInfo.shortcutname.Value}.58.com" + href;
        if (dataContent.HouseInfos.Any(h => h.HouseOnlineURL == onlineURL))
        {
            continue;
        }

        var houseTitle = titleNode.TextContent;
        var houseInfoList = houseTitle.Split(' ');

        // The price element is optional: a missing or non-numeric price is stored as 0 / "".
        var priceText = element.QuerySelector("b")?.TextContent ?? string.Empty;
        int housePrice;
        int.TryParse(priceText, out housePrice);

        // Split always yields at least one token; fall back to it when the title has no space
        // so that indexing the second token cannot go out of range.
        var useFirstToken = houseInfoList.Length < 2
            || locationMarkers.All(marker => houseInfoList.Contains(marker));

        var now = DateTime.Now;
        var houseInfo = new BizHouseInfo
        {
            HouseTitle = houseTitle,
            HouseOnlineURL = onlineURL,
            DisPlayPrice = priceText,
            HouseLocation = useFirstToken ? houseInfoList[0] : houseInfoList[1],
            DataCreateTime = now,
            Source = ConstConfigurationName.PinPaiGongYu,
            HousePrice = housePrice,
            HouseText = houseTitle,
            LocationCityName = confInfo.cityname.Value,
            PubTime = now
        };
        dataContent.Add(houseInfo);
    }
}
/// <summary>
/// Downloads one page of a Douban group's discussion list and converts every topic row whose
/// link is not already stored in <c>dataContent.HouseInfos</c> (for the Douban source) into a
/// <see cref="BizHouseInfo"/>.
/// </summary>
/// <param name="groupID">Douban group identifier inserted into the discussion URL.</param>
/// <param name="cityName">Value copied into <c>LocationCityName</c> of every result.</param>
/// <param name="pageIndex">Zero-based page index; the request's <c>start</c> offset is <c>pageIndex * 25</c>.</param>
/// <returns>
/// The listings found on the page. The list is empty when the download returns no HTML or the
/// page contains no <c>table.olt</c> element.
/// </returns>
private static List<BizHouseInfo> GetDataFromOnlineWeb(string groupID, string cityName, int pageIndex)
{
    // URLs already stored for the Douban source, used to skip topics seen before.
    var hsDoubanHouseURL = new HashSet<string>(
        dataContent.HouseInfos
            .Where(h => h.Source == ConstConfigurationName.Douban)
            .Select(h => h.HouseOnlineURL)
            .Distinct());

    var lstHouseInfo = new List<BizHouseInfo>();
    var url = $"https://www.douban.com/group/{groupID}/discussion?start={pageIndex * 25}";
    var htmlResult = HTTPHelper.GetHTML(url);
    if (string.IsNullOrEmpty(htmlResult))
    {
        return lstHouseInfo;
    }

    var page = htmlParser.Parse(htmlResult);
    var topicTable = page.QuerySelector("table.olt");
    if (topicTable == null)
    {
        // The response did not contain the topic table; there is nothing to extract.
        return lstHouseInfo;
    }

    foreach (var trItem in topicTable.QuerySelectorAll("tr"))
    {
        // Rows without a title cell or without a link inside it are skipped.
        var anchor = trItem.QuerySelector("td.title")?.QuerySelector("a");
        if (anchor == null)
        {
            continue;
        }

        var href = anchor.GetAttribute("href");
        if (string.IsNullOrEmpty(href) || hsDoubanHouseURL.Contains(href))
        {
            continue;
        }

        var now = DateTime.Now;

        // The time cell is looked up on the row: a query scoped to the title cell can never
        // match another td, which made the publication time always fall back to "now".
        var pubTime = now;
        var timeCell = trItem.QuerySelector("td.time");
        if (timeCell != null)
        {
            var timeText = timeCell.TextContent.Trim();
            DateTime parsed;

            // First try the text with the current year prefixed (as the original Parse call did),
            // then the raw text in case it already carries a year; otherwise keep "now".
            if (DateTime.TryParse(now.ToString("yyyy-") + timeText, out parsed)
                || DateTime.TryParse(timeText, out parsed))
            {
                pubTime = parsed;
            }
        }

        var title = anchor.GetAttribute("title");
        lstHouseInfo.Add(new BizHouseInfo
        {
            HouseTitle = title,
            HouseOnlineURL = href,
            HouseLocation = title,
            HouseText = title,
            DataCreateTime = now,
            PubTime = pubTime,
            DisPlayPrice = "",
            Source = ConstConfigurationName.Douban,
            HousePrice = 0,
            LocationCityName = cityName
        });
    }

    return lstHouseInfo;
}