/// <summary>
/// Walks the 58.com "pinpaigongyu" listing pages for every enabled
/// PinPaiGongYu crawler configuration and hands each non-empty page to
/// <c>GetDataOnPageDoc</c>, saving the data context after each such page.
/// </summary>
/// <remarks>
/// Each configuration is processed inside <c>LogHelper.RunActionNotThrowEx</c>;
/// presumably that helper logs failures instead of rethrowing (name-based
/// assumption - confirm against the helper).
/// </remarks>
public static void CapturPinPaiHouseInfo()
{
    // Materialise the query up front so the loop iterates a fixed list.
    var enabledConfigurations = DataContent.CrawlerConfigurations
        .Where(c => c.ConfigurationName == ConstConfigurationName.PinPaiGongYu && c.IsEnabled)
        .ToList();

    foreach (var crawlerConfiguration in enabledConfigurations)
    {
        LogHelper.RunActionNotThrowEx(() =>
        {
            // The configuration value is JSON; the code reads its "pagecount" and "shortcutname" members.
            var settings = JsonConvert.DeserializeObject<dynamic>(crawlerConfiguration.ConfigurationValue);

            for (var pageIndex = 0; pageIndex < settings.pagecount.Value; pageIndex++)
            {
                var pageUrl = $"http://{settings.shortcutname.Value}.58.com/pinpaigongyu/pn/{pageIndex}";
                var document = new HtmlParser().Parse(HTTPHelper.GetHTMLByURL(pageUrl));

                // Only pages that contain at least one <li> carrying a "logr" attribute are processed.
                var hasListings = document.QuerySelectorAll("li").Any(item => item.HasAttribute("logr"));
                if (hasListings)
                {
                    GetDataOnPageDoc(settings, document);
                    DataContent.SaveChanges();
                }
            }
        }, "CapturPinPaiHouseInfo", crawlerConfiguration);
    }
}
/// <summary>
/// Walks the 58.com "pinpaigongyu" listing pages for every enabled
/// PinPaiGongYu crawler configuration and hands each non-empty page to
/// <c>GetDataOnPageDoc</c>, saving the data context after each such page.
/// </summary>
/// <remarks>
/// Any exception raised while processing one configuration is logged via
/// <c>LogHelper.Error</c> and the loop moves on to the next configuration.
/// </remarks>
public static void CapturPinPaiHouseInfo()
{
    // Materialise the query up front so the loop iterates a fixed list.
    var enabledConfigurations = dataContent.CrawlerConfigurations
        .Where(c => c.ConfigurationName == ConstConfigurationName.PinPaiGongYu && c.IsEnabled)
        .ToList();

    foreach (var crawlerConfiguration in enabledConfigurations)
    {
        try
        {
            // The configuration value is JSON; the code reads its "pagecount" and "shortcutname" members.
            var confInfo = Newtonsoft.Json.JsonConvert.DeserializeObject<dynamic>(crawlerConfiguration.ConfigurationValue);

            for (var index = 0; index < confInfo.pagecount.Value; index++)
            {
                var url = $"http://{confInfo.shortcutname.Value}.58.com/pinpaigongyu/pn/{index}";
                var htmlResult = HTTPHelper.GetHTMLByURL(url);
                var page = new HtmlParser().Parse(htmlResult);

                // Where() never returns null, so only emptiness needs checking; Any() stops at
                // the first match instead of counting every matching element.
                var lstLi = page.QuerySelectorAll("li").Where(element => element.HasAttribute("logr"));
                if (!lstLi.Any())
                {
                    continue;
                }

                GetDataOnPageDoc(confInfo, page);
                dataContent.SaveChanges();
            }
        }
        catch (Exception ex)
        {
            LogHelper.Error("PinPaiGongYuHouseCrawler CrawlerHouseInfo Exception", ex);
        }
    }
}
/// <summary>
/// Filters out invalid city configurations: every PinPaiGongYu crawler
/// configuration whose first listing page (page 0) contains no
/// <c>li</c> element with a <c>logr</c> attribute is marked as disabled.
/// </summary>
/// <remarks>
/// All PinPaiGongYu configurations are inspected regardless of their current
/// <c>IsEnabled</c> value, and the changes are saved once after the loop.
/// </remarks>
public static void FilterInvalidCityConfig()
{
    var cityConfigurations = DataContent.CrawlerConfigurations
        .Where(c => c.ConfigurationName == ConstConfigurationName.PinPaiGongYu)
        .ToList();

    foreach (var cityConfiguration in cityConfigurations)
    {
        // The configuration value is JSON; only its "shortcutname" member is read here.
        var settings = JsonConvert.DeserializeObject<dynamic>(cityConfiguration.ConfigurationValue);
        var firstPageUrl = $"http://{settings.shortcutname.Value}.58.com/pinpaigongyu/pn/0";
        var document = new HtmlParser().Parse(HTTPHelper.GetHTMLByURL(firstPageUrl));

        var hasListings = document.QuerySelectorAll("li").Any(item => item.HasAttribute("logr"));
        if (hasListings)
        {
            continue;
        }

        cityConfiguration.IsEnabled = false;
    }

    DataContent.SaveChanges();
}