Exemple #1
0
        /// <summary>
        /// Walks the 58.com "pinpaigongyu" pages for every enabled crawler configuration
        /// named <c>ConstConfigurationName.PinPaiGongYu</c>.
        /// </summary>
        /// <remarks>
        /// Each configuration is handled inside <c>LogHelper.RunActionNotThrowEx</c>
        /// (presumably this logs failures instead of rethrowing; confirm against the helper).
        /// For page indexes 0 up to the configured <c>pagecount</c>, the page is downloaded
        /// and parsed. When it contains at least one <c>li</c> element with a <c>logr</c>
        /// attribute, the document is handed to <c>GetDataOnPageDoc</c> and the data
        /// context is saved. Pages without such elements are skipped.
        /// </remarks>
        public static void CapturPinPaiHouseInfo()
        {
            var enabledConfigurations = DataContent.CrawlerConfigurations
                .Where(c => c.ConfigurationName == ConstConfigurationName.PinPaiGongYu && c.IsEnabled)
                .ToList();

            foreach (var configuration in enabledConfigurations)
            {
                LogHelper.RunActionNotThrowEx(() =>
                {
                    var settings = JsonConvert.DeserializeObject<dynamic>(configuration.ConfigurationValue);

                    var pageNumber = 0;
                    // The page count is read from the settings on every pass, as in a plain for-loop condition.
                    while (pageNumber < settings.pagecount.Value)
                    {
                        var pageUrl  = $"http://{settings.shortcutname.Value}.58.com/pinpaigongyu/pn/{pageNumber}";
                        var html     = HTTPHelper.GetHTMLByURL(pageUrl);
                        var document = new HtmlParser().Parse(html);

                        // Only pages that contain at least one <li logr="..."> element are processed.
                        var hasLogrItems = document.QuerySelectorAll("li").Any(li => li.HasAttribute("logr"));
                        if (hasLogrItems)
                        {
                            GetDataOnPageDoc(settings, document);
                            DataContent.SaveChanges();
                        }

                        pageNumber++;
                    }
                }, "CapturPinPaiHouseInfo", configuration);
            }
        }
Exemple #2
0
        /// <summary>
        /// Walks the 58.com "pinpaigongyu" pages for every enabled crawler configuration
        /// named <c>ConstConfigurationName.PinPaiGongYu</c>.
        /// </summary>
        /// <remarks>
        /// For page indexes 0 up to the configured <c>pagecount</c>, the page is downloaded
        /// and parsed. When it contains at least one <c>li</c> element with a <c>logr</c>
        /// attribute, the document is handed to <c>GetDataOnPageDoc</c> and the data
        /// context is saved. Any exception thrown while processing a configuration is
        /// logged through <c>LogHelper.Error</c>; the remaining pages of that configuration
        /// are abandoned and the loop moves on to the next configuration.
        /// </remarks>
        public static void CapturPinPaiHouseInfo()
        {
            foreach (var crawlerConf in dataContent.CrawlerConfigurations.Where(c => c.ConfigurationName
                                                                                == ConstConfigurationName.PinPaiGongYu && c.IsEnabled).ToList())
            {
                try
                {
                    var confInfo = Newtonsoft.Json.JsonConvert.DeserializeObject<dynamic>(crawlerConf.ConfigurationValue);
                    for (var index = 0; index < confInfo.pagecount.Value; index++)
                    {
                        var url        = $"http://{confInfo.shortcutname.Value}.58.com/pinpaigongyu/pn/{index}";
                        var htmlResult = HTTPHelper.GetHTMLByURL(url);
                        var page       = new HtmlParser().Parse(htmlResult);

                        // LINQ never yields a null sequence, so no null check is needed.
                        // Any() stops at the first matching <li> instead of counting them all.
                        var hasLogrItems = page.QuerySelectorAll("li").Any(element => element.HasAttribute("logr"));
                        if (!hasLogrItems)
                        {
                            continue;
                        }

                        GetDataOnPageDoc(confInfo, page);

                        dataContent.SaveChanges();
                    }
                }
                catch (Exception ex)
                {
                    LogHelper.Error("PinPaiGongYuHouseCrawler CrawlerHouseInfo Exception", ex);
                }
            }
        }
Exemple #3
0
 /// <summary>
 /// Filters out invalid city configurations.
 /// </summary>
 /// <remarks>
 /// For every crawler configuration named <c>ConstConfigurationName.PinPaiGongYu</c>,
 /// page 0 of the city's 58.com "pinpaigongyu" section is downloaded and parsed.
 /// If it contains no <c>li</c> element with a <c>logr</c> attribute, the configuration's
 /// <c>IsEnabled</c> flag is set to <c>false</c>. Changes are saved once, after all
 /// configurations have been checked.
 /// </remarks>
 public static void FilterInvalidCityConfig()
 {
     var cityConfigurations = DataContent.CrawlerConfigurations
         .Where(c => c.ConfigurationName == ConstConfigurationName.PinPaiGongYu)
         .ToList();

     foreach (var cityConfiguration in cityConfigurations)
     {
         var settings     = JsonConvert.DeserializeObject<dynamic>(cityConfiguration.ConfigurationValue);
         var firstPageUrl = $"http://{settings.shortcutname.Value}.58.com/pinpaigongyu/pn/0";
         var document     = new HtmlParser().Parse(HTTPHelper.GetHTMLByURL(firstPageUrl));

         // A city whose first page has at least one <li logr="..."> element is left untouched.
         var hasLogrItems = document.QuerySelectorAll("li").Any(li => li.HasAttribute("logr"));
         if (hasLogrItems)
         {
             continue;
         }

         cityConfiguration.IsEnabled = false;
     }

     DataContent.SaveChanges();
 }