/// <summary>
/// Crawls every configuration returned for <c>ConstConfigName.Beike</c>, bulk-inserts the
/// houses collected for each configuration, and logs the total count and elapsed seconds.
/// </summary>
/// <remarks>
/// Each configuration is processed through <c>LogHelper.RunActionNotThrowEx</c>
/// (presumably logs and swallows exceptions per configuration — confirm against LogHelper).
/// Anything thrown outside that wrapper is caught here and logged, never rethrown.
/// </remarks>
public void Run()
 {
     try
     {
         int capturedHouseCount = 0;
         // Stopwatch rather than DateTime.Now subtraction: the elapsed value is not
         // affected by wall-clock or daylight-saving adjustments during the crawl.
         var stopwatch = System.Diagnostics.Stopwatch.StartNew();
         foreach (var config in configDapper.GetList(ConstConfigName.Beike))
         {
             LogHelper.RunActionNotThrowEx(() =>
             {
                 var houses   = new List<BaseHouseInfo>();
                 var confInfo = Newtonsoft.Json.JsonConvert.DeserializeObject<dynamic>(config.ConfigurationValue);
                 // Read the configured page count once; confInfo is not modified inside the loop.
                 var pageCount = confInfo.pagecount.Value;
                 for (var pageIndex = 0; pageIndex < pageCount; pageIndex++)
                 {
                     var pageHouses = GetHouseData(confInfo.citySortName.Value, confInfo.cityID.Value,
                                                   confInfo.cityName.Value, pageIndex);
                     houses.AddRange(pageHouses);
                 }
                 // Count is taken before the insert, as in the original ordering.
                 capturedHouseCount += houses.Count;
                 houseDapper.BulkInsertHouses(houses);
             }, "BeikeHouseCrawler CaptureHouseInfo ", config);
         }
         LogHelper.Info($"BeikeHouseCrawler finish.本次共爬取到{capturedHouseCount}条数据,耗时{stopwatch.Elapsed.TotalSeconds}秒。");
     }
     catch (Exception ex)
     {
         LogHelper.Error("BeikeHouseCrawler CrawlerHouseInfo Exception", ex);
     }
 }
Example #2
0
 /// <summary>
 /// For every configuration returned for <c>ConstConfigName.MoguHouse</c>, fetches each
 /// configured page for each rent type and bulk-inserts the combined result.
 /// </summary>
 /// <remarks>
 /// Each configuration is processed through <c>LogHelper.RunActionNotThrowEx</c>
 /// (presumably logs and swallows exceptions — confirm against LogHelper).
 /// </remarks>
 public void Run()
 {
     foreach (var conf in configDapper.GetList(ConstConfigName.MoguHouse))
     {
         LogHelper.RunActionNotThrowEx(() =>
         {
             var settings = Newtonsoft.Json.JsonConvert.DeserializeObject<dynamic>(conf.ConfigurationValue);
             var cityName = settings.cityname.Value;
             var cityId   = (int)settings.cityid.Value;

             // 2: shared rental, 3: whole-unit rental, 5: owner-listed
             int[] rentTypes = { 2, 3, 5 };

             var collected = new List<BaseHouseInfo>();
             foreach (var rentType in rentTypes)
             {
                 // Pages are 1-based and the configured page count is inclusive.
                 for (var pageIndex = 1; pageIndex <= settings.pagecount.Value; pageIndex++)
                 {
                     collected.AddRange(GetHouseData(cityName, cityId, pageIndex, rentType));
                 }
             }

             houseDapper.BulkInsertHouses(collected);
         }, "MoGuHouseCrawler Run ", conf);
     }
 }
Example #3
0
        /// <summary>
        /// Fetches pages from the API, parses them into houses, bulk-inserts the result and
        /// logs the number of houses collected together with the elapsed seconds.
        /// </summary>
        /// <remarks>
        /// The page count comes from the first configuration returned for
        /// <c>ConstConfigName.HuZhuZuFang</c>; when none exists it falls back to 10.
        /// </remarks>
        public void Run()
        {
            // Stopwatch rather than DateTime.Now subtraction: the elapsed value is not
            // affected by wall-clock or daylight-saving adjustments during the crawl.
            var stopwatch = System.Diagnostics.Stopwatch.StartNew();

            var peopleRentingConf = configDapper.GetList(ConstConfigName.HuZhuZuFang)
                                    .FirstOrDefault();

            var pageCount = peopleRentingConf != null
                ? JsonConvert.DeserializeObject<dynamic>(peopleRentingConf.ConfigurationValue).pagecount.Value
                : 10;

            var houses = new List<BaseHouseInfo>();

            // NOTE(review): starting at 1 with a strict '<' fetches pageCount - 1 pages;
            // confirm whether the last page is meant to be skipped.
            for (var pageNum = 1; pageNum < pageCount; pageNum++)
            {
                string result = getResultFromAPI(pageNum);
                houses.AddRange(GetHouseData(result));
            }
            houseDapper.BulkInsertHouses(houses);

            LogHelper.Info($"PeopleRentingCrawler finish.本次共爬取到{houses.Count}条数据,耗时{stopwatch.Elapsed.TotalSeconds}秒。");
        }
Example #4
0
 /// <summary>
 /// For every configuration loaded for <c>Source</c>, fetches each page, parses it into
 /// houses and bulk-inserts that page's houses immediately.
 /// </summary>
 public void Run()
 {
     foreach (var config in _configDapper.LoadBySource(Source))
     {
         var confInfo = JsonConvert.DeserializeObject<JToken>(config.Json);

         // Convert the page count once instead of on every loop check.
         // Assumes GetJsonOrHTML/ParseHouses do not rewrite confInfo["pagecount"] — confirm.
         int pageCount = confInfo["pagecount"].ToObject<int>();

         // NOTE(review): starting at 1 with a strict '<' visits pageCount - 1 pages;
         // confirm whether the last page is meant to be skipped.
         for (var pageNum = 1; pageNum < pageCount; pageNum++)
         {
             var htmlOrJson = GetJsonOrHTML(confInfo, pageNum);
             var houses     = ParseHouses(confInfo, htmlOrJson);
             _houseDapper.BulkInsertHouses(houses);
         }
     }
 }
        /// <summary>
        /// Fetches pages 1 through 19, parses each page's HTML into houses, bulk-inserts
        /// everything collected and returns how many houses were collected.
        /// </summary>
        /// <returns>The number of houses gathered before the bulk insert.</returns>
        private int CaptureHouse()
        {
            var collected = new List<BaseHouseInfo>();

            var pageNum = 1;
            while (pageNum < 20)
            {
                collected.AddRange(GetHouseDataFromHTML(GetHTML(pageNum)));
                pageNum++;
            }

            // The count is read before the insert call.
            int total = collected.Count;
            houseDapper.BulkInsertHouses(collected);
            return total;
        }
Example #6
0
        /// <summary>
        /// Runs the crawler for every configuration of the current source: marks the crawler
        /// and each city as "running" in redis, fetches and parses pages one by one, stores
        /// each page's houses through the house dapper and the elastic service, then marks
        /// the city "finish" and finally the crawler "stop".
        /// </summary>
        /// <remarks>
        /// An exception while processing one configuration is written to the console and the
        /// loop moves on to the next configuration. An empty page response stops paging for
        /// that configuration only.
        /// </remarks>
        public void Run()
        {
            Console.WriteLine($"【{Source.GetSourceName()}】 running");
            _redisTool.WriteHash(RedisKeys.CurrentCrawler, Source.GetSourceName(), "running");
            var crawlerConfigs = _configDapper.LoadBySource(Source.GetSourceName());

            Console.WriteLine($"configs count:{crawlerConfigs.Count}");
            foreach (var config in crawlerConfigs)
            {
                try
                {
                    Console.WriteLine($"开始抓取【{config.City}】房源数据.");
                    var timer    = System.Diagnostics.Stopwatch.StartNew();
                    var stateKey = RedisKeys.CrawlerState.CopyOne(config.Source);
                    _redisTool.WriteHash(stateKey, config.City, "running");

                    var pageNum = 0;
                    while (pageNum < config.PageCount)
                    {
                        var payload = GetJsonOrHTML(config, pageNum);
                        if (string.IsNullOrEmpty(payload))
                        {
                            // An empty response ends paging for this configuration.
                            Console.WriteLine($"[{DateTime.Now}]|当前页数:{pageNum}抓取失败.");
                            break;
                        }

                        var parsed = ParseHouses(config, payload);
                        Console.WriteLine($"[{DateTime.Now}]|当前页数:{pageNum},共抓取:{parsed.Count}条数据.");
                        _houseDapper.BulkInsertHouses(parsed);
                        _elasticService.SaveHouses(parsed);
                        pageNum++;
                    }

                    timer.Stop();
                    var time = timer.Elapsed.TotalSeconds.ToString(CultureInfo.InvariantCulture);
                    Console.WriteLine($"完成{config.City}房源数据抓取, 耗时:{time}");
                    _redisTool.WriteHash(stateKey, config.City, "finish");
                }
                catch (Exception ex)
                {
                    // Failures are reported per configuration; the remaining ones still run.
                    Console.WriteLine($"ex:{ex.ToString()},config:{config.City}");
                }
            }

            _redisTool.WriteHash(RedisKeys.CurrentCrawler, Source.GetSourceName(), "stop");
            Console.WriteLine($"{Source.GetSourceName()} stop");
        }
 /// <summary>
 /// For every configuration returned for <c>ConstConfigName.Zuber</c>, repeatedly calls
 /// <c>GetHouseData</c>, feeding the sequence value returned by one call into the next,
 /// and bulk-inserts all houses collected for that configuration.
 /// </summary>
 /// <remarks>
 /// NOTE(review): the log tag says "DoubanHouseCrawler" although the configurations are
 /// loaded for Zuber — looks like a copy-paste leftover; confirm before changing it.
 /// </remarks>
 public void Run()
 {
     foreach (var doubanConf in configDapper.GetList(ConstConfigName.Zuber))
     {
         LogHelper.RunActionNotThrowEx(() =>
         {
             var settings  = Newtonsoft.Json.JsonConvert.DeserializeObject<dynamic>(doubanConf.ConfigurationValue);
             var cityName  = settings.cityname.Value;
             var collected = new List<BaseHouseInfo>();

             // The first request is made with an empty sequence.
             var sequence = "";

             // NOTE(review): the inclusive '<=' bound issues pagecount + 1 requests — confirm intended.
             for (var request = 0; request <= settings.pagecount.Value; request++)
             {
                 var page = GetHouseData(cityName, sequence);
                 sequence = page.Item2;
                 collected.AddRange(page.Item1);
             }

             houseDapper.BulkInsertHouses(collected);
         }, "DoubanHouseCrawler CaptureHouseInfo ", doubanConf);
     }
 }
 /// <summary>
 /// For every configuration of the current source, fetches pages 0 .. PageCount - 1,
 /// parses each page into houses and stores them through the house dapper and the
 /// elastic service. Paging for a configuration ends at the first empty response.
 /// </summary>
 /// <remarks>
 /// Each configuration is processed through <c>LogHelper.RunActionNotThrowEx</c>
 /// (presumably logs and swallows exceptions — confirm against LogHelper).
 /// </remarks>
 public void Run()
 {
     var sourceConfigs = _configDapper.LoadBySource(Source.GetSourceName());
     foreach (var config in sourceConfigs)
     {
         LogHelper.RunActionNotThrowEx(() =>
         {
             var pageNum = 0;
             while (pageNum < config.PageCount)
             {
                 var payload = GetJsonOrHTML(config, pageNum);
                 if (string.IsNullOrEmpty(payload))
                 {
                     // Nothing follows the loop, so leaving it ends this configuration's run.
                     break;
                 }

                 var parsed = ParseHouses(config, payload);
                 _houseDapper.BulkInsertHouses(parsed);
                 _elasticService.SaveHouses(parsed);
                 pageNum++;
             }
         }, Source.GetSourceName(), config);
     }
 }
Example #9
0
        /// <summary>
        /// Collects houses for one crawler configuration: reads <c>shortcutname</c> and
        /// <c>pagecount</c> from the configuration JSON, fetches and parses each page,
        /// bulk-inserts the result and returns how many houses were collected.
        /// </summary>
        /// <param name="crawlerConfiguration">Configuration whose <c>ConfigurationValue</c> holds the JSON settings.</param>
        /// <returns>
        /// 0 when <c>shortcutname</c> is missing or empty; otherwise the number of houses
        /// gathered before the bulk insert.
        /// </returns>
        private int CaptureHouse(CrawlerConfig crawlerConfiguration)
        {
            var settings = JsonConvert.DeserializeObject<dynamic>(crawlerConfiguration.ConfigurationValue);

            // Without a city short name there is nothing to query.
            if (settings.shortcutname == null || string.IsNullOrEmpty(settings.shortcutname.Value))
            {
                return 0;
            }

            string cityShortCutName = settings.shortcutname.Value;
            var collected = new List<BaseHouseInfo>();

            // Pages start at 1; the upper bound is exclusive.
            for (var pageNum = 1; pageNum < settings.pagecount.Value; pageNum++)
            {
                var apiResult = GetResultByAPI(cityShortCutName, pageNum);
                collected.AddRange(GetHouseData(cityShortCutName, apiResult));
            }

            // The count is read before the insert call.
            int total = collected.Count;
            houseDapper.BulkInsertHouses(collected);
            return total;
        }
        /// <summary>
        /// Crawls every configuration returned for <c>ConstConfigName.Chengdufgj</c>: builds
        /// the listing URL for each page, downloads and parses it, bulk-inserts the houses
        /// per configuration, then logs the total count and elapsed seconds.
        /// </summary>
        /// <remarks>
        /// Each configuration is processed through <c>LogHelper.RunActionNotThrowEx</c>
        /// (presumably logs and swallows exceptions — confirm against LogHelper).
        /// NOTE(review): the log tag "CapturBaiXing" does not match this crawler — looks
        /// like a copy-paste leftover; confirm before changing it.
        /// </remarks>
        public void Run()
        {
            int capturedHouseCount = 0;
            // Stopwatch rather than DateTime.Now subtraction: the elapsed value is not
            // affected by wall-clock or daylight-saving adjustments during the crawl.
            var stopwatch = System.Diagnostics.Stopwatch.StartNew();

            foreach (var crawlerConfiguration in configDapper.GetList(ConstConfigName.Chengdufgj).ToList())
            {
                LogHelper.RunActionNotThrowEx(() =>
                {
                    var houses   = new List<BaseHouseInfo>();
                    var confInfo = JsonConvert.DeserializeObject<dynamic>(crawlerConfiguration.ConfigurationValue);
                    // Read the configured page count once; confInfo is not modified inside the loop.
                    var pageCount = confInfo.pagecount.Value;
                    for (var pageNum = 1; pageNum < pageCount; pageNum++)
                    {
                        var url       = $"http://zf.cdfgj.gov.cn/{confInfo.path.Value}page={pageNum}";
                        var houseHTML = GetHouseHTML(url);
                        houses.AddRange(GetDataFromHMTL(confInfo.cityname.Value, houseHTML));
                    }
                    houseDapper.BulkInsertHouses(houses);
                    capturedHouseCount += houses.Count;
                }, "CapturBaiXing", crawlerConfiguration);
            }
            LogHelper.Info($"ChengduZufangCrawler finish.本次共爬取到{capturedHouseCount}条数据,耗时{stopwatch.Elapsed.TotalSeconds}秒。");
        }
Example #11
0
        /// <summary>
        /// Crawls every enabled configuration returned for <c>ConstConfigName.PinPaiGongYu</c>:
        /// fetches each page from the API, converts it to houses, bulk-inserts them per
        /// configuration, then logs the total count and elapsed seconds.
        /// </summary>
        /// <remarks>
        /// Each configuration is processed through <c>LogHelper.RunActionNotThrowEx</c>
        /// (presumably logs and swallows exceptions — confirm against LogHelper).
        /// </remarks>
        public void Run()
        {
            int capturedHouseCount = 0;
            // Stopwatch rather than DateTime.Now subtraction: the elapsed value is not
            // affected by wall-clock or daylight-saving adjustments during the crawl.
            var stopwatch = System.Diagnostics.Stopwatch.StartNew();

            foreach (var crawlerConfiguration in configDapper.GetList(ConstConfigName.PinPaiGongYu)
                     .Where(c => c.IsEnabled).ToList())
            {
                LogHelper.RunActionNotThrowEx(() =>
                {
                    var houses   = new List<BaseHouseInfo>();
                    var confInfo = JsonConvert.DeserializeObject<dynamic>(crawlerConfiguration.ConfigurationValue);
                    // Read the configured page count once; confInfo is not modified inside the loop.
                    var pageCount = confInfo.pagecount.Value;
                    for (var page = 0; page < pageCount; page++)
                    {
                        var jsonData = GetDataFromAPI(confInfo.shortcutname.Value, page);
                        houses.AddRange(GetHouses(confInfo.shortcutname.Value, confInfo.cityname.Value, jsonData));
                    }
                    houseDapper.BulkInsertHouses(houses);
                    capturedHouseCount += houses.Count;
                }, "CapturPinPaiHouseInfo", crawlerConfiguration);
            }

            LogHelper.Info($"PinPaiGongYuHouseCrawler finish.本次共爬取到{capturedHouseCount}条数据,耗时{stopwatch.Elapsed.TotalSeconds}秒。");
        }