/// <summary>
/// Crawls every Beike configuration entry, bulk-inserts the houses collected for each
/// entry, and finally logs the total number of houses and the elapsed time.
/// </summary>
/// <remarks>
/// Each entry is processed inside <c>LogHelper.RunActionNotThrowEx</c> (presumably so that one
/// failing entry does not stop the others — confirm against LogHelper). Any exception that
/// still reaches this method is logged by the outer catch and not rethrown.
/// </remarks>
public void Run()
{
    try
    {
        int capturedHouseCount = 0;
        // Stopwatch is monotonic; subtracting DateTime.Now values is skewed by clock/DST changes.
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        foreach (var config in configDapper.GetList(ConstConfigName.Beike))
        {
            LogHelper.RunActionNotThrowEx(() =>
            {
                List<BaseHouseInfo> houses = new List<BaseHouseInfo>();
                var confInfo = Newtonsoft.Json.JsonConvert.DeserializeObject<dynamic>(config.ConfigurationValue);

                // Pages are requested with a zero-based index: 0 .. pagecount - 1.
                for (var pageIndex = 0; pageIndex < confInfo.pagecount.Value; pageIndex++)
                {
                    var lstHouseInfo = GetHouseData(
                        confInfo.citySortName.Value,
                        confInfo.cityID.Value,
                        confInfo.cityName.Value,
                        pageIndex);
                    houses.AddRange(lstHouseInfo);
                }

                // Counted before the insert, exactly as before: a failing insert still counts.
                capturedHouseCount += houses.Count;
                houseDapper.BulkInsertHouses(houses);
            }, "BeikeHouseCrawler CaptureHouseInfo ", config);
        }

        LogHelper.Info($"BeikeHouseCrawler finish.本次共爬取到{capturedHouseCount}条数据,耗时{stopwatch.Elapsed.TotalSeconds}秒。");
    }
    catch (Exception ex)
    {
        LogHelper.Error("BeikeHouseCrawler CrawlerHouseInfo Exception", ex);
    }
}
/// <summary>
/// For every MoguHouse configuration entry, fetches all configured pages for each rent
/// type and bulk-inserts the combined result once per entry.
/// </summary>
public void Run()
{
    var configs = configDapper.GetList(ConstConfigName.MoguHouse);
    foreach (var conf in configs)
    {
        LogHelper.RunActionNotThrowEx(() =>
        {
            var collected = new List<BaseHouseInfo>();
            var settings = Newtonsoft.Json.JsonConvert.DeserializeObject<dynamic>(conf.ConfigurationValue);
            var cityName = settings.cityname.Value;
            var cityId = (int)settings.cityid.Value;
            var lastPage = settings.pagecount.Value;

            // Rent types: 2 = shared rental, 3 = whole-unit rental, 5 = owner-listed housing.
            foreach (var rentType in new[] { 2, 3, 5 })
            {
                // Pages are one-based and the configured page count is inclusive.
                var pageIndex = 1;
                while (pageIndex <= lastPage)
                {
                    var pageHouses = GetHouseData(cityName, cityId, pageIndex, rentType);
                    collected.AddRange(pageHouses);
                    pageIndex++;
                }
            }

            houseDapper.BulkInsertHouses(collected);
        }, "MoGuHouseCrawler Run ", conf);
    }
}
/// <summary>
/// Crawls the HuZhuZuFang listing pages, bulk-inserts every house found in one batch, and
/// logs the number of houses together with the elapsed time.
/// </summary>
/// <remarks>
/// The page count is read from the first HuZhuZuFang configuration entry; when no entry
/// exists it falls back to 10.
/// </remarks>
public void Run()
{
    // Stopwatch is monotonic; subtracting DateTime.Now values is skewed by clock/DST changes.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

    var peopleRentingConf = configDapper.GetList(ConstConfigName.HuZhuZuFang)
        .FirstOrDefault();
    var pageCount = peopleRentingConf != null
        ? JsonConvert.DeserializeObject<dynamic>(peopleRentingConf.ConfigurationValue).pagecount.Value
        : 10;

    List<BaseHouseInfo> houses = new List<BaseHouseInfo>();

    // NOTE(review): starts at page 1 and stops before pageCount, so pageCount - 1 pages are
    // fetched. Kept as-is; confirm whether the last page is meant to be included.
    for (var pageNum = 1; pageNum < pageCount; pageNum++)
    {
        string result = getResultFromAPI(pageNum);
        houses.AddRange(GetHouseData(result));
    }

    houseDapper.BulkInsertHouses(houses);
    int capturedHouseCount = houses.Count;

    LogHelper.Info($"PeopleRentingCrawler finish.本次共爬取到{capturedHouseCount}条数据,耗时{stopwatch.Elapsed.TotalSeconds}秒。");
}
/// <summary>
/// For every configuration entry of this crawler's source, downloads each page, parses
/// the houses on it and bulk-inserts them page by page.
/// </summary>
public void Run()
{
    foreach (var config in _configDapper.LoadBySource(Source))
    {
        var confInfo = JsonConvert.DeserializeObject<JToken>(config.Json);

        // Parse the page count once; it used to be re-read and converted from the JSON
        // token on every evaluation of the loop condition.
        var pageCount = confInfo["pagecount"].ToObject<int>();

        // NOTE(review): starts at page 1 and stops before pageCount, so pageCount - 1 pages
        // are fetched. Kept as-is; confirm whether the last page is meant to be included.
        for (var pageNum = 1; pageNum < pageCount; pageNum++)
        {
            var htmlOrJson = GetJsonOrHTML(confInfo, pageNum);
            var houses = ParseHouses(confInfo, htmlOrJson);
            _houseDapper.BulkInsertHouses(houses);
        }
    }
}
/// <summary>
/// Downloads a fixed range of listing pages, extracts the houses from each page's HTML and
/// bulk-inserts them in one batch.
/// </summary>
/// <returns>The number of houses collected across all fetched pages.</returns>
private int CaptureHouse()
{
    // Exclusive upper bound: pages 1 through 19 are requested.
    const int pageLimit = 20;

    var collected = new List<BaseHouseInfo>();
    var pageNum = 1;
    while (pageNum < pageLimit)
    {
        var html = GetHTML(pageNum);
        collected.AddRange(GetHouseDataFromHTML(html));
        pageNum++;
    }

    // Snapshot the count before handing the list to the insert.
    var capturedCount = collected.Count;
    houseDapper.BulkInsertHouses(collected);
    return capturedCount;
}
/// <summary>
/// Runs the crawler for every configuration of this source: marks the crawler and each
/// city as "running" in redis, fetches and stores the houses page by page (database and
/// elastic), then marks the city as "finish" and finally the crawler as "stop".
/// </summary>
/// <remarks>
/// An exception while processing one configuration is written to the console and the next
/// configuration is processed. NOTE(review): in that case the city's redis state is left at
/// "running"; confirm whether a failure state should be written.
/// </remarks>
public void Run()
{
    Console.WriteLine($"【{Source.GetSourceName()}】 running");
    _redisTool.WriteHash(RedisKeys.CurrentCrawler, Source.GetSourceName(), "running");

    var configs = _configDapper.LoadBySource(Source.GetSourceName());
    Console.WriteLine($"configs count:{configs.Count}");

    foreach (var config in configs)
    {
        try
        {
            Console.WriteLine($"开始抓取【{config.City}】房源数据.");
            var timer = System.Diagnostics.Stopwatch.StartNew();

            var stateKey = RedisKeys.CrawlerState.CopyOne(config.Source);
            _redisTool.WriteHash(stateKey, config.City, "running");

            var pageNum = 0;
            while (pageNum < config.PageCount)
            {
                var payload = GetJsonOrHTML(config, pageNum);
                if (string.IsNullOrEmpty(payload))
                {
                    // An empty response ends this city's crawl; later pages are not attempted.
                    Console.WriteLine($"[{DateTime.Now}]|当前页数:{pageNum}抓取失败.");
                    break;
                }

                var pageHouses = ParseHouses(config, payload);
                Console.WriteLine($"[{DateTime.Now}]|当前页数:{pageNum},共抓取:{pageHouses.Count}条数据.");
                _houseDapper.BulkInsertHouses(pageHouses);
                _elasticService.SaveHouses(pageHouses);
                pageNum++;
            }

            timer.Stop();
            var elapsedSeconds = timer.Elapsed.TotalSeconds.ToString(CultureInfo.InvariantCulture);
            Console.WriteLine($"完成{config.City}房源数据抓取, 耗时:{elapsedSeconds}");
            _redisTool.WriteHash(stateKey, config.City, "finish");
        }
        catch (Exception error)
        {
            Console.WriteLine($"ex:{error},config:{config.City}");
        }
    }

    _redisTool.WriteHash(RedisKeys.CurrentCrawler, Source.GetSourceName(), "stop");
    Console.WriteLine($"{Source.GetSourceName()} stop");
}
/// <summary>
/// For every Zuber configuration entry, repeatedly requests house data, feeding the
/// sequence value returned by one request into the next, and bulk-inserts the combined
/// result once per entry.
/// </summary>
public void Run()
{
    foreach (var zuberConfig in configDapper.GetList(ConstConfigName.Zuber))
    {
        LogHelper.RunActionNotThrowEx(() =>
        {
            var collected = new List<BaseHouseInfo>();
            var settings = Newtonsoft.Json.JsonConvert.DeserializeObject<dynamic>(zuberConfig.ConfigurationValue);
            var cityName = settings.cityname.Value;
            var sequence = "";
            var lastIndex = settings.pagecount.Value;

            // The bound is inclusive, so pagecount + 1 requests are issued.
            var requestIndex = 0;
            while (requestIndex <= lastIndex)
            {
                var pageResult = GetHouseData(cityName, sequence);
                // Item2 is passed back as the sequence of the next request; Item1 holds the houses.
                sequence = pageResult.Item2;
                collected.AddRange(pageResult.Item1);
                requestIndex++;
            }

            houseDapper.BulkInsertHouses(collected);
            // NOTE(review): the label below says "Douban" although the Zuber configuration is
            // crawled — possibly a copy-paste; left unchanged because it is a runtime string.
        }, "DoubanHouseCrawler CaptureHouseInfo ", zuberConfig);
    }
}
/// <summary>
/// For every configuration of this source, fetches pages in order and stores the parsed
/// houses in the database and in elastic; an empty response ends that configuration's crawl.
/// </summary>
public void Run()
{
    var configs = _configDapper.LoadBySource(Source.GetSourceName());
    foreach (var config in configs)
    {
        LogHelper.RunActionNotThrowEx(() =>
        {
            var pageNum = 0;
            while (pageNum < config.PageCount)
            {
                var payload = GetJsonOrHTML(config, pageNum);
                if (string.IsNullOrEmpty(payload))
                {
                    // Nothing came back for this page: stop processing this configuration.
                    break;
                }

                var pageHouses = ParseHouses(config, payload);
                _houseDapper.BulkInsertHouses(pageHouses);
                _elasticService.SaveHouses(pageHouses);
                pageNum++;
            }
        }, Source.GetSourceName(), config);
    }
}
/// <summary>
/// Crawls the pages configured for one city and bulk-inserts the houses found.
/// </summary>
/// <param name="crawlerConfiguration">
/// Configuration whose JSON value supplies <c>shortcutname</c> and <c>pagecount</c>.
/// </param>
/// <returns>
/// The number of houses collected, or 0 when the configuration has no usable
/// <c>shortcutname</c>.
/// </returns>
private int CaptureHouse(CrawlerConfig crawlerConfiguration)
{
    var settings = JsonConvert.DeserializeObject<dynamic>(crawlerConfiguration.ConfigurationValue);

    // Guard clauses: without a city shortcut name there is nothing to crawl.
    if (settings.shortcutname == null)
    {
        return 0;
    }

    if (string.IsNullOrEmpty(settings.shortcutname.Value))
    {
        return 0;
    }

    string cityShortCutName = settings.shortcutname.Value;
    var collected = new List<BaseHouseInfo>();
    var pageLimit = settings.pagecount.Value;

    // Pages start at 1 and the configured page count is an exclusive bound.
    var pageNum = 1;
    while (pageNum < pageLimit)
    {
        var apiResult = GetResultByAPI(cityShortCutName, pageNum);
        collected.AddRange(GetHouseData(cityShortCutName, apiResult));
        pageNum++;
    }

    // Snapshot the count before handing the list to the insert.
    var capturedCount = collected.Count;
    houseDapper.BulkInsertHouses(collected);
    return capturedCount;
}
/// <summary>
/// Crawls every Chengdufgj configuration entry, bulk-inserts the houses collected for each
/// entry, and logs the total number of houses together with the elapsed time.
/// </summary>
public void Run()
{
    int capturedHouseCount = 0;
    // Stopwatch is monotonic; subtracting DateTime.Now values is skewed by clock/DST changes.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

    foreach (var crawlerConfiguration in configDapper.GetList(ConstConfigName.Chengdufgj).ToList())
    {
        LogHelper.RunActionNotThrowEx(() =>
        {
            List<BaseHouseInfo> houses = new List<BaseHouseInfo>();
            var confInfo = JsonConvert.DeserializeObject<dynamic>(crawlerConfiguration.ConfigurationValue);

            // NOTE(review): starts at page 1 and stops before pagecount, so pagecount - 1
            // pages are fetched. Kept as-is; confirm whether the last page should be included.
            for (var pageNum = 1; pageNum < confInfo.pagecount.Value; pageNum++)
            {
                // The configured path is expected to end right where "page=" can be appended.
                var url = $"http://zf.cdfgj.gov.cn/{confInfo.path.Value}page={pageNum}";
                var houseHTML = GetHouseHTML(url);
                houses.AddRange(GetDataFromHMTL(confInfo.cityname.Value, houseHTML));
            }

            houseDapper.BulkInsertHouses(houses);
            // Only counted after a successful insert, exactly as before.
            capturedHouseCount += houses.Count;
            // NOTE(review): the label below mentions "BaiXing" although the Chengdufgj
            // configuration is crawled — possibly a copy-paste; left unchanged.
        }, "CapturBaiXing", crawlerConfiguration);
    }

    LogHelper.Info($"ChengduZufangCrawler finish.本次共爬取到{capturedHouseCount}条数据,耗时{stopwatch.Elapsed.TotalSeconds}秒。");
}
/// <summary>
/// Crawls every enabled PinPaiGongYu configuration entry, bulk-inserts the houses
/// collected for each entry, and logs the total number of houses together with the
/// elapsed time.
/// </summary>
public void Run()
{
    int capturedHouseCount = 0;
    // Stopwatch is monotonic; subtracting DateTime.Now values is skewed by clock/DST changes.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

    // Disabled configuration entries are skipped.
    var enabledConfigurations = configDapper.GetList(ConstConfigName.PinPaiGongYu)
        .Where(c => c.IsEnabled)
        .ToList();

    foreach (var crawlerConfiguration in enabledConfigurations)
    {
        LogHelper.RunActionNotThrowEx(() =>
        {
            List<BaseHouseInfo> houses = new List<BaseHouseInfo>();
            var confInfo = JsonConvert.DeserializeObject<dynamic>(crawlerConfiguration.ConfigurationValue);

            // Pages are requested with a zero-based index: 0 .. pagecount - 1.
            for (var page = 0; page < confInfo.pagecount.Value; page++)
            {
                var jsonData = GetDataFromAPI(confInfo.shortcutname.Value, page);
                houses.AddRange(GetHouses(confInfo.shortcutname.Value, confInfo.cityname.Value, jsonData));
            }

            houseDapper.BulkInsertHouses(houses);
            // Only counted after a successful insert, exactly as before.
            capturedHouseCount += houses.Count;
        }, "CapturPinPaiHouseInfo", crawlerConfiguration);
    }

    LogHelper.Info($"PinPaiGongYuHouseCrawler finish.本次共爬取到{capturedHouseCount}条数据,耗时{stopwatch.Elapsed.TotalSeconds}秒。");
}