/// <summary>
/// Requests one page of topics for a Douban group from the v2 API and converts every topic
/// whose share URL is not already present in <c>DataContent.DoubanHouseInfos</c> into a
/// <see cref="DoubanHouseInfo"/>.
/// </summary>
/// <param name="groupID">Douban group identifier inserted into the API URL.</param>
/// <param name="cityName">City name copied onto every created record.</param>
/// <param name="pageIndex">Page index; the API <c>start</c> offset is <c>pageIndex * 50</c>.</param>
/// <returns>The newly created records; empty when the API result or its topic list is null.</returns>
public static List<DoubanHouseInfo> GetDataFromAPI(string groupID, string cityName, int pageIndex)
{
    var lstHouseInfo = new List<DoubanHouseInfo>();
    var apiURL = $"https://api.douban.com/v2/group/{groupID}/topics?start={pageIndex * 50}";
    var doubanTopic = WebAPIHelper.GetAPIResult<DoubanTopic>(apiURL);
    if (doubanTopic == null || doubanTopic.topics == null)
    {
        return lstHouseInfo;
    }

    foreach (var topic in doubanTopic.topics)
    {
        // Skip topics that have already been stored (matched by share URL).
        if (DataContent.DoubanHouseInfos.Any(h => h.HouseOnlineURL == topic.share_url))
        {
            continue;
        }

        // Extract the price once and reuse it for both the numeric and the display field.
        var housePrice = JiebaTools.GetHousePrice(topic.content);
        lstHouseInfo.Add(new DoubanHouseInfo()
        {
            HouseLocation = topic.title,
            HouseTitle = topic.title,
            HouseOnlineURL = topic.share_url,
            HouseText = topic.content,
            HousePrice = housePrice,
            IsAnalyzed = true,
            DisPlayPrice = housePrice > 0 ? $"{housePrice}元" : "",
            Source = ConstConfigurationName.Douban,
            LocationCityName = cityName,
            Status = 1,
            PubTime = DateTime.Parse(topic.created),
            DataCreateTime = DateTime.Now,
        });
    }

    return lstHouseInfo;
}
/// <summary>
/// Parses a Douban topics JSON payload into <see cref="DBHouse"/> records.
/// </summary>
/// <param name="config">Crawler configuration; only <c>City</c> is read here.</param>
/// <param name="data">Raw JSON text that is expected to carry a <c>topics</c> member.</param>
/// <returns>
/// One record per topic; an empty list when <paramref name="data"/> is null/empty or the
/// payload has no <c>topics</c> member.
/// </returns>
public override List<DBHouse> ParseHouses(DBConfig config, string data)
{
    var houses = new List<DBHouse>();
    if (string.IsNullOrEmpty(data))
    {
        return houses;
    }

    var resultJObject = JsonConvert.DeserializeObject<JObject>(data);
    // A payload without "topics" would otherwise fail with a NullReferenceException in the loop below.
    var topics = resultJObject == null ? null : resultJObject["topics"];
    if (topics == null)
    {
        return houses;
    }

    var city = config.City;
    foreach (var topic in topics)
    {
        var title = topic["title"].ToString();
        var content = topic["content"].ToString();
        var housePrice = JiebaTools.GetHousePrice(content);
        // "photos" is optional; when absent, photos stays null and is serialized as such.
        var photos = topic["photos"]?.Select(photo => photo["alt"].ToString()).ToList();

        houses.Add(new DBHouse()
        {
            Id = Tools.GetUUId(),
            Location = title,
            Title = title,
            OnlineURL = topic["share_url"].ToString(),
            Text = content,
            JsonData = topic.ToString(),
            Price = (int)housePrice,
            Source = SourceEnum.Douban.GetSourceName(),
            City = city,
            RentType = GetRentType(content),
            PicURLs = JsonConvert.SerializeObject(photos),
            PubTime = topic["created"].ToObject<DateTime>(),
        });
    }

    return houses;
}
/// <summary>
/// Parses a Douban topics JSON payload into <see cref="BaseHouseInfo"/> records.
/// </summary>
/// <param name="config">Crawler configuration token; only its <c>cityname</c> member is read here.</param>
/// <param name="data">Raw JSON text that is expected to carry a <c>topics</c> member.</param>
/// <returns>
/// One record per topic; an empty list when <paramref name="data"/> is null/empty or the
/// payload has no <c>topics</c> member.
/// </returns>
public override List<BaseHouseInfo> ParseHouses(JToken config, string data)
{
    var houses = new List<BaseHouseInfo>();
    if (string.IsNullOrEmpty(data))
    {
        return houses;
    }

    var resultJObject = JsonConvert.DeserializeObject<JObject>(data);
    // A payload without "topics" would otherwise fail with a NullReferenceException in the loop below.
    var topics = resultJObject == null ? null : resultJObject["topics"];
    if (topics == null)
    {
        return houses;
    }

    var cityName = config["cityname"].ToString();
    foreach (var topic in topics)
    {
        var title = topic["title"].ToString();
        var content = topic["content"].ToString();
        var housePrice = JiebaTools.GetHousePrice(content);
        // "photos" is optional; when absent, photos stays null and is serialized as such.
        var photos = topic["photos"]?.Select(photo => photo["alt"].ToString()).ToList();

        houses.Add(new BaseHouseInfo()
        {
            HouseLocation = title,
            HouseTitle = title,
            HouseOnlineURL = topic["share_url"].ToString(),
            HouseText = content,
            HousePrice = housePrice,
            IsAnalyzed = true,
            DisPlayPrice = housePrice > 0 ? $"{housePrice}元" : "",
            Source = ConstConfigName.Douban,
            LocationCityName = cityName,
            Status = 1,
            PicURLs = JsonConvert.SerializeObject(photos),
            PubTime = topic["created"].ToObject<DateTime>()
        });
    }

    return houses;
}
/// <summary>
/// Extracts a price for a house, preferring the title and falling back to the body text.
/// </summary>
/// <param name="house">Record whose <c>Title</c> and <c>Text</c> are scanned.</param>
/// <returns>
/// The price extracted from the title when it is non-zero or when the text is null/empty;
/// otherwise the price extracted from the text.
/// </returns>
private static int GetHousePrice(DBHouse house)
{
    var titlePrice = JiebaTools.GetHousePrice(house.Title);

    // The title result stands when it found a price or there is no body text to try.
    if (titlePrice != 0 || string.IsNullOrEmpty(house.Text))
    {
        return titlePrice;
    }

    return JiebaTools.GetHousePrice(house.Text);
}
/// <summary>
/// Processes up to 100 Douban records that are not yet analyzed: extracts a price from the
/// stored text and, when none is found, downloads the topic page and retries on the first
/// paragraph of <c>div.topic-content</c>. Changes are saved in one <c>SaveChanges</c> call.
/// </summary>
/// <remarks>
/// Any exception is logged and swallowed, so changes made before the failure are not saved.
/// Records whose page cannot be downloaded or parsed are skipped and stay unanalyzed.
/// </remarks>
public static void AnalyzeDoubanHouseContent()
{
    LogHelper.Info("AnalyzeDoubanHouseContent Start...");
    int index = 0;
    try
    {
        var lstHouse = dataContent.HouseInfos
            .Where(h => h.Source == ConstConfigurationName.Douban && h.IsAnalyzed == false)
            .Take(100)
            .ToList();

        foreach (var houseInfo in lstHouse)
        {
            var housePrice = JiebaTools.GetHousePrice(houseInfo.HouseText);
            string houseTextContent = string.Empty;

            if (housePrice == 0)
            {
                // No price in the stored text: fall back to the topic page itself.
                var htmlResult = HTTPHelper.GetHTML(houseInfo.HouseOnlineURL);
                if (string.IsNullOrEmpty(htmlResult))
                {
                    continue;
                }

                var page = htmlParser.Parse(htmlResult);
                var topicContent = page.QuerySelector("div.topic-content");
                if (topicContent == null)
                {
                    continue;
                }

                var houseDescription = topicContent.QuerySelector("p");
                if (houseDescription == null)
                {
                    continue;
                }

                houseTextContent = houseDescription.TextContent;
                housePrice = JiebaTools.GetHousePrice(houseTextContent);
            }

            if (housePrice != 0 || !string.IsNullOrEmpty(houseTextContent))
            {
                index++;
                houseInfo.IsAnalyzed = true;
            }

            // Only replace the stored text when page text was actually extracted; otherwise
            // a record whose price came from its existing text would have that text erased.
            if (!string.IsNullOrEmpty(houseTextContent))
            {
                houseInfo.HouseText = houseTextContent;
            }

            houseInfo.HousePrice = housePrice;
        }

        dataContent.SaveChanges();
    }
    catch (Exception ex)
    {
        LogHelper.Error("AnalyzeDoubanHouseContent Exception", ex);
    }

    LogHelper.Info("AnalyzeDoubanHouseContent Finish,Update Count:" + index);
}
/// <summary>
/// Analyzes every record returned by <c>DBHouseInfoDAL.LoadUnAnalyzeList</c>: extracts the
/// price from the stored text, falls back to <c>AnalyzeFromWebPage</c> when none is found,
/// marks the record as analyzed and persists it. Progress and elapsed time go to the log
/// and the console.
/// </summary>
/// <param name="isSleep">
/// When true, the thread sleeps for 120 seconds after every 100th processed record.
/// </param>
/// <remarks>Any exception ends the loop; it is logged and not rethrown.</remarks>
public static void AnalyzeDoubanHouseContentAll(bool isSleep = false)
{
    var timer = System.Diagnostics.Stopwatch.StartNew();
    LogHelper.Info("AnalyzeDoubanHouseContent Start...");
    Console.WriteLine("AnalyzeDoubanHouseContent Start...");

    int processed = 0;
    try
    {
        var dal = new DBHouseInfoDAL();
        foreach (var houseInfo in dal.LoadUnAnalyzeList())
        {
            var housePrice = JiebaTools.GetHousePrice(houseInfo.HouseText);
            string houseTextContent = string.Empty;

            if (housePrice != 0)
            {
                // The stored text already yields a price.
                houseInfo.Status = 1;
                houseInfo.DisPlayPrice = housePrice.ToString(CultureInfo.InvariantCulture);
                houseInfo.HousePrice = housePrice;
            }
            else
            {
                AnalyzeFromWebPage(houseInfo, ref housePrice, ref houseTextContent);
            }

            houseInfo.IsAnalyzed = true;
            dal.UpdateHouseInfo(houseInfo);
            processed++;

            bool batchBoundary = processed % 100 == 0;
            if (isSleep && batchBoundary)
            {
                System.Threading.Thread.Sleep(1000 * 120);
            }

            Console.WriteLine("HouseInfo:" + Newtonsoft.Json.JsonConvert.SerializeObject(houseInfo));
        }
    }
    catch (Exception ex)
    {
        LogHelper.Error("AnalyzeDoubanHouseContent Exception", ex);
    }

    timer.Stop();
    var copyTime = timer.Elapsed.TotalSeconds.ToString(CultureInfo.InvariantCulture);
    var finishMessage = "AnalyzeDoubanHouseContent Finish,Update Count:" + processed + ";花费时间:" + copyTime;
    LogHelper.Info(finishMessage);
    Console.WriteLine(finishMessage);
}
/// <summary>
/// Downloads one page of a Douban group's discussion list and converts each row of
/// <c>table.olt</c> that is not already stored into a <see cref="BizHouseInfo"/>.
/// </summary>
/// <param name="groupID">Douban group identifier inserted into the page URL.</param>
/// <param name="cityName">City name copied onto every created record.</param>
/// <param name="pageIndex">Page index; the <c>start</c> offset is <c>pageIndex * 25</c>.</param>
/// <returns>
/// The newly created records; empty when the page could not be downloaded or contains no
/// <c>table.olt</c>.
/// </returns>
private static List<BizHouseInfo> GetDataFromOnlineWeb(string groupID, string cityName, int pageIndex)
{
    var lstHouseInfo = new List<BizHouseInfo>();
    var url = $"https://www.douban.com/group/{groupID}/discussion?start={pageIndex * 25}";
    var htmlResult = DoubanHTTPHelper.GetHTMLForDouban(url);
    if (string.IsNullOrEmpty(htmlResult))
    {
        return lstHouseInfo;
    }

    var page = HtmlParser.Parse(htmlResult);
    var tableElement = page.QuerySelector("table.olt");
    if (tableElement == null)
    {
        return lstHouseInfo;
    }

    foreach (var trItem in tableElement.QuerySelectorAll("tr"))
    {
        var titleItem = trItem.QuerySelector("td.title");
        var link = titleItem == null ? null : titleItem.QuerySelector("a");
        var houseURL = link == null ? null : link.GetAttribute("href");

        // Rows without a usable topic link cannot be stored or de-duplicated; skip them
        // together with rows whose URL is already known.
        if (string.IsNullOrEmpty(houseURL) || DataContent.HouseInfos.Find(houseURL) != null)
        {
            continue;
        }

        var houseTitle = link.GetAttribute("title");
        var housePrice = JiebaTools.GetHousePrice(houseTitle);
        var now = DateTime.Now;

        // The time cell is a sibling of the title cell, so it has to be looked up on the row.
        // Its text carries no year, so the current year is prefixed before parsing.
        // NOTE(review): a row from the previous year would be dated in the current year — confirm whether this matters.
        var timeItem = trItem.QuerySelector("td.time");
        DateTime pubTime;
        if (timeItem == null || !DateTime.TryParse(now.ToString("yyyy-") + timeItem.InnerHtml.Trim(), out pubTime))
        {
            pubTime = now;
        }

        lstHouseInfo.Add(new BizHouseInfo()
        {
            HouseTitle = houseTitle,
            HouseOnlineURL = houseURL,
            HouseLocation = houseTitle,
            HouseText = houseTitle,
            DataCreateTime = now,
            PubTime = pubTime,
            DisPlayPrice = housePrice > 0 ? $"{housePrice}元" : "",
            Source = ConstConfigurationName.Douban,
            HousePrice = housePrice,
            LocationCityName = cityName,
            // Without a price in the title the record is left for a later analysis pass.
            IsAnalyzed = housePrice > 0,
            Status = housePrice > 0 ? 1 : 0,
        });
    }

    return lstHouseInfo;
}
/// <summary>
/// Parses a V2EX topic-list HTML page into <see cref="DBHouse"/> records, one for each
/// <c>div</c> under <c>#TopicsNode</c> whose title link passes <c>CheckTopic</c>, and then
/// passes the result to <c>FillGoodHouseLocation</c>.
/// </summary>
/// <param name="config">Crawler configuration; only <c>City</c> is read here.</param>
/// <param name="data">Raw HTML of the topic list page.</param>
/// <returns>The parsed records; empty when the page has no topic nodes.</returns>
public override List<DBHouse> ParseHouses(DBConfig config, string data)
{
    var houses = new List<DBHouse>();
    var city = config.City;
    var htmlDoc = htmlParser.Parse(data);
    var topics = htmlDoc?.QuerySelector("#TopicsNode")?.QuerySelectorAll("div");
    if (topics == null || !topics.Any())
    {
        return houses;
    }

    foreach (var topic in topics)
    {
        var title = topic.QuerySelector("span.item_title")?.QuerySelector("a");
        if (title == null || !CheckTopic(title))
        {
            continue;
        }

        var href = title.GetAttribute("href");
        if (string.IsNullOrEmpty(href))
        {
            // Without a link there is no topic URL to store.
            continue;
        }

        // Drop the fragment part of the link, keeping only the path before '#'.
        var path = href.Split('#')[0];
        var titleText = title.TextContent;

        houses.Add(new DBHouse
        {
            Id = Tools.GetGuid(),
            Title = titleText,
            Location = titleText,
            OnlineURL = $"https://www.v2ex.com{path}",
            JsonData = Newtonsoft.Json.JsonConvert.SerializeObject(new { html = topic.OuterHtml }),
            Source = SourceEnum.V2ex.GetSourceName(),
            Price = JiebaTools.GetHousePrice(titleText),
            City = city,
            PicURLs = Tools.GetPicURLs(""),
            // The list page is not parsed for a publish time; the crawl time is used instead.
            PubTime = DateTime.Now,
        });
    }

    // NOTE(review): the literal below looks like an API key — confirm, and consider moving it to configuration.
    FillGoodHouseLocation("478aea7d5ba0cfb604106db0a33c7119", city, houses);
    return houses;
}
/// <summary>
/// Requests one page (50 topics) of a Douban group from the v2 API and converts every
/// returned topic into a <see cref="BaseHouseInfo"/>.
/// </summary>
/// <param name="groupID">Douban group identifier inserted into the API URL.</param>
/// <param name="cityName">City name copied onto every created record.</param>
/// <param name="pageIndex">Page index; the API <c>start</c> offset is <c>pageIndex * 50</c>.</param>
/// <returns>
/// One record per topic; an empty list when the API returned nothing or the response has
/// no <c>topics</c> member.
/// </returns>
public static List<BaseHouseInfo> GetHouseData(string groupID, string cityName, int pageIndex)
{
    var lstHouseInfo = new List<BaseHouseInfo>();
    var apiURL = $"https://api.douban.com/v2/group/{groupID}/topics?start={pageIndex * 50}&count=50";
    LogHelper.Debug($"url:{apiURL},groupID:{groupID}, city:{cityName}");

    var result = GetAPIResult(apiURL);
    if (string.IsNullOrEmpty(result))
    {
        return lstHouseInfo;
    }

    var resultJObject = JsonConvert.DeserializeObject<JObject>(result);
    // A response without "topics" would otherwise fail with a NullReferenceException in the loop below.
    var topics = resultJObject == null ? null : resultJObject["topics"];
    if (topics == null)
    {
        return lstHouseInfo;
    }

    foreach (var topic in topics)
    {
        var title = topic["title"].ToString();
        var content = topic["content"].ToString();
        var housePrice = JiebaTools.GetHousePrice(content);
        // "photos" is optional; when absent, photos stays null and is serialized as such.
        var photos = topic["photos"]?.Select(photo => photo["alt"].ToString()).ToList();

        lstHouseInfo.Add(new BaseHouseInfo()
        {
            HouseLocation = title,
            HouseTitle = title,
            HouseOnlineURL = topic["share_url"].ToString(),
            HouseText = content,
            HousePrice = housePrice,
            IsAnalyzed = true,
            DisPlayPrice = housePrice > 0 ? $"{housePrice}元" : "",
            Source = ConstConfigName.Douban,
            LocationCityName = cityName,
            Status = 1,
            PicURLs = JsonConvert.SerializeObject(photos),
            PubTime = topic["created"].ToObject<DateTime>()
        });
    }

    return lstHouseInfo;
}
/// <summary>
/// Downloads the topic page of a record and tries to extract its text and price from the
/// first paragraph of <c>div.topic-content</c>.
/// </summary>
/// <param name="houseInfo">
/// Record to update. <c>Status</c> becomes 2 when no page HTML was obtained (treated as a
/// 404 page), 3 when the page has no topic paragraph, and 1 when a price or non-empty text
/// was extracted.
/// </param>
/// <param name="housePrice">Receives the extracted price when a paragraph was found.</param>
/// <param name="houseTextContent">Receives the paragraph text when a paragraph was found.</param>
private static void AnalyzeFromWebPage(Web.Model.DBHouseInfo houseInfo, ref decimal housePrice, ref string houseTextContent)
{
    var htmlResult = DoubanHTTPHelper.GetHTMLForDouban(houseInfo.HouseOnlineURL);

    // No page content at all: treated as a 404 page.
    if (string.IsNullOrEmpty(htmlResult))
    {
        houseInfo.Status = 2;
        return;
    }

    var page = HtmlParser.Parse(htmlResult);
    var topicContent = page.QuerySelector("div.topic-content");
    var firstParagraph = topicContent == null ? null : topicContent.QuerySelector("p");

    // The topic has no content paragraph.
    if (firstParagraph == null)
    {
        houseInfo.Status = 3;
        return;
    }

    // Read the topic text, then look for a price in it.
    houseTextContent = firstParagraph.TextContent;
    housePrice = JiebaTools.GetHousePrice(houseTextContent);

    bool extractedSomething = housePrice != 0 || !string.IsNullOrEmpty(houseTextContent);
    if (extractedSomething)
    {
        houseInfo.Status = 1;
    }

    houseInfo.DisPlayPrice = housePrice.ToString(CultureInfo.InvariantCulture);
    houseInfo.HousePrice = housePrice;
    houseInfo.HouseText = houseTextContent;
}