예제 #1
0
        /// <summary>
        /// Requests one page of topics for a Douban group from the v2 API and converts every
        /// topic whose share URL is not already present in <c>DataContent.DoubanHouseInfos</c>
        /// into a <see cref="DoubanHouseInfo"/>.
        /// </summary>
        /// <param name="groupID">Douban group identifier inserted into the API URL.</param>
        /// <param name="cityName">City name copied to each record's <c>LocationCityName</c>.</param>
        /// <param name="pageIndex">Page index; the request's <c>start</c> offset is <c>pageIndex * 50</c>.</param>
        /// <returns>The newly built records; empty when the API result or its topic list is null.</returns>
        public static List<DoubanHouseInfo> GetDataFromAPI(string groupID, string cityName, int pageIndex)
        {
            var lstHouseInfo = new List<DoubanHouseInfo>();
            var apiURL       = $"https://api.douban.com/v2/group/{groupID}/topics?start={pageIndex * 50}";
            var doubanTopic  = WebAPIHelper.GetAPIResult<DoubanTopic>(apiURL);

            if (doubanTopic?.topics == null)
            {
                return lstHouseInfo;
            }

            foreach (var topic in doubanTopic.topics)
            {
                // Skip topics that are already stored (matched by share URL).
                if (DataContent.DoubanHouseInfos.Any(h => h.HouseOnlineURL == topic.share_url))
                {
                    continue;
                }

                // Extract the price once and reuse it for both the numeric and the display field.
                var housePrice = JiebaTools.GetHousePrice(topic.content);

                lstHouseInfo.Add(new DoubanHouseInfo
                {
                    // The topic title is used for both the location and the title field.
                    HouseLocation    = topic.title,
                    HouseTitle       = topic.title,
                    HouseOnlineURL   = topic.share_url,
                    HouseText        = topic.content,
                    HousePrice       = housePrice,
                    IsAnalyzed       = true,
                    DisPlayPrice     = housePrice > 0 ? $"{housePrice}元" : "",
                    Source           = ConstConfigurationName.Douban,
                    LocationCityName = cityName,
                    Status           = 1,
                    PubTime          = DateTime.Parse(topic.created),
                    DataCreateTime   = DateTime.Now,
                });
            }

            return lstHouseInfo;
        }
예제 #2
0
        /// <summary>
        /// Parses a JSON payload containing a <c>topics</c> array into <see cref="DBHouse"/> records.
        /// </summary>
        /// <param name="config">Crawler configuration; only <c>City</c> is read here.</param>
        /// <param name="data">Raw JSON text expected to hold a top-level <c>topics</c> member.</param>
        /// <returns>
        /// One record per topic; an empty list when <paramref name="data"/> is null/empty,
        /// does not deserialize to an object, or has no <c>topics</c> member.
        /// </returns>
        public override List<DBHouse> ParseHouses(DBConfig config, string data)
        {
            var houses = new List<DBHouse>();

            if (string.IsNullOrEmpty(data))
            {
                return houses;
            }

            // A payload without "topics" (for example an error body) yields no houses
            // instead of a NullReferenceException in the loop below.
            var topics = JsonConvert.DeserializeObject<JObject>(data)?["topics"];
            if (topics == null)
            {
                return houses;
            }

            var city = config.City;

            foreach (var topic in topics)
            {
                var title      = topic["title"].ToString();
                var content    = topic["content"].ToString();
                var housePrice = JiebaTools.GetHousePrice(content);
                // Each photo's "alt" value is collected; the list stays null when "photos" is absent.
                var photos     = topic["photos"]?.Select(photo => photo["alt"].ToString()).ToList();

                houses.Add(new DBHouse
                {
                    Id        = Tools.GetUUId(),
                    // The topic title is used for both the location and the title field.
                    Location  = title,
                    Title     = title,
                    OnlineURL = topic["share_url"].ToString(),
                    Text      = content,
                    JsonData  = topic.ToString(),
                    Price     = (int)housePrice,
                    Source    = SourceEnum.Douban.GetSourceName(),
                    City      = city,
                    RentType  = GetRentType(content),
                    PicURLs   = JsonConvert.SerializeObject(photos),
                    PubTime   = topic["created"].ToObject<DateTime>(),
                });
            }

            return houses;
        }
예제 #3
0
        /// <summary>
        /// Parses a JSON payload containing a <c>topics</c> array into <see cref="BaseHouseInfo"/> records.
        /// </summary>
        /// <param name="config">Crawler configuration token; its <c>cityname</c> member is read.</param>
        /// <param name="data">Raw JSON text expected to hold a top-level <c>topics</c> member.</param>
        /// <returns>
        /// One record per topic; an empty list when <paramref name="data"/> is null/empty,
        /// does not deserialize to an object, or has no <c>topics</c> member.
        /// </returns>
        public override List<BaseHouseInfo> ParseHouses(JToken config, string data)
        {
            var houses = new List<BaseHouseInfo>();

            if (string.IsNullOrEmpty(data))
            {
                return houses;
            }

            // A payload without "topics" yields no houses instead of a
            // NullReferenceException in the loop below.
            var topics = JsonConvert.DeserializeObject<JObject>(data)?["topics"];
            if (topics == null)
            {
                return houses;
            }

            var cityName = config["cityname"].ToString();

            foreach (var topic in topics)
            {
                var title      = topic["title"].ToString();
                var content    = topic["content"].ToString();
                var housePrice = JiebaTools.GetHousePrice(content);
                // Each photo's "alt" value is collected; the list stays null when "photos" is absent.
                var photos     = topic["photos"]?.Select(photo => photo["alt"].ToString()).ToList();

                houses.Add(new BaseHouseInfo
                {
                    // The topic title is used for both the location and the title field.
                    HouseLocation    = title,
                    HouseTitle       = title,
                    HouseOnlineURL   = topic["share_url"].ToString(),
                    HouseText        = content,
                    HousePrice       = housePrice,
                    IsAnalyzed       = true,
                    DisPlayPrice     = housePrice > 0 ? $"{housePrice}元" : "",
                    Source           = ConstConfigName.Douban,
                    LocationCityName = cityName,
                    Status           = 1,
                    PicURLs          = JsonConvert.SerializeObject(photos),
                    PubTime          = topic["created"].ToObject<DateTime>()
                });
            }

            return houses;
        }
예제 #4
0
        /// <summary>
        /// Extracts a price for a house, looking at the title first and falling back to the
        /// body text when the title yields 0 and the text is not null or empty.
        /// </summary>
        /// <param name="house">House whose <c>Title</c> and <c>Text</c> are inspected.</param>
        /// <returns>The price found in the title, otherwise the price found in the text.</returns>
        private static int GetHousePrice(DBHouse house)
        {
            var titlePrice = JiebaTools.GetHousePrice(house.Title);

            // Keep the title result when it is non-zero or there is no text to fall back on.
            if (titlePrice != 0 || string.IsNullOrEmpty(house.Text))
            {
                return titlePrice;
            }

            return JiebaTools.GetHousePrice(house.Text);
        }
        /// <summary>
        /// Processes up to 100 Douban records that are not yet analyzed: extracts a price from
        /// the stored text and, when that yields 0, downloads the topic page and retries on the
        /// first paragraph of <c>div.topic-content</c>. Changes are saved in one
        /// <c>SaveChanges</c> call; any exception is logged and ends the run.
        /// </summary>
        public static void AnalyzeDoubanHouseContent()
        {
            LogHelper.Info("AnalyzeDoubanHouseContent Start...");
            int index = 0;

            try
            {
                var lstHouse = dataContent.HouseInfos
                               .Where(h => h.Source == ConstConfigurationName.Douban && h.IsAnalyzed == false)
                               .Take(100)
                               .ToList();

                foreach (var houseInfo in lstHouse)
                {
                    var    housePrice       = JiebaTools.GetHousePrice(houseInfo.HouseText);
                    string houseTextContent = string.Empty;

                    if (housePrice == 0)
                    {
                        // No price in the stored text: fall back to the online topic page.
                        // NOTE(review): records skipped by the `continue`s below stay unanalyzed,
                        // so they appear to be eligible for selection again on the next run — confirm intended.
                        var htmlResult = HTTPHelper.GetHTML(houseInfo.HouseOnlineURL);
                        if (string.IsNullOrEmpty(htmlResult))
                        {
                            continue;
                        }

                        var houseDescription = htmlParser.Parse(htmlResult)
                                               .QuerySelector("div.topic-content")
                                               ?.QuerySelector("p");
                        if (houseDescription == null)
                        {
                            continue;
                        }

                        houseTextContent = houseDescription.TextContent;
                        housePrice       = JiebaTools.GetHousePrice(houseTextContent);
                    }

                    if (housePrice != 0 || !string.IsNullOrEmpty(houseTextContent))
                    {
                        index++;
                        houseInfo.IsAnalyzed = true;
                    }

                    // Only replace the stored text when the page actually supplied some.
                    // Assigning unconditionally erased HouseText whenever the price had been
                    // found in the stored text (houseTextContent is still empty on that path).
                    if (!string.IsNullOrEmpty(houseTextContent))
                    {
                        houseInfo.HouseText = houseTextContent;
                    }

                    houseInfo.HousePrice = housePrice;
                }

                dataContent.SaveChanges();
            }
            catch (Exception ex)
            {
                LogHelper.Error("AnalyzeDoubanHouseContent Exception", ex);
            }

            LogHelper.Info("AnalyzeDoubanHouseContent Finish,Update Count:" + index);
        }
예제 #6
0
        /// <summary>
        /// Analyzes every record returned by <c>DBHouseInfoDAL.LoadUnAnalyzeList</c>: the price is
        /// extracted from the stored text, or via <c>AnalyzeFromWebPage</c> when that yields 0.
        /// Each record is marked analyzed, written back through the DAL and echoed to the console.
        /// Any exception is logged and stops the loop; the elapsed time and processed count are
        /// reported at the end either way.
        /// </summary>
        /// <param name="isSleep">
        /// When true, the thread sleeps for 120 seconds after every 100 processed records.
        /// </param>
        public static void AnalyzeDoubanHouseContentAll(bool isSleep = false)
        {
            var timer = System.Diagnostics.Stopwatch.StartNew();

            LogHelper.Info("AnalyzeDoubanHouseContent Start...");
            Console.WriteLine("AnalyzeDoubanHouseContent Start...");
            int processed = 0;

            try
            {
                var houseDal = new DBHouseInfoDAL();

                foreach (var record in houseDal.LoadUnAnalyzeList())
                {
                    string pageText = string.Empty;
                    var    price    = JiebaTools.GetHousePrice(record.HouseText);

                    if (price != 0)
                    {
                        // The stored text already contains a price.
                        record.Status       = 1;
                        record.DisPlayPrice = price.ToString(CultureInfo.InvariantCulture);
                        record.HousePrice   = price;
                    }
                    else
                    {
                        // Nothing found locally: let the web-page analysis fill in the record.
                        AnalyzeFromWebPage(record, ref price, ref pageText);
                    }

                    record.IsAnalyzed = true;
                    houseDal.UpdateHouseInfo(record);
                    processed++;

                    if (isSleep && processed % 100 == 0)
                    {
                        System.Threading.Thread.Sleep(120 * 1000);
                    }

                    Console.WriteLine("HouseInfo:" + Newtonsoft.Json.JsonConvert.SerializeObject(record));
                }
            }
            catch (Exception ex)
            {
                LogHelper.Error("AnalyzeDoubanHouseContent Exception", ex);
            }

            timer.Stop();

            var elapsedSeconds = timer.Elapsed.TotalSeconds.ToString(CultureInfo.InvariantCulture);
            var summary        = $"AnalyzeDoubanHouseContent Finish,Update Count:{processed};花费时间:{elapsedSeconds}";

            LogHelper.Info(summary);
            Console.WriteLine(summary);
        }
예제 #7
0
        /// <summary>
        /// Downloads one page of a Douban group's discussion list and converts each row of
        /// <c>table.olt</c> that is not already found in <c>DataContent.HouseInfos</c> into a
        /// <see cref="BizHouseInfo"/>.
        /// </summary>
        /// <param name="groupID">Douban group identifier inserted into the page URL.</param>
        /// <param name="cityName">City name copied to each record's <c>LocationCityName</c>.</param>
        /// <param name="pageIndex">Page index; the request's <c>start</c> offset is <c>pageIndex * 25</c>.</param>
        /// <returns>The newly built records; empty when the page or the topic table is missing.</returns>
        private static List<BizHouseInfo> GetDataFromOnlineWeb(string groupID, string cityName, int pageIndex)
        {
            var lstHouseInfo = new List<BizHouseInfo>();
            var url          = $"https://www.douban.com/group/{groupID}/discussion?start={pageIndex * 25}";
            var htmlResult   = DoubanHTTPHelper.GetHTMLForDouban(url);

            if (string.IsNullOrEmpty(htmlResult))
            {
                return lstHouseInfo;
            }

            var tableElement = HtmlParser.Parse(htmlResult).QuerySelector("table.olt");
            if (tableElement == null)
            {
                return lstHouseInfo;
            }

            foreach (var trItem in tableElement.QuerySelectorAll("tr"))
            {
                // Rows without a title link (such as a header row) carry no topic.
                var linkItem = trItem.QuerySelector("td.title")?.QuerySelector("a");
                var houseURL = linkItem?.GetAttribute("href");
                if (string.IsNullOrEmpty(houseURL) || DataContent.HouseInfos.Find(houseURL) != null)
                {
                    continue;
                }

                var houseTitle = linkItem.GetAttribute("title");
                var housePrice = JiebaTools.GetHousePrice(houseTitle);

                // The time cell is looked up on the row: a td cannot be nested directly in
                // the title td, so searching inside the title cell never found it.
                // NOTE(review): assumes the cell holds either "MM-dd HH:mm" (no year) or a
                // full date — confirm against the live page markup.
                var now      = DateTime.Now;
                var pubTime  = now;
                var timeText = trItem.QuerySelector("td.time")?.TextContent?.Trim();
                if (!string.IsNullOrEmpty(timeText))
                {
                    DateTime parsed;
                    if (DateTime.TryParse(now.ToString("yyyy-") + timeText, out parsed))
                    {
                        // A year-less date that lands in the future belongs to the previous year.
                        pubTime = parsed > now.AddDays(1) ? parsed.AddYears(-1) : parsed;
                    }
                    else if (DateTime.TryParse(timeText, out parsed))
                    {
                        pubTime = parsed;
                    }
                }

                lstHouseInfo.Add(new BizHouseInfo
                {
                    HouseTitle       = houseTitle,
                    HouseOnlineURL   = houseURL,
                    // Only the title is available on the list page, so it also fills
                    // the location and text fields.
                    HouseLocation    = houseTitle,
                    HouseText        = houseTitle,
                    DataCreateTime   = now,
                    PubTime          = pubTime,
                    DisPlayPrice     = housePrice > 0 ? $"{housePrice}元" : "",
                    Source           = ConstConfigurationName.Douban,
                    HousePrice       = housePrice,
                    LocationCityName = cityName,
                    // Records without a price are left unanalyzed with Status 0.
                    IsAnalyzed       = housePrice > 0,
                    Status           = housePrice > 0 ? 1 : 0,
                });
            }

            return lstHouseInfo;
        }
예제 #8
0
        /// <summary>
        /// Parses a V2EX topic-list HTML page into <see cref="DBHouse"/> records, one per
        /// <c>div</c> under <c>#TopicsNode</c> whose title link passes <c>CheckTopic</c>, and
        /// then passes the result to <c>FillGoodHouseLocation</c>.
        /// </summary>
        /// <param name="config">Crawler configuration; only <c>City</c> is read here.</param>
        /// <param name="data">Raw HTML of the topic list page.</param>
        /// <returns>The parsed records; empty when no topic nodes are found.</returns>
        public override List<DBHouse> ParseHouses(DBConfig config, string data)
        {
            var houses  = new List<DBHouse>();
            var city    = config.City;
            var htmlDoc = htmlParser.Parse(data);
            var topics  = htmlDoc?.QuerySelector("#TopicsNode")?.QuerySelectorAll("div");

            if (topics == null || !topics.Any())
            {
                return houses;
            }

            foreach (var topic in topics)
            {
                var title = topic.QuerySelector("span.item_title")?.QuerySelector("a");
                if (!CheckTopic(title))
                {
                    continue;
                }

                // A link without an href cannot produce a usable topic URL.
                var path = title?.GetAttribute("href");
                if (string.IsNullOrEmpty(path))
                {
                    continue;
                }

                // Drop the fragment part of the link (everything from the first '#').
                var fragmentStart = path.IndexOf('#');
                if (fragmentStart >= 0)
                {
                    path = path.Substring(0, fragmentStart);
                }

                var titleText = title.TextContent;

                houses.Add(new DBHouse
                {
                    // The link text is used for both the title and the location field.
                    Title     = titleText,
                    Location  = titleText,
                    OnlineURL = $"https://www.v2ex.com{path}",
                    JsonData  = Newtonsoft.Json.JsonConvert.SerializeObject(new { html = topic.OuterHtml }),
                    Source    = SourceEnum.V2ex.GetSourceName(),
                    Price     = JiebaTools.GetHousePrice(titleText),
                    Id        = Tools.GetGuid(),
                    City      = city,
                    PicURLs   = Tools.GetPicURLs(""),
                    PubTime   = DateTime.Now,
                });
            }

            // NOTE(review): the first argument looks like a hard-coded service key —
            // consider moving it to configuration; confirm what FillGoodHouseLocation expects.
            FillGoodHouseLocation("478aea7d5ba0cfb604106db0a33c7119", city, houses);
            return houses;
        }
예제 #9
0
        /// <summary>
        /// Requests one page (50 topics) of a Douban group from the v2 API and converts each
        /// topic into a <see cref="BaseHouseInfo"/>.
        /// </summary>
        /// <param name="groupID">Douban group identifier inserted into the API URL.</param>
        /// <param name="cityName">City name copied to each record's <c>LocationCityName</c>.</param>
        /// <param name="pageIndex">Page index; the request's <c>start</c> offset is <c>pageIndex * 50</c>.</param>
        /// <returns>
        /// One record per topic; an empty list when the response is null/empty, does not
        /// deserialize to an object, or has no <c>topics</c> member.
        /// </returns>
        public static List<BaseHouseInfo> GetHouseData(string groupID, string cityName, int pageIndex)
        {
            var lstHouseInfo = new List<BaseHouseInfo>();
            var apiURL       = $"https://api.douban.com/v2/group/{groupID}/topics?start={pageIndex * 50}&count=50";

            LogHelper.Debug($"url:{apiURL},groupID:{groupID}, city:{cityName}");
            var result = GetAPIResult(apiURL);

            if (string.IsNullOrEmpty(result))
            {
                return lstHouseInfo;
            }

            // A response without "topics" (for example an error body) yields no houses
            // instead of a NullReferenceException in the loop below.
            var topics = JsonConvert.DeserializeObject<JObject>(result)?["topics"];
            if (topics == null)
            {
                return lstHouseInfo;
            }

            foreach (var topic in topics)
            {
                var title      = topic["title"].ToString();
                var content    = topic["content"].ToString();
                var housePrice = JiebaTools.GetHousePrice(content);
                // Each photo's "alt" value is collected; the list stays null when "photos" is absent.
                var photos     = topic["photos"]?.Select(photo => photo["alt"].ToString()).ToList();

                lstHouseInfo.Add(new BaseHouseInfo
                {
                    // The topic title is used for both the location and the title field.
                    HouseLocation    = title,
                    HouseTitle       = title,
                    HouseOnlineURL   = topic["share_url"].ToString(),
                    HouseText        = content,
                    HousePrice       = housePrice,
                    IsAnalyzed       = true,
                    DisPlayPrice     = housePrice > 0 ? $"{housePrice}元" : "",
                    Source           = ConstConfigName.Douban,
                    LocationCityName = cityName,
                    Status           = 1,
                    PicURLs          = JsonConvert.SerializeObject(photos),
                    PubTime          = topic["created"].ToObject<DateTime>()
                });
            }

            return lstHouseInfo;
        }
예제 #10
0
        /// <summary>
        /// Downloads the online page of a house record and fills in its price and text from
        /// the first paragraph of <c>div.topic-content</c>.
        /// Status is set to 2 when no HTML is returned, to 3 when the page has no topic
        /// paragraph, and to 1 when a price or non-empty text was found.
        /// </summary>
        /// <param name="houseInfo">Record to update; its <c>HouseOnlineURL</c> is downloaded.</param>
        /// <param name="housePrice">Receives the price extracted from the page text; untouched when nothing is parsed.</param>
        /// <param name="houseTextContent">Receives the page text; untouched when nothing is parsed.</param>
        private static void AnalyzeFromWebPage(Web.Model.DBHouseInfo houseInfo,
                                               ref decimal housePrice, ref string houseTextContent)
        {
            var htmlResult = DoubanHTTPHelper.GetHTMLForDouban(houseInfo.HouseOnlineURL);

            // No page content came back; treated as a 404 page.
            if (string.IsNullOrEmpty(htmlResult))
            {
                houseInfo.Status = 2;
                return;
            }

            // Look the paragraph up once instead of re-running the selector for every check.
            var firstParagraph = HtmlParser.Parse(htmlResult)
                                 .QuerySelector("div.topic-content")
                                 ?.QuerySelector("p");

            // The page has no post content.
            if (firstParagraph == null)
            {
                houseInfo.Status = 3;
                return;
            }

            // Take the post text and extract the price from it.
            houseTextContent = firstParagraph.TextContent;
            housePrice       = JiebaTools.GetHousePrice(houseTextContent);

            // Status is left unchanged when neither a price nor any text was found.
            if (housePrice != 0 || !string.IsNullOrEmpty(houseTextContent))
            {
                houseInfo.Status = 1;
            }

            houseInfo.DisPlayPrice = housePrice.ToString(CultureInfo.InvariantCulture);
            houseInfo.HousePrice   = housePrice;
            houseInfo.HouseText    = houseTextContent;
        }