/// <summary>
/// Processes a batch of up to 100 Douban house records that are not yet analyzed.
/// The price is first extracted from the stored house text; when that yields 0 the
/// online topic page is downloaded and the first paragraph of its
/// <c>div.topic-content</c> element is used instead. All changes are saved in one call.
/// </summary>
/// <remarks>
/// Records whose page cannot be fetched or lacks the expected elements are skipped
/// and stay un-analyzed. Any exception is logged and ends the batch before saving.
/// </remarks>
public static void AnalyzeDoubanHouseContent()
        {
            LogHelper.Info("AnalyzeDoubanHouseContent Start...");
            int index = 0;

            try
            {
                var lstHouse = dataContent.HouseInfos.Where(h =>
                                                            h.Source == ConstConfigurationName.Douban && h.IsAnalyzed == false).Take(100).ToList();


                foreach (var houseInfo in lstHouse)
                {
                    var    housePrice       = JiebaTools.GetHousePrice(houseInfo.HouseText);
                    string houseTextContent = string.Empty;
                    if (housePrice == 0)
                    {
                        // The stored text gave no price: fall back to the online topic page.
                        var htmlResult = HTTPHelper.GetHTML(houseInfo.HouseOnlineURL);
                        if (string.IsNullOrEmpty(htmlResult))
                        {
                            continue;
                        }
                        var page         = htmlParser.Parse(htmlResult);
                        var topicContent = page.QuerySelector("div.topic-content");
                        if (topicContent == null)
                        {
                            continue;
                        }
                        var houseDescription = topicContent.QuerySelector("p");
                        if (houseDescription == null)
                        {
                            continue;
                        }
                        houseTextContent = houseDescription.TextContent;
                        housePrice       = JiebaTools.GetHousePrice(houseTextContent);
                    }

                    if (housePrice != 0 || !string.IsNullOrEmpty(houseTextContent))
                    {
                        index++;
                        houseInfo.IsAnalyzed = true;
                    }

                    // Replace the stored text only when the page actually yielded some;
                    // otherwise the existing text (which may be where the price came
                    // from) would be overwritten with an empty string.
                    if (!string.IsNullOrEmpty(houseTextContent))
                    {
                        houseInfo.HouseText = houseTextContent;
                    }
                    houseInfo.HousePrice = housePrice;
                }
                dataContent.SaveChanges();
            }
            catch (Exception ex)
            {
                LogHelper.Error("AnalyzeDoubanHouseContent Exception", ex);
            }


            LogHelper.Info("AnalyzeDoubanHouseContent Finish,Update Count:" + index);
        }
예제 #2
0
        /// <summary>
        /// Downloads one page of a Douban group's discussion list and converts every
        /// topic row whose URL is not already stored into a <c>BizHouseInfo</c>.
        /// </summary>
        /// <param name="groupID">Douban group identifier placed in the request URL.</param>
        /// <param name="cityName">Value copied into <c>LocationCityName</c> of each result.</param>
        /// <param name="pageIndex">Zero-based page number; the request offset is <c>pageIndex * 25</c>.</param>
        /// <returns>
        /// The newly found records; empty when the page could not be downloaded or
        /// contains no <c>table.olt</c> element.
        /// </returns>
        private static List <BizHouseInfo> GetDataFromOnlineWeb(string groupID, string cityName, int pageIndex)
        {
            // URLs of Douban records already stored, used to skip topics seen before.
            var hsDoubanHouseURL = new HashSet <string>(
                dataContent.HouseInfos.Where(h => h.Source == ConstConfigurationName.Douban)
                .Select(h => h.HouseOnlineURL).Distinct().ToList());

            List <BizHouseInfo> lstHouseInfo = new List <BizHouseInfo>();

            var url        = $"https://www.douban.com/group/{groupID}/discussion?start={pageIndex * 25}";
            var htmlResult = HTTPHelper.GetHTML(url);

            if (string.IsNullOrEmpty(htmlResult))
            {
                return(lstHouseInfo);
            }
            var page = htmlParser.Parse(htmlResult);

            // The topic table can be absent (e.g. an unexpected page was returned);
            // treat that as "no results" instead of dereferencing null.
            var topicTable = page.QuerySelector("table.olt");
            if (topicTable == null)
            {
                return(lstHouseInfo);
            }

            foreach (var trItem in topicTable.QuerySelectorAll("tr"))
            {
                var titleItem = trItem.QuerySelector("td.title");
                var titleLink = titleItem?.QuerySelector("a");
                if (titleLink == null)
                {
                    // Rows without a title cell or without a link carry no topic.
                    continue;
                }

                var houseURL = titleLink.GetAttribute("href");
                if (hsDoubanHouseURL.Contains(houseURL))
                {
                    continue;
                }

                var houseTitle = titleLink.GetAttribute("title");

                // NOTE(review): td.time is searched inside the td.title cell; if the time
                // cell is a sibling column this never matches and PubTime is always
                // "now" - confirm against the page markup before changing.
                var timeItem = titleItem.QuerySelector("td.time");

                var houseInfo = new BizHouseInfo()
                {
                    HouseTitle     = houseTitle,
                    HouseOnlineURL = houseURL,
                    HouseLocation  = houseTitle,
                    HouseText      = houseTitle,
                    DataCreateTime = DateTime.Now,
                    PubTime        = timeItem != null
                    ? DateTime.Parse(DateTime.Now.ToString("yyyy-") + timeItem.InnerHtml)
                    : DateTime.Now,
                    DisPlayPrice     = "",
                    Source           = ConstConfigurationName.Douban,
                    HousePrice       = 0,
                    LocationCityName = cityName
                };
                lstHouseInfo.Add(houseInfo);
            }
            return(lstHouseInfo);
        }
예제 #3
0
        /// <summary>
        /// Downloads one page of a Douban group's discussion list, adds a
        /// <c>BizHouseInfo</c> to the data context for every topic row found and
        /// saves the changes.
        /// </summary>
        /// <param name="groupID">Douban group identifier placed in the request URL.</param>
        /// <param name="index">Zero-based page number; the request offset is <c>index * 25</c>.</param>
        /// <param name="cityName">Value copied into <c>LocationCityName</c> of each record.</param>
        public static void GetDataFromOnlineWeb(string groupID, int index, string cityName)
        {
            var url        = $"https://www.douban.com/group/{groupID}/discussion?start={index * 25}";
            var htmlResult = HTTPHelper.GetHTML(url);

            if (string.IsNullOrEmpty(htmlResult))
            {
                return;
            }
            var page = htmlParser.Parse(htmlResult);

            // The topic table can be absent (e.g. an unexpected page was returned);
            // there is nothing to store in that case.
            var topicTable = page.QuerySelector("table.olt");
            if (topicTable == null)
            {
                return;
            }

            foreach (var trItem in topicTable.QuerySelectorAll("tr"))
            {
                var titleItem = trItem.QuerySelector("td.title");
                var titleLink = titleItem?.QuerySelector("a");
                if (titleLink == null)
                {
                    // Rows without a title cell or without a link carry no topic.
                    continue;
                }

                var houseTitle = titleLink.GetAttribute("title");

                // NOTE(review): td.time is searched inside the td.title cell; if the time
                // cell is a sibling column this never matches and PubTime is always
                // "now" - confirm against the page markup before changing.
                var timeItem = titleItem.QuerySelector("td.time");

                var houseInfo = new BizHouseInfo()
                {
                    HouseTitle     = houseTitle,
                    HouseOnlineURL = titleLink.GetAttribute("href"),
                    HouseLocation  = houseTitle,
                    HouseText      = houseTitle,
                    DataCreateTime = DateTime.Now,
                    PubTime        = timeItem != null
                    ? DateTime.Parse(DateTime.Now.ToString("yyyy-") + timeItem.InnerHtml)
                    : DateTime.Now,
                    DisPlayPrice     = "",
                    SoureceDaminURL  = "www.douban.com",
                    HousePrice       = 0,
                    LocationCityName = cityName
                };
                dataContent.Add(houseInfo);
            }
            dataContent.SaveChanges();
        }
예제 #4
0
        /// <summary>
        /// Downloads the house's online page and, when the post body is present,
        /// extracts its text and price into both the <c>ref</c> arguments and the record.
        /// </summary>
        /// <param name="houseInfo">
        /// Record whose <c>HouseOnlineURL</c> is fetched. <c>Status</c> is set to 2 when no
        /// page content is returned, 3 when the post body is missing, and 1 when a price
        /// or non-empty text was extracted.
        /// </param>
        /// <param name="housePrice">Receives the price parsed from the post text; untouched when no post body is found.</param>
        /// <param name="houseTextContent">Receives the text of the first paragraph of the post; untouched when no post body is found.</param>
        private static void AnalyzeFromWebPage(Web.Model.DBHouseInfo houseInfo,
                                               ref decimal housePrice, ref string houseTextContent)
        {
            var htmlResult = HTTPHelper.GetHTML(houseInfo.HouseOnlineURL);

            // No page content came back.
            if (string.IsNullOrEmpty(htmlResult))
            {
                // Treated as a 404 page.
                houseInfo.Status = 2;
                return;
            }

            var page           = HtmlParser.Parse(htmlResult);
            var topicContent   = page.QuerySelector("div.topic-content");
            var firstParagraph = topicContent?.QuerySelector("p");

            // The post has no content.
            if (firstParagraph == null)
            {
                houseInfo.Status = 3;
                return;
            }

            // Read the post text.
            houseTextContent = firstParagraph.TextContent;
            // Extract the price from it.
            housePrice = JiebaTools.GetHousePrice(houseTextContent);
            if (housePrice != 0 || !string.IsNullOrEmpty(houseTextContent))
            {
                houseInfo.Status = 1;
            }
            houseInfo.DisPlayPrice = housePrice.ToString(CultureInfo.InvariantCulture);
            houseInfo.HousePrice   = housePrice;
            houseInfo.HouseText    = houseTextContent;
        }