/// <summary>
/// Walks the 58.com "pinpaigongyu" listing pages of every enabled PinPaiGongYu crawler
/// configuration, passes each page that contains listings to <c>GetDataOnPageDoc</c>
/// and saves the data context after each such page.
/// </summary>
/// <remarks>
/// The configuration value is JSON that must expose <c>pagecount</c> and <c>shortcutname</c>.
/// Each configuration is processed through <c>LogHelper.RunActionNotThrowEx</c>
/// (presumably it logs and swallows exceptions, so one failing city does not stop the
/// others - confirm against LogHelper).
/// </remarks>
public static void CapturPinPaiHouseInfo()
        {
            var enabledConfigurations = dataContent.CrawlerConfigurations
                                        .Where(c => c.ConfigurationName == ConstConfigurationName.PinPaiGongYu && c.IsEnabled)
                                        .ToList();

            // A single parser instance serves every page; nothing here needs a fresh one per request.
            var parser = new HtmlParser();

            foreach (var crawlerConfiguration in enabledConfigurations)
            {
                LogHelper.RunActionNotThrowEx(() =>
                {
                    var confInfo = Newtonsoft.Json.JsonConvert.DeserializeObject<dynamic>(crawlerConfiguration.ConfigurationValue);

                    for (var index = 0; index < confInfo.pagecount.Value; index++)
                    {
                        var url        = $"http://{confInfo.shortcutname.Value}.58.com/pinpaigongyu/pn/{index}";
                        var htmlResult = HTTPHelper.GetHTMLByURL(url);
                        var page       = parser.Parse(htmlResult);

                        // Listing entries are the <li> elements that carry a "logr" attribute.
                        // Any() stops at the first match instead of counting the whole page,
                        // and Where() never yields null, so no null check is needed.
                        var lstLi = page.QuerySelectorAll("li").Where(element => element.HasAttribute("logr"));
                        if (!lstLi.Any())
                        {
                            continue;
                        }

                        GetDataOnPageDoc(confInfo, page);
                        dataContent.SaveChanges();
                    }
                }, "CapturPinPaiHouseInfo", crawlerConfiguration);
            }
        }
        /// <summary>
        /// Analyzes up to 100 Douban house records that are not yet marked as analyzed.
        /// The price is first extracted from the stored text; when that yields 0 the topic
        /// page is downloaded and the first paragraph of <c>div.topic-content</c> is used instead.
        /// </summary>
        /// <remarks>
        /// A record whose page cannot be downloaded, or has no topic paragraph, is skipped
        /// without being marked as analyzed, so it is selected again on the next run.
        /// Any exception aborts the batch before <c>SaveChanges</c> and is only logged.
        /// </remarks>
        public static void AnalyzeDoubanHouseContent()
        {
            LogHelper.Info("AnalyzeDoubanHouseContent Start...");
            int index = 0;

            try
            {
                var lstHouse = dataContent.HouseInfos
                               .Where(h => h.Source == ConstConfigurationName.Douban && h.IsAnalyzed == false)
                               .Take(100)
                               .ToList();

                foreach (var houseInfo in lstHouse)
                {
                    var    housePrice       = JiebaTools.GetHousePrice(houseInfo.HouseText);
                    string houseTextContent = string.Empty;

                    if (housePrice == 0)
                    {
                        // The stored text gave no price: fall back to the topic page itself.
                        var htmlResult = HTTPHelper.GetHTML(houseInfo.HouseOnlineURL);
                        if (string.IsNullOrEmpty(htmlResult))
                        {
                            continue;
                        }

                        var page         = htmlParser.Parse(htmlResult);
                        var topicContent = page.QuerySelector("div.topic-content");
                        if (topicContent == null)
                        {
                            continue;
                        }

                        var houseDescription = topicContent.QuerySelector("p");
                        if (houseDescription == null)
                        {
                            continue;
                        }

                        houseTextContent = houseDescription.TextContent;
                        housePrice       = JiebaTools.GetHousePrice(houseTextContent);
                    }

                    if (housePrice != 0 || !string.IsNullOrEmpty(houseTextContent))
                    {
                        index++;
                        houseInfo.IsAnalyzed = true;
                    }

                    // Replace the stored text only when the page actually supplied some.
                    // Assigning unconditionally would blank HouseText whenever the price
                    // was already found in the stored text (houseTextContent is still empty then).
                    if (!string.IsNullOrEmpty(houseTextContent))
                    {
                        houseInfo.HouseText = houseTextContent;
                    }
                    houseInfo.HousePrice = housePrice;
                }

                dataContent.SaveChanges();
            }
            catch (Exception ex)
            {
                LogHelper.Error("AnalyzeDoubanHouseContent Exception", ex);
            }

            LogHelper.Info("AnalyzeDoubanHouseContent Finish,Update Count:" + index);
        }
        /// <summary>
        /// Downloads one page of a Douban group's discussion list and turns every topic row
        /// whose URL is not already stored into a <c>BizHouseInfo</c>.
        /// </summary>
        /// <param name="groupID">Douban group identifier inserted into the discussion URL.</param>
        /// <param name="cityName">City name copied into <c>LocationCityName</c> of every result.</param>
        /// <param name="pageIndex">Zero-based page index; the request offset is <c>pageIndex * 25</c>.</param>
        /// <returns>
        /// The newly discovered houses. Empty when the page could not be downloaded or
        /// contains no <c>table.olt</c> topic table.
        /// </returns>
        private static List<BizHouseInfo> GetDataFromOnlineWeb(string groupID, string cityName, int pageIndex)
        {
            var lstHouseInfo = new List<BizHouseInfo>();

            var url        = $"https://www.douban.com/group/{groupID}/discussion?start={pageIndex * 25}";
            var htmlResult = HTTPHelper.GetHTML(url);
            if (string.IsNullOrEmpty(htmlResult))
            {
                return lstHouseInfo;
            }

            var page       = htmlParser.Parse(htmlResult);
            var topicTable = page.QuerySelector("table.olt");
            if (topicTable == null)
            {
                // QuerySelector returns null when nothing matches; without the table there are no rows to read.
                return lstHouseInfo;
            }

            // URLs already stored for Douban. Loaded only once the page is known to be usable.
            var hsDoubanHouseURL = new HashSet<string>(
                dataContent.HouseInfos
                .Where(h => h.Source == ConstConfigurationName.Douban)
                .Select(h => h.HouseOnlineURL)
                .Distinct());

            foreach (var trItem in topicTable.QuerySelectorAll("tr"))
            {
                // Rows without a title cell or without a link in it (e.g. the header row) carry no topic.
                var titleLink = trItem.QuerySelector("td.title")?.QuerySelector("a");
                if (titleLink == null)
                {
                    continue;
                }

                var houseURL = titleLink.GetAttribute("href");
                if (hsDoubanHouseURL.Contains(houseURL))
                {
                    continue;
                }

                var now        = DateTime.Now;
                var houseTitle = titleLink.GetAttribute("title");

                // The time cell is looked up on the row: a td.time nested inside the td.title
                // cell would not normally exist, so querying the title cell always fell back
                // to "now". The cell text is prefixed with the current year before parsing;
                // text that does not parse that way keeps the fallback.
                var      pubTime  = now;
                var      timeItem = trItem.QuerySelector("td.time");
                DateTime parsedTime;
                if (timeItem != null
                    && DateTime.TryParse(now.ToString("yyyy-") + timeItem.TextContent.Trim(), out parsedTime))
                {
                    pubTime = parsedTime;
                }

                lstHouseInfo.Add(new BizHouseInfo()
                {
                    HouseTitle       = houseTitle,
                    HouseOnlineURL   = houseURL,
                    HouseLocation    = houseTitle,
                    HouseText        = houseTitle,
                    DataCreateTime   = now,
                    PubTime          = pubTime,
                    DisPlayPrice     = "",
                    Source           = ConstConfigurationName.Douban,
                    HousePrice       = 0,
                    LocationCityName = cityName
                });
            }

            return lstHouseInfo;
        }
Exemple #4
0
        /// <summary>
        /// Requests one page of the huzhumaifang house list API and adds every house whose
        /// detail URL is not yet known to <c>DataContent.MutualHouseInfos</c>, then saves.
        /// </summary>
        /// <param name="pageNum">Page number sent to the API as <c>pageNum</c>.</param>
        /// <param name="hsHouseOnlineUrl">Detail URLs that are already stored; matching houses are skipped.</param>
        private static void GetDataByWebAPI(int pageNum, HashSet<string> hsHouseOnlineUrl)
        {
            var dicParameter = new JObject()
            {
                { "uid", "" },
                { "pageNum", $"{pageNum}" },
                { "sortType", "1" },
                { "sellRentType", "2" },
                { "searchCondition", "{}" }
            };
            var postHouseUrl = $"http://www.huzhumaifang.com:8080/hzmf-integration/getHouseList.action?content={JsonConvert.SerializeObject(dicParameter)}";
            var resultJson   = HTTPHelper.GetJsonResultByURL(postHouseUrl);
            if (string.IsNullOrEmpty(resultJson))
            {
                // Nothing came back; there is nothing to add or save.
                return;
            }

            // DeserializeObject can yield null, and the indexer yields null when the
            // property is absent, so both are checked before enumerating.
            var resultJObject = JsonConvert.DeserializeObject<JObject>(resultJson);
            var houseList     = resultJObject?["houseList"];
            if (houseList == null)
            {
                return;
            }

            foreach (var houseInfo in houseList)
            {
                var houseId  = houseInfo["houseId"].ToObject<Int32>();
                var houseUrl = $"http://www.huzhumaifang.com/Renting/house_detail/id/{houseId}.html";
                if (hsHouseOnlineUrl.Contains(houseUrl))
                {
                    continue;
                }

                var rentPrice = houseInfo["houseRentPrice"];

                // A missing or null description becomes an empty string instead of throwing.
                var desc = (houseInfo["houseDescript"]?.ToObject<string>() ?? string.Empty).Replace("😄", "");

                DataContent.MutualHouseInfos.Add(new MutualHouseInfo()
                {
                    HouseOnlineURL   = houseUrl,
                    HouseLocation    = desc,
                    HousePrice       = rentPrice.ToObject<Int32>(),
                    HouseText        = desc,
                    DataCreateTime   = DateTime.Now,
                    HouseTitle       = desc,
                    DisPlayPrice     = rentPrice.ToString(),
                    LocationCityName = "上海",
                    PubTime          = houseInfo["houseCreateTime"].ToObject<DateTime>(),
                    Source           = ConstConfigurationName.HuZhuZuFang,
                });
            }

            DataContent.SaveChanges();
        }
Exemple #5
0
 /// <summary>
 /// Filters out invalid city configurations: every PinPaiGongYu crawler configuration
 /// whose first listing page (<c>pn/0</c>) contains no <c>li</c> element with a
 /// <c>logr</c> attribute is disabled, then the data context is saved once.
 /// </summary>
 public static void FilterInvalidCityConfig()
 {
     var cityConfigurations = DataContent.CrawlerConfigurations
                              .Where(c => c.ConfigurationName == ConstConfigurationName.PinPaiGongYu)
                              .ToList();

     foreach (var cityConfiguration in cityConfigurations)
     {
         var settings     = JsonConvert.DeserializeObject <dynamic>(cityConfiguration.ConfigurationValue);
         var firstPageUrl = $"http://{settings.shortcutname.Value}.58.com/pinpaigongyu/pn/0";
         var firstPageHtml = HTTPHelper.GetHTMLByURL(firstPageUrl);
         var document      = new HtmlParser().Parse(firstPageHtml);

         // A configuration stays untouched as soon as one listing entry is found.
         var hasListing = document.QuerySelectorAll("li").Any(item => item.HasAttribute("logr"));
         if (hasListing)
         {
             continue;
         }

         cityConfiguration.IsEnabled = false;
     }

     DataContent.SaveChanges();
 }
        /// <summary>
        /// Downloads one page of a Douban group's discussion list, adds a <c>BizHouseInfo</c>
        /// to the data context for every topic row and saves the context.
        /// </summary>
        /// <param name="groupID">Douban group identifier inserted into the discussion URL.</param>
        /// <param name="index">Zero-based page index; the request offset is <c>index * 25</c>.</param>
        /// <param name="cityName">City name copied into <c>LocationCityName</c> of every record.</param>
        /// <remarks>
        /// Nothing is added or saved when the page could not be downloaded or contains no
        /// <c>table.olt</c> topic table.
        /// </remarks>
        public static void GetDataFromOnlineWeb(string groupID, int index, string cityName)
        {
            var url        = $"https://www.douban.com/group/{groupID}/discussion?start={index * 25}";
            var htmlResult = HTTPHelper.GetHTML(url);
            if (string.IsNullOrEmpty(htmlResult))
            {
                return;
            }

            var page       = htmlParser.Parse(htmlResult);
            var topicTable = page.QuerySelector("table.olt");
            if (topicTable == null)
            {
                // QuerySelector returns null when nothing matches; without the table there are no rows to read.
                return;
            }

            foreach (var trItem in topicTable.QuerySelectorAll("tr"))
            {
                // Rows without a title cell or without a link in it (e.g. the header row) carry no topic.
                var titleLink = trItem.QuerySelector("td.title")?.QuerySelector("a");
                if (titleLink == null)
                {
                    continue;
                }

                var now        = DateTime.Now;
                var houseTitle = titleLink.GetAttribute("title");

                // The time cell is looked up on the row: a td.time nested inside the td.title
                // cell would not normally exist, so querying the title cell always fell back
                // to "now". The cell text is prefixed with the current year before parsing;
                // text that does not parse that way keeps the fallback.
                var      pubTime  = now;
                var      timeItem = trItem.QuerySelector("td.time");
                DateTime parsedTime;
                if (timeItem != null
                    && DateTime.TryParse(now.ToString("yyyy-") + timeItem.TextContent.Trim(), out parsedTime))
                {
                    pubTime = parsedTime;
                }

                dataContent.Add(new BizHouseInfo()
                {
                    HouseTitle       = houseTitle,
                    HouseOnlineURL   = titleLink.GetAttribute("href"),
                    HouseLocation    = houseTitle,
                    HouseText        = houseTitle,
                    DataCreateTime   = now,
                    PubTime          = pubTime,
                    DisPlayPrice     = "",
                    SoureceDaminURL  = "www.douban.com",
                    HousePrice       = 0,
                    LocationCityName = cityName
                });
            }

            dataContent.SaveChanges();
        }
        /// <summary>
        /// Downloads the topic page of <paramref name="houseInfo"/> and extracts the text of
        /// the first paragraph inside <c>div.topic-content</c> and the price found in it,
        /// recording the outcome in <c>houseInfo.Status</c>.
        /// </summary>
        /// <param name="houseInfo">
        /// Record to analyze. Status is set to 2 when no page content was returned and to 3
        /// when the page has no topic paragraph. It is set to 1 when a price or a non-empty
        /// text was extracted (presumably "analyzed" - confirm against the Status definition).
        /// </param>
        /// <param name="housePrice">Receives the extracted price; left unchanged when no paragraph was found.</param>
        /// <param name="houseTextContent">Receives the paragraph text; left unchanged when no paragraph was found.</param>
        private static void AnalyzeFromWebPage(Web.Model.DBHouseInfo houseInfo,
                                               ref decimal housePrice, ref string houseTextContent)
        {
            var htmlResult = HTTPHelper.GetHTML(houseInfo.HouseOnlineURL);

            // No page content at all: treated as a 404 page.
            if (string.IsNullOrEmpty(htmlResult))
            {
                houseInfo.Status = 2;
                return;
            }

            var page         = HtmlParser.Parse(htmlResult);
            var topicContent = page.QuerySelector("div.topic-content");

            // Look the paragraph up once; the result serves both the existence check and the text.
            var topicParagraph = topicContent == null ? null : topicContent.QuerySelector("p");

            // The page has no post content.
            if (topicParagraph == null)
            {
                houseInfo.Status = 3;
                return;
            }

            // Post content, and the price found in it.
            houseTextContent = topicParagraph.TextContent;
            housePrice       = JiebaTools.GetHousePrice(houseTextContent);

            if (housePrice != 0 || !string.IsNullOrEmpty(houseTextContent))
            {
                houseInfo.Status = 1;
            }

            houseInfo.DisPlayPrice = housePrice.ToString(CultureInfo.InvariantCulture);
            houseInfo.HousePrice   = housePrice;
            houseInfo.HouseText    = houseTextContent;
        }