/// <summary>
        /// Queues the hz.5i5j.com community-list pages that seed the crawl.
        /// Every queued request carries a "diqu" property holding the region label
        /// for the list it belongs to.
        /// </summary>
        private void InitPageWC()
        {
            // One entry per community list: URL format, exclusive upper bound of the
            // page counter (pages start at 1), and the region label stored as "diqu".
            var seedLists = new[]
            {
                new { UrlFormat = "https://hz.5i5j.com/xiaoqu/wuchang/n{0}/", PageBound = 3, Region = "五常" },
                // NOTE(review): weilaikejicheng is labelled "五常" here, unchanged from the
                // original code - confirm that this label is intentional.
                new { UrlFormat = "https://hz.5i5j.com/xiaoqu/weilaikejicheng/n{0}/", PageBound = 2, Region = "五常" },
                new { UrlFormat = "https://hz.5i5j.com/xiaoqu/xianlin/n{0}/", PageBound = 4, Region = "闲林" },
                new { UrlFormat = "https://hz.5i5j.com/xiaoqu/xixi/n{0}/", PageBound = 2, Region = "西溪" },
            };

            foreach (var seedList in seedLists)
            {
                for (int i = 1; i < seedList.PageBound; i++)
                {
                    var request = BaseFunction.CreateRequest(string.Format(seedList.UrlFormat, i));
                    request.AddProperty("diqu", seedList.Region);
                    AddRequest(request);
                }
            }
        }
Esempio n. 2
0
 /// <summary>
 /// Queues the hz.lianjia.com community-list pages that seed the crawl.
 /// Every queued request carries a "diqu" property holding the region label
 /// for the list it belongs to.
 /// </summary>
 private void InitPageWC()
 {
     // The three arrays are parallel: entry k describes the k-th community list.
     string[] urlFormats =
     {
         "https://hz.lianjia.com/xiaoqu/feicuicheng1/pg{0}/",
         "https://hz.lianjia.com/xiaoqu/weilaikejicheng/pg{0}/",
         "https://hz.lianjia.com/xiaoqu/xianlin1/pg{0}/",
         "https://hz.lianjia.com/xiaoqu/xixi/pg{0}/",
     };
     // Exclusive upper bound of the page counter for each list (pages start at 1).
     int[] pageBounds = { 3, 7, 4, 5 };
     string[] regionLabels = { "翡翠城", "未来科技城", "闲林", "西溪" };

     for (int listIndex = 0; listIndex < urlFormats.Length; listIndex++)
     {
         int pageNumber = 1;
         while (pageNumber < pageBounds[listIndex])
         {
             var listRequest = BaseFunction.CreateRequest(string.Format(urlFormats[listIndex], pageNumber));
             listRequest.AddProperty("diqu", regionLabels[listIndex]);
             AddRequest(listRequest);
             pageNumber++;
         }
     }
 }
        /// <summary>
        /// Parses a downloaded hz.5i5j.com page.
        /// Community (xiaoqu) list pages produce community and community-price entities
        /// ("Result" / "Result1") and queue the second-hand listing link of every community.
        /// Listing (xq-ershoufang) pages produce house and house-price entities
        /// ("Result2" / "Result3") and, on what is detected as the first page, queue the
        /// remaining pager links. Pages whose URL matches neither pattern are ignored.
        /// </summary>
        /// <param name="page">The downloaded page; result items and follow-up requests are attached to it.</param>
        protected override void Handle(Page page)
        {
            // Read the clock once per page. Building the date from three separate
            // DateTime.Now reads could straddle midnight (or a month end) and give a
            // wrong or even invalid date; a single read also stamps every entity of
            // this page with the same day.
            var now   = DateTime.Now;
            var today = new DateTime(now.Year, now.Month, now.Day);

            // Community list page: queue the listing page of each community and extract the community data.
            if (Regex.IsMatch(page.TargetUrl, "https://hz.5i5j.com/xiaoqu/[\\s\\S]+"))
            {
                // Collect the second-hand listing links once, so the selector result is
                // not enumerated a second time just to count it for the log line.
                var listingLinks = page.Selectable().SelectList(Selectors.XPath("//div[@class='list-con-box']/ul[@class='pList']/li/div[@class='listCon']/div[@class='listX']/div[@class='jia']/a")).Links().GetValues().ToList();
                foreach (var listingLink in listingLinks)
                {
                    var request = BaseFunction.CreateRequest(listingLink);
                    page.AddTargetRequest(request);
                }
                Logger?.LogInformation($"{page.TargetUrl}页面获取到小区连接{listingLinks.Count}个");

                // Extract the data of every community in the list.
                var totalXiaoQuElements = page.Selectable().SelectList(Selectors.XPath("//div[@class='list-con-box']/ul[@class='pList']/li")).Nodes();

                var xiaoquList      = new List <O5I5JXiaoQuEntity>();
                var xiaoquPriceList = new List <O5I5JXiaoQuPriceEntity>();
                foreach (var xiaoquElement in totalXiaoQuElements)
                {
                    try
                    {
                        var xiaoqu = new O5I5JXiaoQuEntity();
                        xiaoqu.Name = xiaoquElement.Select(Selectors.XPath(".//div[@class='listCon']/h3/a/text()")).GetValue();

                        // The title link is only used to derive the id; the stored Url is the listing link below.
                        string detailUrl = xiaoquElement.Select(Selectors.XPath(".//div[@class='listCon']/h3/a/@href")).GetValue();
                        xiaoqu.Describe = xiaoquElement.Select(Selectors.XPath(".//div[@class='listCon']/div/p[2]")).GetValue().Trim();

                        // When the description splits into exactly three parts on '>', keep only the last part.
                        string[] tempDesc = xiaoqu.Describe.Split('>');
                        if (tempDesc.Length == 3)
                        {
                            xiaoqu.Describe = tempDesc[2].Trim();
                        }

                        xiaoqu.Region = page.Request.Properties["diqu"];
                        // Id = the text between the last '/' and the last '.' of the title link.
                        xiaoqu.Id     = detailUrl.Substring(detailUrl.LastIndexOf('/') + 1, detailUrl.LastIndexOf('.') - detailUrl.LastIndexOf('/') - 1);
                        xiaoqu.Url    = xiaoquElement.Select(Selectors.XPath(".//div[@class='listCon']/div/div/a/@href")).GetValue();
                        xiaoquList.Add(xiaoqu);

                        var xiaoquPrice = new O5I5JXiaoQuPriceEntity();
                        xiaoquPrice.Id          = xiaoqu.Id;
                        xiaoquPrice.Date1       = today;
                        xiaoquPrice.Price       = BaseFunction.TryParseDecimal(xiaoquElement.Select(Selectors.XPath(".//div[@class='listCon']/div/div/p[@class='redC']/strong/text()")).GetValue());
                        xiaoquPrice.PriceRange  = xiaoquElement.Select(Selectors.XPath(".//div[@class='listCon']/div/div/p[2]/text()")).GetValue();
                        xiaoquPrice.OnSellCount = BaseFunction.TryParseInt(xiaoquElement.Select(Selectors.XPath(".//div[@class='listCon']/div/div/a/p/span/text()")).GetValue());
                        xiaoquPriceList.Add(xiaoquPrice);
                    }
                    catch (Exception ex)
                    {
                        // Best effort: an element that fails to parse is reported and skipped.
                        Console.WriteLine(ex);
                    }
                }
                Logger?.LogInformation($"{page.TargetUrl}页面获取到小区信息{xiaoquList.Count}个,获取到小区价格信息{xiaoquPriceList.Count}个");

                page.AddResultItem("Result", xiaoquList);
                page.AddResultItem("Result1", xiaoquPriceList);
            }
            // Second-hand house listing page of one community.
            else if (Regex.IsMatch(page.TargetUrl, "https://hz.5i5j.com/xq-ershoufang/[\\s\\S]+"))
            {
                // On the first page, queue the other pages from the pager.
                try
                {
                    // First-page heuristic: the last 'n' of the URL is more than 4 characters
                    // from the end. NOTE(review): presumably follow-up pages end in "/n<k>/" - confirm.
                    if (page.TargetUrl.Length - page.TargetUrl.LastIndexOf('n') > 4)
                    {
                        var pageElements = page.Selectable().SelectList(Selectors.XPath("//div[@class='pageBox']/div[@class='pageSty rf']/a")).Nodes().ToList();

                        // The first and last pager anchors are skipped; count what is actually
                        // queued so the log never shows a negative number when the pager is short.
                        int queuedPages = 0;
                        if (pageElements.Count > 2)
                        {
                            for (int i = 1; i < pageElements.Count - 1; ++i)
                            {
                                page.AddTargetRequest(BaseFunction.CreateRequest(pageElements[i].Links().GetValue()));
                                ++queuedPages;
                            }
                        }

                        Logger?.LogInformation($"{page.TargetUrl}页面获取分页信息{queuedPages}个");
                    }
                }
                catch (Exception ex)
                {
                    Console.WriteLine(ex);
                }

                // The community id is the first run of more than five digits in the URL.
                string xiaoquID = "";

                Regex regex = new Regex("\\d+");

                foreach (Match item in regex.Matches(page.TargetUrl))
                {
                    if (item.Value.Length > 5)
                    {
                        xiaoquID = item.Value;
                        break;
                    }
                }

                // Extract the data of every house in the list.
                var totalFangZiElements = page.Selectable().SelectList(Selectors.XPath("//div[@class='list-con-box'][1]/ul/li/div[@class='listCon']")).Nodes();

                var houseList      = new List <O5I5JHouseEntity>();
                var housePriceList = new List <O5I5JHousePriceEntity>();
                foreach (var fangElement in totalFangZiElements)
                {
                    try
                    {
                        var house = new O5I5JHouseEntity();
                        house.Title    = fangElement.Select(Selectors.XPath(".//h3[@class='listTit']/a/text()")).GetValue();
                        house.Url      = fangElement.Select(Selectors.XPath(".//h3[@class='listTit']/a/@href")).GetValue();
                        // Id = the text between the last '/' and the last '.' of the house link.
                        house.Id       = house.Url.Substring(house.Url.LastIndexOf('/') + 1, house.Url.LastIndexOf('.') - house.Url.LastIndexOf('/') - 1);
                        house.XiaoQuId = xiaoquID;

                        // First info line, split on '·': part 0 carries the layout, part 1 the area.
                        string   huxmj = fangElement.Select(Selectors.XPath(".//div[@class='listX']/p[1]")).GetValue();
                        string[] tt    = huxmj.Split('·');
                        if (tt.Length > 3)
                        {
                            house.MianJi = BaseFunction.TryParseDecimal(tt[1].Replace("平", "").Replace("米", "").Trim());
                            string[] thx = tt[0].Split('>');
                            if (thx.Length == 3)
                            {
                                house.HuXing = thx[2];
                            }
                        }

                        // Third info line, split on '·': part 2 carries the publish time.
                        huxmj = fangElement.Select(Selectors.XPath(".//div[@class='listX']/p[3]")).GetValue();
                        tt    = huxmj.Split('·');
                        if (tt.Length > 2)
                        {
                            house.PublicTime = BaseFunction.TryParseDateTime(tt[2].Replace("发", "").Replace("布", "").Trim());
                        }
                        houseList.Add(house);


                        var housePrice = new O5I5JHousePriceEntity();
                        housePrice.Id    = house.Id;
                        housePrice.Date1 = today;
                        housePrice.Price = BaseFunction.TryParseDecimal(fangElement.Select(Selectors.XPath(".//div[@class='jia']/p[1]/strong/text()")).GetValue());

                        // Second price line: keep the part before '/' when there are exactly two parts.
                        string   ttxx  = fangElement.Select(Selectors.XPath(".//div[@class='jia']/p[2]/text()")).GetValue();
                        string[] ttxx2 = ttxx.Split('/');
                        if (ttxx2.Length == 2)
                        {
                            ttxx = ttxx2[0];
                        }

                        housePrice.SumPrice = BaseFunction.TryParseDecimal(ttxx
                                                                           .Replace("单价", "")
                                                                           .Replace("元", ""));

                        housePriceList.Add(housePrice);
                    }
                    catch (Exception ex)
                    {
                        // Best effort: an element that fails to parse is reported and skipped.
                        Console.WriteLine(ex);
                    }
                }

                Logger?.LogInformation($"{page.TargetUrl}页面获取到二手房信息{houseList.Count}个,获取到二手房价格信息{housePriceList.Count}个");

                page.AddResultItem("Result2", houseList);

                page.AddResultItem("Result3", housePriceList);
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Parses a downloaded hz.lianjia.com page.
        /// Community (xiaoqu) list pages produce community and community-price entities
        /// ("Result" / "Result1") and queue the second-hand listing pages of every community
        /// that has houses on sale; the listing URLs are built from the community id.
        /// Listing (ershoufang) pages produce house and house-price entities
        /// ("Result2" / "Result3"). Pages whose URL matches neither pattern are ignored.
        /// </summary>
        /// <param name="page">The downloaded page; result items and follow-up requests are attached to it.</param>
        protected override void Handle(Page page)
        {
            // Page size used to derive how many listing pages a community needs.
            const int HousesPerPage = 30;

            // Read the clock once per page. Building the date from three separate
            // DateTime.Now reads could straddle midnight (or a month end) and give a
            // wrong or even invalid date; a single read also stamps every entity of
            // this page with the same day.
            var now   = DateTime.Now;
            var today = new DateTime(now.Year, now.Month, now.Day);

            // Community list page: extract the community data and queue its listing pages.
            if (Regex.IsMatch(page.TargetUrl, "https://hz.lianjia.com/xiaoqu/[\\s\\S]+"))
            {
                // Extract the data of every community in the list.
                var totalXiaoQuElements = page.Selectable().SelectList(Selectors.XPath("//ul[@class='listContent']/li")).Nodes();

                var xiaoquList      = new List <LianJiaXiaoQuEntity>();
                var xiaoquPriceList = new List <LianJiaXiaoQuPriceEntity>();
                foreach (var xiaoquElement in totalXiaoQuElements)
                {
                    try
                    {
                        var xiaoqu = new LianJiaXiaoQuEntity();
                        xiaoqu.Name     = xiaoquElement.Select(Selectors.XPath(".//div[@class='info']/div[@class='title']/a/text()")).GetValue();
                        xiaoqu.Url      = xiaoquElement.Select(Selectors.XPath(".//div[@class='xiaoquListItemSellCount']/a/@href")).GetValue();
                        xiaoqu.Describe = xiaoquElement.Select(Selectors.XPath(".//div[@class='houseInfo']/a/text()")).GetValue().Trim();

                        xiaoqu.Region = page.Request.Properties["diqu"];
                        // Id = the text from the last 'c' of the link up to (not including) its last '/'.
                        xiaoqu.Id     = xiaoqu.Url.Substring(xiaoqu.Url.LastIndexOf('c'), xiaoqu.Url.LastIndexOf('/') - xiaoqu.Url.LastIndexOf('c'));
                        xiaoquList.Add(xiaoqu);

                        // PriceRange is not populated for this site.
                        var xiaoquPrice = new LianJiaXiaoQuPriceEntity();
                        xiaoquPrice.Id          = xiaoqu.Id;
                        xiaoquPrice.Date1       = today;
                        xiaoquPrice.Price       = BaseFunction.TryParseDecimal(xiaoquElement.Select(Selectors.XPath(".//div[@class='xiaoquListItemPrice']/div[@class='totalPrice']/span/text()")).GetValue());
                        xiaoquPrice.OnSellCount = BaseFunction.TryParseInt(xiaoquElement.Select(Selectors.XPath(".//div[@class='xiaoquListItemSellCount']/a/span/text()")).GetValue());
                        xiaoquPriceList.Add(xiaoquPrice);

                        // First listing page, only when something is on sale.
                        if (xiaoquPrice.OnSellCount > 0)
                        {
                            page.AddTargetRequest(BaseFunction.CreateRequest(string.Format("https://hz.lianjia.com/ershoufang/{0}/", xiaoqu.Id)));
                        }

                        // Remaining listing pages (2..pageCount).
                        if (xiaoquPrice.OnSellCount > HousesPerPage)
                        {
                            // Ceiling division: an exact multiple of the page size (e.g. 60)
                            // needs 2 pages, not 3, so no empty trailing page is requested.
                            int pageCount = (xiaoquPrice.OnSellCount + HousesPerPage - 1) / HousesPerPage;

                            Logger?.LogInformation($"{xiaoqu.Id}小区在售{xiaoquPrice.OnSellCount}个,分{pageCount}页");

                            for (int index = 2; index <= pageCount; ++index)
                            {
                                page.AddTargetRequest(BaseFunction.CreateRequest(string.Format("https://hz.lianjia.com/ershoufang/pg{0}{1}/", index, xiaoqu.Id)));
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        // Best effort: an element that fails to parse is reported and skipped.
                        Console.WriteLine(ex);
                    }
                }
                Logger?.LogInformation($"{page.TargetUrl}页面获取到小区信息{xiaoquList.Count}个,获取到小区价格信息{xiaoquPriceList.Count}个");

                page.AddResultItem("Result", xiaoquList);
                page.AddResultItem("Result1", xiaoquPriceList);
            }
            // Second-hand house listing page of one community.
            else if (Regex.IsMatch(page.TargetUrl, "https://hz.lianjia.com/ershoufang/[\\s\\S]+"))
            {
                // The community id is the first "c<digits>" token longer than five characters in the URL.
                string xiaoquID = "";

                Regex regex = new Regex("c\\d+");

                foreach (Match item in regex.Matches(page.TargetUrl))
                {
                    if (item.Value.Length > 5)
                    {
                        xiaoquID = item.Value;
                        break;
                    }
                }

                // Extract the data of every house in the list.
                var totalFangZiElements = page.Selectable().SelectList(Selectors.XPath("//ul[@class='sellListContent']/li/div[@class='info clear']")).Nodes();

                // Built once per page instead of once per house.
                Regex mianjiregex = new Regex("\\d+[\\.\\d+]*平米");
                Regex huxingregex = new Regex("\\d+室[\\d+厅]*");

                var houseList      = new List <LianJiaHouseEntity>();
                var housePriceList = new List <LianJiaHousePriceEntity>();
                foreach (var fangElement in totalFangZiElements)
                {
                    try
                    {
                        var house = new LianJiaHouseEntity();
                        house.Title    = fangElement.Select(Selectors.XPath(".//div[@class='title']/a/text()")).GetValue();
                        house.Url      = fangElement.Select(Selectors.XPath(".//div[@class='title']/a/@href")).GetValue();
                        // Id = the text between the last '/' and the last '.' of the house link.
                        house.Id       = house.Url.Substring(house.Url.LastIndexOf('/') + 1, house.Url.LastIndexOf('.') - house.Url.LastIndexOf('/') - 1);
                        house.XiaoQuId = xiaoquID;
                        string huxmj = fangElement.Select(Selectors.XPath(".//div[@class='address']/div[@class='houseInfo']")).GetValue();
                        if (huxmj.Length > 3)
                        {
                            // Regex.Match never returns null; Success tells whether the text
                            // was found. Without a match the field keeps its default value.
                            var mianjimatch = mianjiregex.Match(huxmj);
                            if (mianjimatch.Success)
                            {
                                house.MianJi = BaseFunction.TryParseDecimal(mianjimatch.Value.Replace("平", "").Replace("米", "").Trim());
                            }

                            var huxingmatch = huxingregex.Match(huxmj);
                            if (huxingmatch.Success)
                            {
                                house.HuXing = huxingmatch.Value;
                            }
                        }

                        // PublicTime is not populated for this site.
                        houseList.Add(house);


                        var housePrice = new LianJiaHousePriceEntity();
                        housePrice.Id    = house.Id;
                        housePrice.Date1 = today;
                        housePrice.Price = BaseFunction.TryParseDecimal(fangElement.Select(Selectors.XPath(".//div[@class='priceInfo']/div[@class='totalPrice']/span/text()")).GetValue());

                        // Unit-price text: keep the part before '/' when there are exactly two parts.
                        string   ttxx  = fangElement.Select(Selectors.XPath(".//div[@class='priceInfo']/div[@class='unitPrice']/span/text()")).GetValue();
                        string[] ttxx2 = ttxx.Split('/');
                        if (ttxx2.Length == 2)
                        {
                            ttxx = ttxx2[0];
                        }

                        housePrice.SumPrice = BaseFunction.TryParseDecimal(ttxx
                                                                           .Replace("单价", "")
                                                                           .Replace("元", ""));

                        housePriceList.Add(housePrice);
                    }
                    catch (Exception ex)
                    {
                        // Best effort: an element that fails to parse is reported and skipped.
                        Console.WriteLine(ex);
                    }
                }

                Logger?.LogInformation($"{page.TargetUrl}页面获取到二手房信息{houseList.Count}个,获取到二手房价格信息{housePriceList.Count}个");

                page.AddResultItem("Result2", houseList);

                page.AddResultItem("Result3", housePriceList);
            }
        }