Пример #1
0
        /// <summary>
        /// Crawls the "wanted to rent" (求租) shop listings of one area.
        /// Downloads the first listing page, reads the page count from its pager,
        /// then crawls every listing page and every item detail page, storing each
        /// sufficiently recent item through <c>shopbegrepo</c>.
        /// </summary>
        /// <remarks>
        /// Nothing is crawled when the first page contains no <c>div.pager</c> element.
        /// Failures are never propagated to the caller: they are written to <c>log</c>,
        /// and page/item failures are additionally recorded in <c>errorUrlrepsitory</c>.
        /// </remarks>
        /// <param name="area">Area whose <c>Url</c>, <c>Id</c> and <c>Name</c> drive the crawl.</param>
        public void CrawlDataBegRent(Area area)
        {
            try
            {
                Crawler crawler   = new Crawler();
                string  url       = area.Url + ConstVar.求租 + "0/";
                string  html      = crawler.Crawl(url, Encoding.UTF8);
                var     htmlParse = new HtmlParser();
                var     document  = htmlParse.Parse(html);

                IElement pager = document.QuerySelectorAll("div")
                                 .FirstOrDefault(p => p.ClassName == "pager");
                if (pager != null)
                {
                    int page = GetBegRentPageCount(htmlParse, pager);

                    // Pagination: listing pages are addressed as ".../pn1/", ".../pn2/", ...
                    for (int i = 1; i <= page; i++)
                    {
                        CrawlBegRentPage(crawler, htmlParse, url + "pn" + i + "/", area);
                    }
                }

                Console.WriteLine("抓取" + area.Name + "求租信息完成");
            }
            catch (Exception e)
            {
                log.Error(e.ToString());
                Console.WriteLine(e);
            }
        }

        /// <summary>
        /// Reads the number of listing pages from the pager element.
        /// </summary>
        /// <param name="htmlParse">Parser used to re-parse the pager markup.</param>
        /// <param name="pager">The <c>div.pager</c> element of the first listing page.</param>
        /// <returns>
        /// The number held by the second-to-last <c>span</c> when the pager has more than
        /// two spans and that text is an integer; otherwise 1.
        /// </returns>
        private int GetBegRentPageCount(HtmlParser htmlParse, IElement pager)
        {
            List<IElement> spans = htmlParse.Parse(pager.InnerHtml).QuerySelectorAll("span").ToList();
            if (spans.Count <= 2)
            {
                return 1;
            }

            string pageText = spans[spans.Count - 2].InnerHtml;
            int    page;
            if (int.TryParse(pageText, out page))
            {
                return page;
            }

            // An unreadable pager should not abort the whole crawl; fall back to the first page.
            log.Error("Unable to parse page count from pager text: " + pageText);
            return 1;
        }

        /// <summary>
        /// Crawls one listing page and processes every <c>div.list-info</c> item on it in parallel.
        /// Any failure is logged and the page URL is recorded as a <c>UrlType.Page</c> error.
        /// </summary>
        /// <param name="crawler">Crawler used to download pages.</param>
        /// <param name="htmlParse">Parser used for all markup on this page.</param>
        /// <param name="pageUrl">URL of the listing page.</param>
        /// <param name="area">Area the listings belong to.</param>
        private void CrawlBegRentPage(Crawler crawler, HtmlParser htmlParse, string pageUrl, Area area)
        {
            try
            {
                // Crawl the page.
                string pageHtml = crawler.Crawl(pageUrl, Encoding.UTF8);

                IElement listWrap = htmlParse.Parse(pageHtml).QuerySelectorAll("ul")
                                    .FirstOrDefault(p => p.ClassName == "house-list-wrap");
                if (listWrap == null)
                {
                    throw new InvalidOperationException("Listing page has no 'house-list-wrap' list: " + pageUrl);
                }

                List<IElement> listInfos = htmlParse.Parse(listWrap.InnerHtml).QuerySelectorAll("div")
                                           .Where(p => p.ClassName == "list-info")
                                           .ToList();

                // Crawl each item. The upper bound of Parallel.For is exclusive, so it must be
                // exactly Count; Count + 1 would index one element past the end of the list.
                // NOTE(review): crawler, htmlParse and the repositories are shared across the
                // parallel iterations - confirm they are safe for concurrent use.
                Parallel.For(0, listInfos.Count, i =>
                {
                    CrawlBegRentItem(crawler, htmlParse, listInfos[i], area);
                });
            }
            catch (Exception exception)
            {
                errorUrlrepsitory.Add(new ErrorUrl()
                {
                    Url = pageUrl, UrlType = UrlType.Page
                });
                log.Error(exception.ToString());
            }
        }

        /// <summary>
        /// Downloads and parses one item detail page and stores it as a <c>ShopBegRent</c>
        /// when its publish date is within the last two months.
        /// Any failure is logged and the item URL is recorded as a <c>UrlType.Item</c> error.
        /// </summary>
        /// <param name="crawler">Crawler used to download the detail page.</param>
        /// <param name="htmlParse">Parser used for all markup of this item.</param>
        /// <param name="listInfo">The <c>div.list-info</c> element holding the link to the detail page.</param>
        /// <param name="area">Area the listing belongs to.</param>
        private void CrawlBegRentItem(Crawler crawler, HtmlParser htmlParse, IElement listInfo, Area area)
        {
            // Stays empty until the link has been read, so early failures are recorded without a URL.
            string itemUrl = string.Empty;
            try
            {
                IElement link = htmlParse.Parse(listInfo.InnerHtml).QuerySelector("a");
                itemUrl       = link.GetAttribute("href");
                string detailHtml = crawler.Crawl(itemUrl, Encoding.UTF8);

                // Start parsing the detail page.
                var detail = htmlParse.Parse(detailHtml);

                // Publish time: the text of div.other that precedes its first child tag.
                IElement other     = detail.QuerySelectorAll("div")
                                     .FirstOrDefault(o => o.ClassName == "other");
                string   otherHtml = other.InnerHtml;
                int      tagStart  = otherHtml.IndexOf('<');
                string   timeText  = tagStart >= 0 ? otherHtml.Substring(0, tagStart) : otherHtml;
                string   update    = timeText.Replace("发布时间:", "").Trim();

                // Keep the date only; .Date avoids a culture-dependent string round trip.
                DateTime updateime = ParseTool.StringToDateTime(update).Date;
                if (updateime <= DateTime.Now.AddMonths(-2))
                {
                    // Older than two months: skip.
                    return;
                }

                // Title
                string infoTitle = detail.QuerySelectorAll("h1").FirstOrDefault().InnerHtml.Trim();
                // Detailed content
                string infoContent = detail.QuerySelectorAll("div")
                                     .FirstOrDefault(u => u.ClassName == "maincon").InnerHtml.Trim();
                // Phone
                string phone = detail.QuerySelectorAll("span")
                               .FirstOrDefault(u => u.ClassName == "phone").InnerHtml.Trim();
                // Rent
                string rentMoney = detail.QuerySelectorAll("em")
                                   .FirstOrDefault(u => u.ClassName == "redfont").InnerHtml.Trim();

                // The "info" list supplies both the district links (item 0) and the area size (item 2).
                List<IElement> infoItems = htmlParse
                                           .Parse(detail.QuerySelectorAll("ul")
                                                  .FirstOrDefault(u => u.ClassName == "info").InnerHtml)
                                           .QuerySelectorAll("li").ToList();
                // Area size
                string areasize = infoItems[2].InnerHtml.Replace("面积:", "").Replace("㎡", "").Trim();
                // Customer name
                string customerName = detail.QuerySelectorAll("a")
                                      .Where(u => u.ClassName == "tx").ToList()[1].InnerHtml.Trim();
                // District name(s), comma separated
                string areaName = string.Join(",",
                                              htmlParse.Parse(infoItems[0].InnerHtml).QuerySelectorAll("a")
                                              .Select(p => p.InnerHtml.Trim()).ToList()).Trim();

                ShopBegRent shop = new ShopBegRent();
                shop.AreaName    = areaName;
                shop.AreaId      = area.Id.ToString();
                shop.InfoContent = infoContent;
                shop.InfoTitle   = infoTitle;
                shop.Phone       = phone;
                shop.Customer    = customerName;
                shop.UpdateTime  = updateime;

                // The page shows a single rent figure; store it as a +/-1000 range clamped at zero.
                double rent       = ParseTool.StringToDouble(rentMoney);
                double minRent    = rent - 1000;
                shop.MaxRentMoney = rent + 1000;
                shop.MinRentMoney = minRent > 0 ? minRent : 0;

                if (areasize.Contains("-"))
                {
                    // Explicit "min-max" range.
                    string[] areasizes = areasize.Split('-');
                    shop.MinArea       = ParseTool.StringToDouble(areasizes[0]);
                    shop.MaxArea       = ParseTool.StringToDouble(areasizes[1]);
                }
                else
                {
                    // Single figure; store it as a +/-10 range clamped at zero.
                    double size    = ParseTool.StringToDouble(areasize);
                    double minArea = size - 10;
                    shop.MinArea   = minArea > 0 ? minArea : 0;
                    shop.MaxArea   = size + 10;
                }

                shop.Id = Guid.NewGuid();
                shopbegrepo.Add(shop);

                Console.WriteLine(area.Name + "添加了一条商铺求租");
            }
            catch (Exception exception)
            {
                errorUrlrepsitory.Add(new ErrorUrl()
                {
                    Url = itemUrl, UrlType = UrlType.Item
                });
                log.Error(exception.ToString());
            }
        }