コード例 #1
0
        /// <summary>
        /// 抓取求租
        /// </summary>
        /// <param name="area"></param>
        public void CrawlDataBegRent(Area area)
        {
            try
            {
                Crawler         crawler   = new Crawler();
                string          url       = area.Url + ConstVar.求租 + "0/";
                string          html      = crawler.Crawl(url, Encoding.UTF8);
                var             htmlParse = new HtmlParser();
                IHtmlDocument   docuement = htmlParse.Parse(html);
                List <IElement> eles      = docuement.QuerySelectorAll("div").ToList().Where(p => p.ClassName == "pager")
                                            .ToList();
                if (eles.Count > 0)
                {
                    IHtmlDocument   htmlA    = htmlParse.Parse(eles[0].InnerHtml);
                    List <IElement> spanEles = htmlA.QuerySelectorAll("span").ToList();


                    int page = 0;
                    if (spanEles.Count > 2)
                    {
                        IElement el = spanEles[spanEles.Count - 2];
                        page = int.Parse(el.InnerHtml);
                    }
                    else
                    {
                        page = 1;
                    }


                    List <string> urls = new List <string>();//分页
                    for (int i = 1; i <= page; i++)
                    {
                        string str = url + "pn" + i + "/";
                        urls.Add(str);
                    }

                    foreach (var e in urls)
                    {
                        try
                        {
                            //抓取每页
                            string htmlB = crawler.Crawl(e, Encoding.UTF8);

                            IDocument docuemnt = htmlParse.Parse(htmlB);
                            IElement  eleist   = docuemnt.QuerySelectorAll("ul").Where(p => p.ClassName == "house-list-wrap")
                                                 .ToList().FirstOrDefault();
                            IDocument       docuementC = htmlParse.Parse(eleist.InnerHtml);
                            List <IElement> eliss      = docuementC.QuerySelectorAll("div").Where(p => p.ClassName == "list-info")
                                                         .ToList();
                            //抓取每条
                            Parallel.For(0, eliss.Count + 1, i =>
                            {
                                string itemUrl = string.Empty;
                                try
                                {
                                    IDocument documentD = htmlParse.Parse(eliss[i].InnerHtml);

                                    IElement eloo = documentD.QuerySelector("a");
                                    itemUrl       = eloo.GetAttribute("href");
                                    var htmlE     = crawler.Crawl(itemUrl, Encoding.UTF8);

                                    //开始解析
                                    IDocument documentE = htmlParse.Parse(htmlE);
                                    IElement time       = documentE.QuerySelectorAll("div")
                                                          .FirstOrDefault(o => o.ClassName == "other");
                                    string update = time.InnerHtml.Substring(0, time.InnerHtml.IndexOf("<"))
                                                    .Replace("发布时间:", "").Trim();
                                    DateTime updateime =
                                        ParseTool.StringToDateTime(ParseTool.StringToDateTime(update)
                                                                   .ToShortDateString());
                                    if (updateime > DateTime.Now.AddMonths(-2))
                                    {
                                        //标题
                                        string InfoTitle = documentE.QuerySelectorAll("h1").FirstOrDefault().InnerHtml
                                                           .Trim();
                                        //详细内容
                                        string InfoContent = documentE.QuerySelectorAll("div")
                                                             .FirstOrDefault(u => u.ClassName == "maincon").InnerHtml.Trim();

                                        //电话
                                        string Phone = documentE.QuerySelectorAll("span")
                                                       .FirstOrDefault(u => u.ClassName == "phone").InnerHtml.Trim();
                                        //租金
                                        string rentMoney = documentE.QuerySelectorAll("em")
                                                           .FirstOrDefault(u => u.ClassName == "redfont").InnerHtml.Trim();
                                        //面积
                                        string areasize =
                                            htmlParse.Parse(documentE.QuerySelectorAll("ul")
                                                            .FirstOrDefault(u => u.ClassName == "info").InnerHtml)
                                            .QuerySelectorAll("li").ToList()[2].InnerHtml.Replace("面积:", "")
                                            .Replace("㎡", "").Trim();
                                        //客户名
                                        string customerName = documentE.QuerySelectorAll("a")
                                                              .Where(u => u.ClassName == "tx").ToList()[1].InnerHtml.Trim();
                                        var infolilist = htmlParse
                                                         .Parse(documentE.QuerySelectorAll("ul")
                                                                .FirstOrDefault(u => u.ClassName == "info")
                                                                .InnerHtml).QuerySelectorAll("li");
                                        //区域名字
                                        string AreaName = string.Join(",",
                                                                      htmlParse.Parse(infolilist[0].InnerHtml).QuerySelectorAll("a")
                                                                      .Select(p => p.InnerHtml.Trim()).ToList()).Trim();
                                        ShopBegRent shop  = new ShopBegRent();
                                        shop.AreaName     = AreaName;
                                        shop.AreaId       = area.Id.ToString();
                                        shop.InfoContent  = InfoContent;
                                        shop.InfoTitle    = InfoTitle;
                                        shop.Phone        = Phone;
                                        shop.MaxRentMoney = ParseTool.StringToDouble(rentMoney) + 1000;
                                        shop.MinRentMoney = (ParseTool.StringToDouble(rentMoney) - 1000) > 0
                                            ? (ParseTool.StringToDouble(rentMoney) - 1000)
                                            : 0;
                                        shop.Customer   = customerName;
                                        shop.UpdateTime = updateime;
                                        if (areasize.Contains("-"))
                                        {
                                            string[] areasizes = areasize.Split('-');
                                            shop.MinArea       = ParseTool.StringToDouble(areasizes[0]);
                                            shop.MaxArea       = ParseTool.StringToDouble(areasizes[1]);
                                        }
                                        else
                                        {
                                            shop.MinArea = ParseTool.StringToDouble(areasize) - 10 > 0
                                                ? double.Parse(areasize) - 10
                                                : 0;
                                            shop.MaxArea = ParseTool.StringToDouble(areasize) + 10;
                                        }

                                        shop.UpdateTime = updateime;
                                        shop.Id         = Guid.NewGuid();
                                        shopbegrepo.Add(shop);


                                        Console.WriteLine(area.Name + "添加了一条商铺求租");
                                    }
                                }
                                catch (Exception exception)
                                {
                                    errorUrlrepsitory.Add(new ErrorUrl()
                                    {
                                        Url = itemUrl, UrlType = UrlType.Item
                                    });
                                    log.Error(exception.ToString());
                                }
                            });
                        }
                        catch (Exception exception)
                        {
                            errorUrlrepsitory.Add(new ErrorUrl()
                            {
                                Url = e, UrlType = UrlType.Page
                            });
                            log.Error(exception.ToString());
                        }
                    }
                }

                Console.WriteLine("抓取" + area.Name + "求租信息完成");
            }
            catch (Exception e)
            {
                log.Error(e.ToString());
                Console.WriteLine(e);
            }
        }
コード例 #2
0
        /// <summary>
        /// 抓取出租信息
        /// </summary>
        /// <param name="area"></param>
        public void CrawlDataCz(Area area)
        {
            string url = string.Empty;

            try
            {
                Crawler crawler = new Crawler();
                url = area.Url + ConstVar.出租 + "0/";
                string          html      = crawler.Crawl(url, Encoding.UTF8);
                var             htmlParse = new HtmlParser();
                IHtmlDocument   docuement = htmlParse.Parse(html);
                List <IElement> eles      = docuement.QuerySelectorAll("div").ToList().Where(p => p.ClassName == "pager")
                                            .ToList();
                if (eles.Count > 0)
                {
                    IHtmlDocument   htmlA    = htmlParse.Parse(eles[0].InnerHtml);
                    List <IElement> spanEles = htmlA.QuerySelectorAll("span").ToList();


                    int page = 0;
                    if (spanEles.Count > 2)
                    {
                        IElement el = spanEles[spanEles.Count - 2];
                        page = int.Parse(el.InnerHtml);
                    }
                    else
                    {
                        page = 1;
                    }

                    for (int i = 1; i < page + 1; i++)
                    {
                        string str = string.Empty;
                        try
                        {
                            str = url + "pn" + i + "/";
                            Crawler crawlerA = new Crawler();
                            string  htmlB    = crawlerA.Crawl(str, Encoding.UTF8);

                            IDocument docuemnt = htmlParse.Parse(htmlB);
                            IElement  eleist   = docuemnt.QuerySelectorAll("ul")
                                                 .Where(p => p.ClassName == "house-list-wrap").ToList().FirstOrDefault();


                            IDocument       docuementC = htmlParse.Parse(eleist.InnerHtml);
                            List <IElement> eliss      = docuementC.QuerySelectorAll("div").Where(p => p.ClassName == "pic")
                                                         .ToList();
                            Parallel.ForEach(eliss, p =>
                            {
                                string urlA = string.Empty;
                                try
                                {
                                    IDocument documentD = htmlParse.Parse(p.InnerHtml);

                                    IElement eloo = documentD.QuerySelector("a");
                                    urlA          = eloo.GetAttribute("href").ToString();
                                    var htmlE     = crawler.Crawl(eloo.GetAttribute("href").ToString(), Encoding.UTF8);


                                    IDocument documentE = htmlParse.Parse(htmlE);
                                    IElement ele        = documentE.QuerySelectorAll("span")
                                                          .Where(o => o.InnerHtml.StartsWith("更新于")).FirstOrDefault();
                                    DateTime time = ParseTool.StringToDateTime(ele.InnerHtml.Replace("更新于", ""));
                                    if (time > DateTime.Now.AddMonths(-2))
                                    {
                                        IElement InfoTitleElee = documentE.QuerySelectorAll("h1")
                                                                 .FirstOrDefault(o => o.ClassName == "c_000 f20");

                                        IElement money = documentE.QuerySelectorAll("span")
                                                         .FirstOrDefault(o => o.ClassName == "house_basic_title_money_num");
                                        var InfoContent = documentE.QuerySelectorAll("div")
                                                          .Where(o => o.ClassName == "general-item-wrap").FirstOrDefault(u =>
                                                                                                                         u.ParentElement.ClassName == "general-item general-miaoshu");
                                        var Customer = documentE.QuerySelectorAll("span")
                                                       .FirstOrDefault(o => o.ClassName == "f14 c_333 jjrsay");
                                        var phone = documentE.QuerySelectorAll("p")
                                                    .FirstOrDefault(o => o.ClassName == "phone-num");


                                        var InfoEles = htmlParse.Parse(documentE.QuerySelectorAll("ul")
                                                                       .FirstOrDefault(o => o.ClassName == "house_basic_title_content")
                                                                       ?.InnerHtml).QuerySelectorAll("li").ToList();
                                        //面积
                                        IElement areasize = htmlParse.Parse(InfoEles[0].InnerHtml)
                                                            .QuerySelectorAll("span")
                                                            .FirstOrDefault(o => o.ClassName == "house_basic_title_content_item2");
                                        //行业名字
                                        IElement IndustryName = htmlParse.Parse(InfoEles[2].InnerHtml)
                                                                .QuerySelectorAll("span")
                                                                .FirstOrDefault(o => o.ClassName == "house_basic_title_content_item3");
                                        IElement address = htmlParse.Parse(InfoEles[5].InnerHtml).QuerySelectorAll("a")
                                                           .FirstOrDefault(o =>
                                                                           o.ClassName == "house_basic_title_content_item3 blue-link");
                                        IElement addressDetail = htmlParse.Parse(InfoEles[5].InnerHtml)
                                                                 .QuerySelectorAll("span")
                                                                 .FirstOrDefault(o =>
                                                                                 o.ClassName == "house_basic_title_content_item3 xxdz-des");

                                        var shoptransfer = new ShopRentOrTransfer()
                                        {
                                            Id            = Guid.NewGuid(),
                                            ShopArea      = areasize == null ? "" : areasize.InnerHtml,
                                            InfoTitle     = InfoTitleElee == null ? "" : InfoTitleElee.InnerHtml,
                                            TransFerMoney = money == null ? "" : money.InnerHtml,
                                            Address       = address == null ? "" : string.Join("", address.InnerHtml),
                                            DetailAddress = addressDetail == null ? "" : addressDetail.InnerHtml,
                                            InfoContent   = InfoContent == null ? "" : InfoContent.InnerHtml,
                                            InfoType      = Model.BaseModel.InfoType.出租,
                                            IndustryName  = IndustryName == null ? "" : IndustryName.InnerHtml,
                                            Customer      = Customer == null ? "" : Customer.InnerHtml,
                                            Phone         = phone == null ? "" : phone.InnerHtml,
                                            AreaId        = area.Id.ToString(),
                                            UpdateTime    = time
                                        };
                                        var imgUl = documentE.QuerySelectorAll("ul")
                                                    .FirstOrDefault(o => o.ClassName == "general-pic-list");

                                        object obj    = shoprepo.Add(shoptransfer);
                                        bool resultId = (bool)obj;


                                        Console.WriteLine(area.Name + "添加一条出租信息");
                                        if (imgUl != null && resultId)
                                        {
                                            IDocument documentf = htmlParse.Parse(imgUl.InnerHtml);
                                            var tem             = documentf.QuerySelectorAll("img")
                                                                  .Select(o => o.GetAttribute("data-src"));
                                            if (tem != null && tem.Count() > 0)
                                            {
                                                foreach (var o in tem)
                                                {
                                                    if (o != null)
                                                    {
                                                        Bitmap img = crawler.CrawlPic(o);
                                                        if (img != null)
                                                        {
                                                            string path =
                                                                AppDomain.CurrentDomain.BaseDirectory + "Imgs/" +
                                                                shoptransfer.Id + "/";
                                                            if (!Directory.Exists(path))
                                                            {
                                                                Directory.CreateDirectory(path);
                                                            }

                                                            string fullPath =
                                                                path + Guid.NewGuid().ToString().Replace("-", "") +
                                                                ".png";
                                                            img.Save(fullPath);
                                                            string savePath = fullPath.Replace(
                                                                AppDomain.CurrentDomain.BaseDirectory,
                                                                "");
                                                            imgrepo.Add(new Model.Image()
                                                            {
                                                                FkId     = shoptransfer.Id,
                                                                ImageUrl = savePath,
                                                                InfoType = TableType.ShopRentOrTransfer,
                                                            });
                                                        }
                                                    }
                                                }
                                            }
                                        }
                                    }
                                }
                                catch (Exception e)
                                {
                                    errorUrlrepsitory.Add(new ErrorUrl()
                                    {
                                        UrlType = UrlType.Item, Url = urlA
                                    });
                                    log.Error(e.ToString());
                                }
                            });
                        }
                        catch (Exception e)
                        {
                            errorUrlrepsitory.Add(new ErrorUrl()
                            {
                                UrlType = UrlType.Page, Url = str
                            });
                            log.Error(e.ToString());
                        }
                    }
                }

                Console.WriteLine(area.Name + "出租信息抓取完成");
            }
            catch (Exception e)
            {
                errorUrlrepsitory.Add(new ErrorUrl()
                {
                    UrlType = UrlType.All, Url = url
                });
                log.Error(e.ToString());
            }
        }