Ejemplo n.º 1
0
        /// <summary>
        /// Crawls the rental ("出租") listings of one area and stores them.
        /// The first list page (<c>area.Url + ConstVar.出租 + "0/"</c>) is fetched to read the
        /// page count from its <c>div.pager</c> element; every list page <c>pn1/</c>..<c>pnN/</c>
        /// is then crawled. If the first page has no <c>div.pager</c>, no list page is crawled.
        /// Exceptions are not propagated: they are logged and the offending URL is recorded
        /// through <c>errorUrlrepsitory</c>.
        /// </summary>
        /// <param name="area">Area to crawl; its <c>Url</c>, <c>Id</c> and <c>Name</c> are read.</param>
        public void CrawlDataCz(Area area)
        {
            string url = string.Empty;

            try
            {
                Crawler crawler = new Crawler();
                url = area.Url + ConstVar.出租 + "0/";
                string firstPageHtml = crawler.Crawl(url, Encoding.UTF8);
                var htmlParse = new HtmlParser();
                IHtmlDocument firstPage = htmlParse.Parse(firstPageHtml);

                IElement pager = firstPage.QuerySelectorAll("div")
                                          .FirstOrDefault(p => p.ClassName == "pager");
                if (pager != null)
                {
                    int pageCount = ReadCzPageCount(pager);
                    for (int i = 1; i <= pageCount; i++)
                    {
                        CrawlCzListPage(area, crawler, htmlParse, url + "pn" + i + "/");
                    }
                }

                Console.WriteLine(area.Name + "出租信息抓取完成");
            }
            catch (Exception e)
            {
                errorUrlrepsitory.Add(new ErrorUrl()
                {
                    UrlType = UrlType.All, Url = url
                });
                log.Error(e.ToString());
            }
        }

        /// <summary>
        /// Reads the number of list pages from the pager element: the second-to-last
        /// <c>span</c> when the pager holds more than two spans, otherwise 1.
        /// </summary>
        /// <param name="pager">The <c>div.pager</c> element of the first list page.</param>
        /// <returns>The page count.</returns>
        /// <exception cref="FormatException">The span content is not an integer.</exception>
        private static int ReadCzPageCount(IElement pager)
        {
            List<IElement> spans = pager.QuerySelectorAll("span").ToList();
            if (spans.Count <= 2)
            {
                return 1;
            }

            return int.Parse(spans[spans.Count - 2].InnerHtml);
        }

        /// <summary>
        /// Crawls one list page and processes every <c>div.pic</c> entry inside its
        /// <c>ul.house-list-wrap</c> in parallel. Any failure is logged and recorded as a
        /// <c>UrlType.Page</c> error for <paramref name="pageUrl"/>.
        /// </summary>
        /// <param name="area">Area the listings belong to.</param>
        /// <param name="crawler">Crawler used for the detail pages and images.</param>
        /// <param name="htmlParse">Parser used for list and detail pages.</param>
        /// <param name="pageUrl">URL of the list page.</param>
        private void CrawlCzListPage(Area area, Crawler crawler, HtmlParser htmlParse, string pageUrl)
        {
            try
            {
                // A dedicated crawler is used for the list page itself, as before.
                Crawler pageCrawler = new Crawler();
                string pageHtml = pageCrawler.Crawl(pageUrl, Encoding.UTF8);

                IDocument pageDocument = htmlParse.Parse(pageHtml);
                IElement listWrap = pageDocument.QuerySelectorAll("ul")
                                                .FirstOrDefault(p => p.ClassName == "house-list-wrap");
                if (listWrap == null)
                {
                    // Still reported through the catch below, but with a meaningful message
                    // instead of a bare NullReferenceException.
                    throw new InvalidOperationException(
                        "List page contains no ul.house-list-wrap element: " + pageUrl);
                }

                List<IElement> entries = listWrap.QuerySelectorAll("div")
                                                 .Where(p => p.ClassName == "pic")
                                                 .ToList();

                // NOTE(review): crawler and htmlParse are shared by all parallel workers,
                // exactly as in the previous implementation - confirm both are safe for
                // concurrent use.
                Parallel.ForEach(entries, entry => CrawlCzItem(area, crawler, htmlParse, entry));
            }
            catch (Exception e)
            {
                errorUrlrepsitory.Add(new ErrorUrl()
                {
                    UrlType = UrlType.Page, Url = pageUrl
                });
                log.Error(e.ToString());
            }
        }

        /// <summary>
        /// Crawls the detail page linked from one list entry and, when the listing was
        /// updated within the last two months, stores it together with its images.
        /// Any failure is logged and recorded as a <c>UrlType.Item</c> error.
        /// </summary>
        /// <param name="area">Area the listing belongs to.</param>
        /// <param name="crawler">Crawler used for the detail page and images.</param>
        /// <param name="htmlParse">Parser used for the detail page.</param>
        /// <param name="entry">The <c>div.pic</c> element of the list entry.</param>
        private void CrawlCzItem(Area area, Crawler crawler, HtmlParser htmlParse, IElement entry)
        {
            string itemUrl = string.Empty;
            try
            {
                IElement link = entry.QuerySelector("a");
                string href = link == null ? null : link.GetAttribute("href");
                if (string.IsNullOrEmpty(href))
                {
                    throw new InvalidOperationException("List entry has no detail link (a[href]).");
                }

                itemUrl = href;
                string detailHtml = crawler.Crawl(itemUrl, Encoding.UTF8);
                IDocument detail = htmlParse.Parse(detailHtml);

                IElement updated = detail.QuerySelectorAll("span")
                                         .FirstOrDefault(o => o.InnerHtml.StartsWith("更新于", StringComparison.Ordinal));
                if (updated == null)
                {
                    throw new InvalidOperationException(
                        "Detail page has no span starting with \"更新于\": " + itemUrl);
                }

                DateTime time = ParseTool.StringToDateTime(updated.InnerHtml.Replace("更新于", ""));
                if (time <= DateTime.Now.AddMonths(-2))
                {
                    // Listings not updated within the last two months are skipped.
                    return;
                }

                IElement title = detail.QuerySelectorAll("h1")
                                       .FirstOrDefault(o => o.ClassName == "c_000 f20");
                IElement money = detail.QuerySelectorAll("span")
                                       .FirstOrDefault(o => o.ClassName == "house_basic_title_money_num");
                IElement content = detail.QuerySelectorAll("div")
                                         .Where(o => o.ClassName == "general-item-wrap")
                                         .FirstOrDefault(u => u.ParentElement != null &&
                                                              u.ParentElement.ClassName == "general-item general-miaoshu");
                IElement customer = detail.QuerySelectorAll("span")
                                          .FirstOrDefault(o => o.ClassName == "f14 c_333 jjrsay");
                IElement phone = detail.QuerySelectorAll("p")
                                       .FirstOrDefault(o => o.ClassName == "phone-num");

                // Basic-info list; a missing list or a missing row yields empty fields,
                // in line with how every other optional element is handled below.
                IElement infoList = detail.QuerySelectorAll("ul")
                                          .FirstOrDefault(o => o.ClassName == "house_basic_title_content");
                List<IElement> infoItems = infoList == null
                    ? new List<IElement>()
                    : infoList.QuerySelectorAll("li").ToList();

                // Floor area
                IElement areasize = FindCzInfoField(infoItems, 0, "span", "house_basic_title_content_item2");
                // Industry name
                IElement industryName = FindCzInfoField(infoItems, 2, "span", "house_basic_title_content_item3");
                // Address link and detailed address share the same row
                IElement address = FindCzInfoField(infoItems, 5, "a", "house_basic_title_content_item3 blue-link");
                IElement addressDetail = FindCzInfoField(infoItems, 5, "span", "house_basic_title_content_item3 xxdz-des");

                var shoptransfer = new ShopRentOrTransfer()
                {
                    Id            = Guid.NewGuid(),
                    ShopArea      = areasize?.InnerHtml ?? "",
                    InfoTitle     = title?.InnerHtml ?? "",
                    TransFerMoney = money?.InnerHtml ?? "",
                    Address       = address?.InnerHtml ?? "",
                    DetailAddress = addressDetail?.InnerHtml ?? "",
                    InfoContent   = content?.InnerHtml ?? "",
                    InfoType      = Model.BaseModel.InfoType.出租,
                    IndustryName  = industryName?.InnerHtml ?? "",
                    Customer      = customer?.InnerHtml ?? "",
                    Phone         = phone?.InnerHtml ?? "",
                    AreaId        = area.Id.ToString(),
                    UpdateTime    = time
                };

                IElement imgUl = detail.QuerySelectorAll("ul")
                                       .FirstOrDefault(o => o.ClassName == "general-pic-list");

                object addResult = shoprepo.Add(shoptransfer);
                bool added = (bool)addResult;

                Console.WriteLine(area.Name + "添加一条出租信息");

                // Images are only stored when the listing itself was added.
                if (imgUl != null && added)
                {
                    SaveCzImages(crawler, imgUl, shoptransfer);
                }
            }
            catch (Exception e)
            {
                errorUrlrepsitory.Add(new ErrorUrl()
                {
                    UrlType = UrlType.Item, Url = itemUrl
                });
                log.Error(e.ToString());
            }
        }

        /// <summary>
        /// Looks up, inside the info row at <paramref name="index"/>, the first
        /// <paramref name="tag"/> element whose class attribute equals <paramref name="className"/>.
        /// </summary>
        /// <param name="items">The <c>li</c> rows of the basic-info list.</param>
        /// <param name="index">Zero-based row index.</param>
        /// <param name="tag">Tag name to search for inside the row.</param>
        /// <param name="className">Exact value of the class attribute.</param>
        /// <returns>The matching element, or <c>null</c> when the row or element does not exist.</returns>
        private static IElement FindCzInfoField(List<IElement> items, int index, string tag, string className)
        {
            if (index >= items.Count)
            {
                return null;
            }

            return items[index].QuerySelectorAll(tag)
                               .FirstOrDefault(o => o.ClassName == className);
        }

        /// <summary>
        /// Downloads every image referenced by a <c>data-src</c> attribute inside
        /// <paramref name="imgUl"/>, saves it under <c>Imgs/{listing id}/</c> below the
        /// application base directory and records the relative path through <c>imgrepo</c>.
        /// </summary>
        /// <param name="crawler">Crawler used to download the images.</param>
        /// <param name="imgUl">The <c>ul.general-pic-list</c> element of the detail page.</param>
        /// <param name="shoptransfer">The stored listing the images belong to.</param>
        private void SaveCzImages(Crawler crawler, IElement imgUl, ShopRentOrTransfer shoptransfer)
        {
            string baseDirectory = AppDomain.CurrentDomain.BaseDirectory;
            string directory = baseDirectory + "Imgs/" + shoptransfer.Id + "/";

            foreach (string source in imgUl.QuerySelectorAll("img").Select(o => o.GetAttribute("data-src")))
            {
                if (source == null)
                {
                    continue;
                }

                // Dispose the bitmap as soon as it is written; it was leaked before.
                using (Bitmap img = crawler.CrawlPic(source))
                {
                    if (img == null)
                    {
                        continue;
                    }

                    // CreateDirectory is a no-op when the directory already exists.
                    Directory.CreateDirectory(directory);

                    string fullPath = directory + Guid.NewGuid().ToString().Replace("-", "") + ".png";
                    // NOTE(review): Save(string) does not take the format from the ".png"
                    // extension - confirm the files really end up PNG-encoded.
                    img.Save(fullPath);

                    imgrepo.Add(new Model.Image()
                    {
                        FkId     = shoptransfer.Id,
                        ImageUrl = fullPath.Replace(baseDirectory, ""),
                        InfoType = TableType.ShopRentOrTransfer,
                    });
                }
            }
        }