Exemplo n.º 1
0
        /// <summary>
        /// Downloads the page at <paramref name="url"/> and converts every <c>li</c> found under
        /// <c>ul.screening_left_ul</c> into a <see cref="HouseInfo"/>.
        /// </summary>
        /// <param name="url">Address of the listing page to download.</param>
        /// <returns>
        /// A lazily evaluated sequence of listings. The sequence is empty when the download yields
        /// no HTML or the page does not contain the expected list element.
        /// </returns>
        private IEnumerable<HouseInfo> GetRoomList(string url)
        {
            var htmlResult = HTTPHelper.GetHTMLByURL(url);
            if (string.IsNullOrEmpty(htmlResult))
            {
                return Enumerable.Empty<HouseInfo>();
            }

            var page     = new HtmlParser().Parse(htmlResult);
            var roomList = page.QuerySelector("ul.screening_left_ul");
            if (roomList == null)
            {
                // The expected list is missing (layout change or error page): nothing to report.
                return Enumerable.Empty<HouseInfo>();
            }

            return roomList.QuerySelectorAll("li")
                   // Skip entries that lack any node read below instead of failing the whole page.
                   .Where(room => room.QuerySelector("p.screening_time") != null
                                  && room.QuerySelector("h5") != null
                                  && room.QuerySelector("a") != null)
                   .Select(room =>
            {
                var screening_time = room.QuerySelector("p.screening_time").TextContent;
                var screening_price = room.QuerySelector("h5").TextContent;
                var locationInfo = room.QuerySelector("a");

                // Keep the text before the first comma, then drop everything up to and including "租".
                var locationContent = locationInfo.TextContent.Split(',').FirstOrDefault();
                var location = locationContent.Remove(0, locationContent.IndexOf("租") + 1);

                // housePrice stays 0 when the price text is not a plain integer.
                int housePrice = 0;
                int.TryParse(screening_price.Replace("¥", "").Replace("元/月", ""), out housePrice);

                var markBGType = LocationMarkBGType.SelectColor(housePrice / 1000);

                return new HouseInfo
                {
                    Money = screening_price,
                    HouseURL = "http://www.huzhumaifang.com" + locationInfo.GetAttribute("href"),
                    HouseLocation = location,
                    HouseTime = screening_time,
                    HousePrice = housePrice,
                    LocationMarkBG = markBGType,
                };
            });
        }
        /// <summary>
        /// Downloads page 1 of the price-filtered <c>zufang</c> listing on the
        /// <paramref name="cnName"/> sub-domain of 58.com and hands the HTML to <c>ParsePages</c>.
        /// </summary>
        /// <param name="costFrom">Lower bound placed in the <c>minprice</c> query value.</param>
        /// <param name="costTo">Upper bound placed in the <c>minprice</c> query value.</param>
        /// <param name="cnName">Sub-domain of 58.com to query.</param>
        /// <returns>The value produced by <c>ParsePages</c> for the downloaded HTML.</returns>
        private int GetPageNum(int costFrom, int costTo, string cnName)
        {
            var firstPageHtml = HTTPHelper.GetHTMLByURL(
                $"http://{cnName}.58.com/zufang/pn1/?isreal=true&minprice={costFrom}_{costTo}");

            return ParsePages(firstPageHtml);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Downloads <paramref name="indexURL"/> and reads the page count from the text of the
        /// first element matching <c>a.end</c>.
        /// </summary>
        /// <param name="indexURL">Address of the index page to download.</param>
        /// <returns>The numeric text of <c>a.end</c>, or 0 when no such element exists.</returns>
        private int GetPageCount(string indexURL)
        {
            var html     = HTTPHelper.GetHTMLByURL(indexURL);
            var document = new HtmlParser().Parse(html);

            var lastPageLink = document.QuerySelector("a.end");
            var pageText     = lastPageLink?.TextContent;

            // "0" stands in when the link (or its text) is missing.
            return Convert.ToInt32(pageText ?? "0");
        }
Exemplo n.º 4
0
        /// <summary>
        /// Crawls a listing page and stores every linked movie that is not yet present in
        /// <c>MovieDataContent.resource</c>.
        /// </summary>
        /// <param name="indexURL">Address of the listing page to crawl.</param>
        /// <param name="movieType">
        /// NOTE(review): this value is not read by the body; every stored movie is tagged
        /// <c>MovieType.Latest</c>. Confirm whether that is intended.
        /// </param>
        private static void CrawlerMovieInfoFromOnline(string indexURL, int movieType)
        {
            // Only referenced by the disabled log statement at the end of the method.
            var newMovieCount = 0;
            var htmlDoc       = HTTPHelper.GetHTMLByURL(indexURL);
            var dom           = htmlParser.Parse(htmlDoc);

            // GetAttribute yields null for anchors without an href, so the null-conditional call
            // skips those anchors instead of throwing.
            dom.QuerySelector("div.co_content8")
            ?.QuerySelectorAll("a")
            .Where(a => a.GetAttribute("href")?.StartsWith("/i/") == true)
            .ForEach(a =>
            {
                var onlineURL = "http://www.dy2018.com" + a.GetAttribute("href");
                if (MovieDataContent.resource.Any(mo => mo.OnlineUrl == onlineURL))
                {
                    // Already stored.
                    return;
                }

                var movieInfo = GetMovieInfoFromURL(onlineURL);
                if (movieInfo != null)
                {
                    movieInfo.MovieType = MovieType.Latest;
                    MovieDataContent.resource.Add(movieInfo);
                    newMovieCount++;
                }
            });
            MovieDataContent.SaveChanges();
            //LogHelper.Info($"Finish Dy2018 Crawl {movieType.ToString()}MovieInfo,New Data Count:{newMovieCount},IndexURL:{indexURL}");
        }
        /// <summary>
        /// Downloads page 1 of the unfiltered <c>zufang</c> listing on the
        /// <paramref name="cnName"/> sub-domain of 58.com and hands the HTML to <c>ParsePages</c>.
        /// </summary>
        /// <param name="cnName">Sub-domain of 58.com to query.</param>
        /// <returns>The value produced by <c>ParsePages</c> for the downloaded HTML.</returns>
        private int GetPageNumByIndex(string cnName)
        {
            var firstPageHtml = HTTPHelper.GetHTMLByURL($"http://{cnName}.58.com/zufang/pn1/?isreal=true");

            return ParsePages(firstPageHtml);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Downloads page <paramref name="index"/> of the <c>zufang</c> listing on the
        /// <paramref name="cnName"/> sub-domain of 58.com and maps each <c>tr[logr]</c> row that
        /// has a <c>b.pri</c> element to a <see cref="HouseInfo"/>.
        /// </summary>
        /// <param name="cnName">Sub-domain of 58.com to query.</param>
        /// <param name="index">Page number placed in the URL.</param>
        /// <returns>
        /// A lazily evaluated sequence containing only rows whose location, title and price text
        /// are all non-empty.
        /// </returns>
        private IEnumerable<HouseInfo> GetRoomListByIndex(string cnName, int index)
        {
            var html     = HTTPHelper.GetHTMLByURL($"http://{cnName}.58.com/zufang/pn{index}/?isreal=true");
            var document = new HtmlParser().Parse(html);

            var rows = document.QuerySelectorAll("tr[logr]")
                       .Where(row => row.QuerySelector("b.pri") != null)
                       .Select(row =>
            {
                // The Where clause above guarantees the price node exists.
                var priceNode = row.QuerySelector("b.pri");
                decimal price;
                decimal.TryParse(priceNode.TextContent, out price);

                // One marker per thousand, capped at LocationMarkBGType.Black.
                var thousands = price / 1000;
                LocationMarkBGType marker;
                if (thousands > (int)LocationMarkBGType.Black)
                {
                    marker = LocationMarkBGType.Black;
                }
                else
                {
                    marker = (LocationMarkBGType)thousands;
                }

                // Location: prefer span.f12, fall back to a.a_xq1, otherwise empty.
                var primaryText  = row.QuerySelector("span.f12")?.TextContent;
                var fallbackText = row.QuerySelector("a.a_xq1")?.TextContent;
                string location;
                if (!string.IsNullOrEmpty(primaryText))
                {
                    location = primaryText.Replace("租房", "");
                }
                else if (!string.IsNullOrEmpty(fallbackText))
                {
                    location = fallbackText.Replace("租房", "");
                }
                else
                {
                    location = "";
                }

                var titleNode = row.QuerySelector("a.t");

                return new HouseInfo
                {
                    HouseLocation = location,
                    HouseTitle = titleNode != null ? titleNode.TextContent : "",
                    Money = priceNode.TextContent,
                    // The fourth '_'-separated field of the logr attribute is used in the detail URL.
                    HouseURL = $"http://{cnName}.58.com/zufang/{row.GetAttribute("logr").Split('_')[3]}x.shtml",
                    LocationMarkBG = marker.ToString() + ".png",
                };
            });

            return rows.Where(info => !(string.IsNullOrEmpty(info.HouseLocation)
                                        || string.IsNullOrEmpty(info.HouseTitle)
                                        || string.IsNullOrEmpty(info.Money)));
        }
        /// <summary>
        /// Downloads the page at <paramref name="url"/> and converts every <c>li</c> inside the
        /// first element with class <c>screening_left_ul</c> into a <see cref="HouseInfo"/>.
        /// </summary>
        /// <param name="url">Address of the listing page to download.</param>
        /// <returns>
        /// A lazily evaluated sequence of listings. The sequence is empty when the download yields
        /// no HTML or the page does not contain the expected list element.
        /// </returns>
        private IEnumerable<HouseInfo> GetRoomList(string url)
        {
            var htmlResult = HTTPHelper.GetHTMLByURL(url);
            if (string.IsNullOrEmpty(htmlResult))
            {
                return Enumerable.Empty<HouseInfo>();
            }

            var page     = new AngleSharp.Parser.Html.HtmlParser().Parse(htmlResult);
            var listRoot = page.GetElementsByClassName("screening_left_ul").FirstOrDefault();
            if (listRoot == null)
            {
                // The expected list is missing (layout change or error page): nothing to report.
                return Enumerable.Empty<HouseInfo>();
            }

            return listRoot.QuerySelectorAll("li")
                   // Skip entries that lack any node read below instead of failing the whole page.
                   .Where(element => element.QuerySelector("p.screening_time") != null
                                     && element.QuerySelector("h5") != null
                                     && element.QuerySelector("a") != null)
                   .Select(element =>
            {
                var screening_time = element.QuerySelector("p.screening_time").TextContent;
                var screening_price = element.QuerySelector("h5").TextContent;
                var locationInfo = element.QuerySelector("a");

                // Keep the text before the first comma, then drop everything up to and including "租".
                var locationContent = locationInfo.TextContent.Split(',')[0];
                var location = locationContent.Remove(0, locationContent.IndexOf("租") + 1);

                // housePrice stays 0 when the price text is not numeric.
                decimal housePrice = 0;
                decimal.TryParse(screening_price.Replace("¥", "").Replace("元/月", ""), out housePrice);

                // One marker per thousand, capped at LocationMarkBGType.Black.
                var markBGType = (housePrice / 1000) > (int)LocationMarkBGType.Black ? LocationMarkBGType.Black : (LocationMarkBGType)(housePrice / 1000);

                return new HouseInfo()
                {
                    Money = screening_price,
                    HouseURL = "http://www.huzhumaifang.com" + locationInfo.GetAttribute("href"),
                    HouseLocation = location,
                    HouseTime = screening_time,
                    HousePrice = housePrice,
                    LocationMarkBG = markBGType.ToString() + ".PNG",
                };
            });
        }
        /// <summary>
        /// Downloads page <paramref name="index"/> of the price-filtered <c>zufang</c> listing on the
        /// <paramref name="cnName"/> sub-domain of 58.com and hands the HTML to <c>ParseRoom</c>.
        /// </summary>
        /// <param name="costFrom">Lower bound placed in the <c>minprice</c> query value.</param>
        /// <param name="costTo">Upper bound placed in the <c>minprice</c> query value.</param>
        /// <param name="cnName">Sub-domain of 58.com to query.</param>
        /// <param name="index">Page number placed in the URL.</param>
        /// <returns>The listings produced by <c>ParseRoom</c> for the downloaded HTML.</returns>
        private IEnumerable<HouseInfo> GetRoomList(int costFrom, int costTo, string cnName, int index)
        {
            var pageHtml = HTTPHelper.GetHTMLByURL(
                $"http://{cnName}.58.com/zufang/pn{index}/?isreal=true&minprice={costFrom}_{costTo}");

            return ParseRoom(pageHtml);
        }
        /// <summary>
        /// Downloads page <paramref name="index"/> of the unfiltered <c>zufang</c> listing on the
        /// <paramref name="cnName"/> sub-domain of 58.com and hands the HTML to <c>ParseRoom</c>.
        /// </summary>
        /// <param name="cnName">Sub-domain of 58.com to query.</param>
        /// <param name="index">Page number placed in the URL.</param>
        /// <returns>The listings produced by <c>ParseRoom</c> for the downloaded HTML.</returns>
        private IEnumerable<HouseInfo> GetRoomListByIndex(string cnName, int index)
        {
            var pageHtml = HTTPHelper.GetHTMLByURL($"http://{cnName}.58.com/zufang/pn{index}/?isreal=true");

            return ParseRoom(pageHtml);
        }
Exemplo n.º 10
0
        /// <summary>
        /// Downloads page 1 of the price-filtered <c>pinpaigongyu</c> listing on the
        /// <paramref name="cnName"/> sub-domain of 58.com and reads the number shown in the
        /// <c>em</c> element inside the first element with class <c>listsum</c>.
        /// </summary>
        /// <param name="costFrom">Lower bound placed in the <c>minprice</c> query value.</param>
        /// <param name="costTo">Upper bound placed in the <c>minprice</c> query value.</param>
        /// <param name="cnName">Sub-domain of 58.com to query.</param>
        /// <returns>The numeric text of that <c>em</c> element, or 0 when it is absent.</returns>
        private int GetListSum(int costFrom, int costTo, string cnName)
        {
            var html     = HTTPHelper.GetHTMLByURL($"http://{cnName}.58.com/pinpaigongyu/pn/{1}/?minprice={costFrom}_{costTo}");
            var document = new HtmlParser().Parse(html);

            var summary   = document.GetElementsByClassName("listsum").FirstOrDefault();
            var countText = summary?.QuerySelector("em")?.TextContent;

            // "0" stands in when the summary element or its <em> child is missing.
            return Convert.ToInt32(countText ?? "0");
        }
Exemplo n.º 11
0
        /// <summary>
        /// Returns the data a page needs for a WeChat JS-API signature: a SHA1 value computed over
        /// the cached jsapi ticket, a nonce, a fresh timestamp and <paramref name="url"/>.
        /// The ticket is fetched again when none is cached or the cached one is older than
        /// 7200 seconds.
        /// </summary>
        /// <param name="url">URL that is included in the signed text and echoed in the response.</param>
        /// <param name="noncestr">
        /// Nonce included in the signed text. When null or empty, the constant nonce that the
        /// cached-ticket path always used before ("1234567890123456") is used instead.
        /// </param>
        /// <returns>
        /// JSON with <c>issuccess = true</c> and the signature data, or <c>issuccess = false</c>
        /// with an <c>error</c> message when the token or ticket could not be obtained.
        /// </returns>
        public ActionResult Info(string url, string noncestr)
        {
            // Elapsed time must be measured as (now - last) in TOTAL seconds. The Milliseconds
            // component of a TimeSpan is always below 1000, so it could never exceed 7200.
            // NOTE(review): 7200 is taken to be the ticket lifetime in seconds — confirm.
            if (string.IsNullOrEmpty(_ticket) || _lastTimestamp == null || (DateTime.Now - _lastTimestamp).TotalSeconds > 7200)
            {
                var resultString = HTTPHelper.GetHTMLByURL("https://api.weixin.qq.com/cgi-bin/token?grant_type=client_credential&appid="
                                                           + appid + "&secret=" + secret);
                dynamic resultValue = JsonConvert.DeserializeObject<dynamic>(resultString);
                if (resultValue == null || resultValue.access_token == null || resultValue.access_token.Value == null)
                {
                    // AllowGet matches the success path, so a GET caller receives the error payload.
                    return Json(new { issuccess = false, error = "获取token失败" }, JsonRequestBehavior.AllowGet);
                }
                var token = resultValue.access_token.Value;

                resultString = HTTPHelper.GetHTMLByURL("https://api.weixin.qq.com/cgi-bin/ticket/getticket?access_token=" + token + "&type=jsapi");
                dynamic ticketValue = JsonConvert.DeserializeObject<dynamic>(resultString);
                if (ticketValue == null || ticketValue.errcode == null || ticketValue.errcode.Value != 0 || ticketValue.ticket == null)
                {
                    return Json(new { issuccess = false, error = "获取ticketValue失败" }, JsonRequestBehavior.AllowGet);
                }
                _ticket        = ticketValue.ticket.Value;
                _lastTimestamp = DateTime.Now;
            }

            // Sign with the caller's nonce on both the refresh and the cached path.
            var nonce     = string.IsNullOrEmpty(noncestr) ? "1234567890123456" : noncestr;
            var timestamp = GetTimeStamp();
            var hexString = string.Format("jsapi_ticket={0}&noncestr={3}&timestamp={1}&url={2}",
                                          _ticket, timestamp, url, nonce);

            // Property names (including "tiket") are part of the response contract and are kept as-is.
            return Json(new
            {
                issuccess = true,
                sha1value = GetSHA1Value(hexString),
                timestamp = timestamp,
                url = url,
                appid = appid,
                debug = isDedug,
                tiket = _ticket
            }, JsonRequestBehavior.AllowGet);
        }
Exemplo n.º 12
0
        /// <summary>
        /// Downloads page 1 of the price-filtered <c>zufang</c> listing on the
        /// <paramref name="cnName"/> sub-domain of 58.com and returns the largest number found
        /// among the <c>span</c> elements inside the first <c>.pager</c> element.
        /// </summary>
        /// <param name="costFrom">Lower bound placed in the <c>minprice</c> query value.</param>
        /// <param name="costTo">Upper bound placed in the <c>minprice</c> query value.</param>
        /// <param name="cnName">Sub-domain of 58.com to query.</param>
        /// <returns>The largest parsed number, or 0 when there is no pager or no span.</returns>
        private int GetPageNum(int costFrom, int costTo, string cnName)
        {
            var html     = HTTPHelper.GetHTMLByURL($"http://{cnName}.58.com/zufang/pn1/?isreal=true&minprice={costFrom}_{costTo}");
            var document = new HtmlParser().Parse(html);

            var spans = document.QuerySelector(".pager")?.QuerySelectorAll("span");
            if (spans == null)
            {
                return 0;
            }

            return spans
                   .Select(span =>
            {
                // Non-numeric spans count as 0.
                int parsed;
                return int.TryParse(span.TextContent, out parsed) ? parsed : 0;
            })
                   .DefaultIfEmpty(0)
                   .Max();
        }
Exemplo n.º 13
0
 /// <summary>
 /// Follows a script-based redirect: when <paramref name="htmlDoc"/> contains
 /// <c>window.location</c>, the target path is taken from the first <c>script</c> element and
 /// the HTML of that target on www.dy2018.com is downloaded and returned.
 /// </summary>
 /// <param name="htmlDoc">HTML that may consist of a redirect script.</param>
 /// <returns>
 /// The HTML of the redirect target, or <paramref name="htmlDoc"/> unchanged when it is null or
 /// empty, contains no <c>window.location</c>, or has no <c>script</c> element.
 /// </returns>
 private static string GetHTMLOnJumpWebPage(string htmlDoc)
 {
     // A null/empty document cannot contain a redirect.
     if (string.IsNullOrEmpty(htmlDoc) || !htmlDoc.Contains("window.location"))
     {
         return(htmlDoc);
     }

     var scriptDom = htmlParser.Parse(htmlDoc).QuerySelector("script");
     if (scriptDom == null)
     {
         // "window.location" appeared outside a script element; nothing to follow.
         return(htmlDoc);
     }

     // Strip the assignment and the string-concatenation syntax so only the path remains.
     var tempURL = "http://www.dy2018.com" + scriptDom.InnerHtml.Replace("window.location=", "")
                   .Replace("+", "").Replace("\"", "").Replace(" ", "").Replace(";", "");

     return(HTTPHelper.GetHTMLByURL(tempURL));
 }
Exemplo n.º 14
0
        /// <summary>
        /// Downloads page <paramref name="index"/> of the price-filtered <c>zufang</c> listing on the
        /// <paramref name="cnName"/> sub-domain of 58.com and maps each <c>tr[logr]</c> row to a
        /// <see cref="HouseInfo"/>.
        /// </summary>
        /// <param name="costFrom">Lower bound placed in the <c>minprice</c> query value.</param>
        /// <param name="costTo">Upper bound placed in the <c>minprice</c> query value.</param>
        /// <param name="cnName">Sub-domain of 58.com to query.</param>
        /// <param name="index">Page number placed in the URL.</param>
        /// <returns>
        /// A lazily evaluated sequence containing only rows whose location, title and price text
        /// are all non-empty.
        /// </returns>
        private IEnumerable<HouseInfo> GetRoomList(int costFrom, int costTo, string cnName, int index)
        {
            var html     = HTTPHelper.GetHTMLByURL($"http://{cnName}.58.com/zufang/pn{index}/?isreal=true&minprice={costFrom}_{costTo}");
            var document = new HtmlParser().Parse(html);

            var rows = document.QuerySelectorAll("tr[logr]").Select(row =>
            {
                var info = new HouseInfo();
                info.HouseLocation = GetLocation(row);
                info.HouseTitle    = row.QuerySelector("a.t")?.TextContent;
                info.Money         = row.QuerySelector("b.pri")?.TextContent;
                // The fourth '_'-separated field of the logr attribute is used in the detail URL.
                info.HouseURL      = $"http://{cnName}.58.com/zufang/{row.GetAttribute("logr").Split('_')[3]}x.shtml";
                return info;
            });

            return rows.Where(info => !(string.IsNullOrEmpty(info.HouseLocation)
                                        || string.IsNullOrEmpty(info.HouseTitle)
                                        || string.IsNullOrEmpty(info.Money)));
        }
Exemplo n.º 15
0
        /// <summary>
        /// Downloads page <paramref name="index"/> of the price-filtered <c>pinpaigongyu</c> listing
        /// on the <paramref name="cnName"/> sub-domain of 58.com and maps each <c>li</c> that has a
        /// <c>logr</c> attribute to a <see cref="HouseInfo"/>.
        /// </summary>
        /// <param name="costFrom">Lower bound placed in the <c>minprice</c> query value.</param>
        /// <param name="costTo">Upper bound placed in the <c>minprice</c> query value.</param>
        /// <param name="cnName">Sub-domain of 58.com to query.</param>
        /// <param name="index">Page number placed in the URL.</param>
        /// <returns>A lazily evaluated sequence of listings.</returns>
        private IEnumerable<HouseInfo> GetRoomList(int costFrom, int costTo, string cnName, int index)
        {
            var url        = $"http://{cnName}.58.com/pinpaigongyu/pn/{index}/?minprice={costFrom}_{costTo}";
            var htmlResult = HTTPHelper.GetHTMLByURL(url);
            var page       = new HtmlParser().Parse(htmlResult);

            return(page.QuerySelectorAll("li").Where(element => element.HasAttribute("logr")).Select(element =>
            {
                var houseTitle = element.QuerySelector("h2").TextContent;
                var houseInfoList = houseTitle.Split(' ');

                // The location is normally the second space-separated token of the title, or the
                // first when both marker words are present. A title without any space has a single
                // token, so fall back to it instead of indexing past the end of the array.
                var useFirstToken = houseInfoList.Length < 2
                                    || new[] { "公寓", "青年社区" }.All(s => houseInfoList.Contains(s));

                return new HouseInfo
                {
                    HouseTitle = houseTitle,
                    HouseURL = $"http://{cnName}.58.com" + element.QuerySelector("a").GetAttribute("href"),
                    Money = element.QuerySelector("b").TextContent,
                    HouseLocation = useFirstToken ? houseInfoList[0] : houseInfoList[1]
                };
            }));
        }
Exemplo n.º 16
0
 /// <summary>
 /// Downloads a movie detail page and extracts its data into a <see cref="Movie"/>.
 /// </summary>
 /// <param name="onlineURL">Address of the movie detail page.</param>
 /// <returns>
 /// The extracted movie, or null when the page could not be downloaded or any exception
 /// occurred while extracting.
 /// </returns>
 private static Movie GetMovieInfoFromURL(string onlineURL)
 {
     try
     {
         var movieHTML = HTTPHelper.GetHTMLByURL(onlineURL);
         if (string.IsNullOrEmpty(movieHTML))
         {
             return(null);
         }
         var movieDoc       = htmlParser.Parse(movieHTML);
         var zoom           = movieDoc.GetElementById("Zoom");
         var lstDownLoadURL = movieDoc.QuerySelectorAll("[bgcolor='#fdfddf']");
         var updatetime     = movieDoc.QuerySelector("span.updatetime");

         // Publication date defaults to "now". It is only replaced when the page value parses;
         // parsing straight into pubDate would reset it to DateTime.MinValue on failure.
         var      pubDate = DateTime.Now;
         DateTime parsedDate;
         if (!string.IsNullOrEmpty(updatetime?.TextContent) &&
             DateTime.TryParse(updatetime.TextContent.Replace("发布时间:", ""), out parsedDate))
         {
             pubDate = parsedDate;
         }

         var lstURL    = lstDownLoadURL.Select(a => a.QuerySelector("a")?.TextContent ?? "");
         var movieName = movieDoc.QuerySelector("div.title_all")?.QuerySelector("h1");
         var movieInfo = new Movie()
         {
             // movieName is null when the title block is missing; use the placeholder then.
             ResourceName    = movieName?.TextContent ?? "找不到影片信息...",
             OnlineUrl       = onlineURL,
             MovieIntro      = zoom?.TextContent ?? "暂无介绍...",
             DownLoadURLList = string.Join(";", lstURL),
             PubDate         = pubDate.Date,
             CreateTime      = DateTime.Now,
             SoureceDomain   = SoureceDomainConsts.Dy2018Domain,
             //MovieType=(int)MovieTypeEnum.Latest
         };
         return(movieInfo);
     }
     catch (Exception)
     {
         // Best effort: a page that cannot be processed is reported as "no movie".
         //LogHelper.Error("Dy2018 GetMovieInfoFromURL Exception", ex, new { OnloneURL = onlineURL });
         return(null);
     }
 }
Exemplo n.º 17
0
 /// <summary>
 /// Starts a background task that crawls the www.dy2018.com front page and stores every linked
 /// movie that is not yet present in <c>MovieDataContent.resource</c>. Only links inside the
 /// first three <c>div.co_content222</c> blocks whose href starts with <c>/i/</c> are followed.
 /// </summary>
 public static void CrawlHotMovie()
 {
     Task.Factory.StartNew(() =>
     {
         try
         {
             // Only referenced by the disabled log statement at the end of the task.
             var newMovieCount = 0;
             //LogHelper.Info("Dy2018 CrawlHotMovie Start...");
             var htmlDoc = HTTPHelper.GetHTMLByURL("http://www.dy2018.com/");
             htmlDoc     = GetHTMLOnJumpWebPage(htmlDoc);
             var dom     = htmlParser.Parse(htmlDoc);

             // SelectMany flattens the per-block link lists and, unlike a seedless Aggregate,
             // does not throw when no block is found. Anchors without an href are skipped.
             dom.QuerySelectorAll("div.co_content222")
             ?.Take(3)
             .SelectMany(divInfo => divInfo.QuerySelectorAll("a")
                         .Where(a => a.GetAttribute("href")?.StartsWith("/i/") == true))
             .ForEach(a =>
             {
                 var onlineURL = "http://www.dy2018.com" + a.GetAttribute("href");
                 if (MovieDataContent.resource.Any(mo => mo.OnlineUrl == onlineURL))
                 {
                     // Already stored.
                     return;
                 }

                 var movieInfo = GetMovieInfoFromURL(onlineURL);
                 if (movieInfo != null)
                 {
                     movieInfo.MovieType = MovieType.Latest;
                     MovieDataContent.resource.Add(movieInfo);
                     newMovieCount++;
                 }
             });
             MovieDataContent.SaveChanges();
             //LogHelper.Info($"Finish Dy2018 CrawlHotMovie,New Data Count:{newMovieCount}");
         }
         catch (Exception)
         {
             // Best effort: a failed crawl must not take down the background task.
             //LogHelper.Error("Dy2018 CrawlHotMovie Exception", ex);
         }
     });
 }
Exemplo n.º 18
0
        /// <summary>
        /// Downloads page <paramref name="index"/> of the <c>pinpaigongyu</c> listing on the
        /// <paramref name="cnName"/> sub-domain of 58.com and maps each <c>li</c> that has a
        /// <c>logr</c> attribute to a <see cref="HouseInfo"/>.
        /// </summary>
        /// <param name="cnName">Sub-domain of 58.com to query.</param>
        /// <param name="index">Page number placed in the URL.</param>
        /// <returns>A lazily evaluated sequence of listings.</returns>
        private IEnumerable<HouseInfo> GetRoomListByIndex(string cnName, int index)
        {
            var url        = $"http://{cnName}.58.com/pinpaigongyu/pn/{index}";
            var htmlResult = HTTPHelper.GetHTMLByURL(url);
            var page       = new HtmlParser().Parse(htmlResult);

            return(page.QuerySelectorAll("li").Where(element => element.HasAttribute("logr")).Select(element =>
            {
                var houseTitle = element.QuerySelector("h2").TextContent;
                var houseInfoList = houseTitle.Split(' ');

                // Read the price text once; housePrice stays 0 when it is not a plain integer.
                var priceText = element.QuerySelector("b").TextContent;
                int.TryParse(priceText, out var housePrice);
                var markBGType = LocationMarkBGType.SelectColor(housePrice / 1000);

                // The location is normally the second space-separated token of the title, or the
                // first when both marker words are present. A title without any space has a single
                // token, so fall back to it instead of indexing past the end of the array.
                var useFirstToken = houseInfoList.Length < 2
                                    || new[] { "公寓", "青年社区" }.All(s => houseInfoList.Contains(s));

                return new HouseInfo
                {
                    HouseTitle = houseTitle,
                    HouseURL = $"http://{cnName}.58.com" + element.QuerySelector("a").GetAttribute("href"),
                    Money = priceText,
                    HouseLocation = useFirstToken ? houseInfoList[0] : houseInfoList[1],
                    LocationMarkBG = markBGType,
                };
            }));
        }
Exemplo n.º 19
0
        /// <summary>
        /// Starts a background task that walks every page of www.girl13.com, downloads each image
        /// found inside <c>#loop-square</c> that is not yet recorded in
        /// <c>MovieDataContent.GirlsPics</c>, and stores a record for it. Changes are saved once
        /// per page. Any exception ends the crawl and is written to the log.
        /// </summary>
        public static void CrawlHostMovieInfo()
        {
            Task.Factory.StartNew(() =>
            {
                try
                {
                    var indexURL = "http://www.girl13.com/page/1/";
                    var html     = HTTPHelper.GetHTMLByURL(indexURL, true);
                    if (string.IsNullOrEmpty(html))
                    {
                        return;
                    }
                    var htmlDom     = htmlParser.Parse(html);
                    int totalImgNum = 0;
                    int pageCount   = 0;

                    // The page count is the largest number shown among the pager links.
                    htmlDom.QuerySelector(".page-navigator")
                    .QuerySelectorAll("a")
                    .ForEach(a =>
                    {
                        int pageIndexItem;
                        if (int.TryParse(a.TextContent, out pageIndexItem) && pageCount < pageIndexItem)
                        {
                            pageCount = pageIndexItem;
                        }
                    });
                    Console.WriteLine(String.Format("找到美女页面{0}个", pageCount));

                    // Creating the database is needed once per run, not once per image.
                    MovieDataContent.Database.EnsureCreated();

                    for (var i = 1; i <= pageCount; i++)
                    {
                        // Page 1 is already parsed above.
                        if (i != 1)
                        {
                            indexURL = String.Format("http://www.girl13.com/page/{0}/", i);
                            html     = HTTPHelper.GetHTMLByURL(indexURL, true);
                            if (string.IsNullOrEmpty(html))
                            {
                                break;
                            }
                            htmlDom = htmlParser.Parse(html);
                        }

                        var imgInPageCount = 0;
                        htmlDom.QuerySelector("#loop-square")
                        .QuerySelectorAll("img")
                        .ForEach(img =>
                        {
                            imgInPageCount++;
                            var onlineURL = img.GetAttribute("src");

                            // An <img> without src yields null; skip it (and the weix2.gif
                            // image) rather than letting one bad tag abort the whole crawl.
                            if (string.IsNullOrEmpty(onlineURL) || onlineURL.Contains("weix2.gif"))
                            {
                                return;
                            }
                            if (MovieDataContent.GirlsPics.Any(mo => mo.PicOriginUrl == onlineURL))
                            {
                                // Already recorded.
                                return;
                            }

                            var girlInfo          = new GirlsPics();
                            girlInfo.Id           = Guid.NewGuid().ToString();
                            girlInfo.PicOriginUrl = onlineURL;
                            girlInfo.CreateTime   = System.DateTime.Now;

                            var savedImgName = "";
                            HTTPHelper.SaveResourceByURL(girlInfo.PicOriginUrl, out savedImgName);
                            girlInfo.PicLocalUrl = savedImgName;

                            MovieDataContent.GirlsPics.Add(girlInfo);
                            Console.WriteLine($"{imgInPageCount}/{i}/{++totalImgNum}:{girlInfo.PicOriginUrl} | success.");
                        });
                        MovieDataContent.SaveChanges();
                        Console.WriteLine($"finished page {i}.");
                        LogHelper.Info($"finished page {i}.");
                    }
                }
                catch (Exception ex)
                {
                    LogHelper.Error("Girl13 CrawlImg Exception", ex);
                }
            });
        }
Exemplo n.º 20
0
        /// <summary>
        /// Downloads the response of an API endpoint and deserializes it from JSON.
        /// </summary>
        /// <typeparam name="T">Type the JSON response is deserialized into.</typeparam>
        /// <param name="apiURL">Address of the API endpoint.</param>
        /// <returns>The deserialized response.</returns>
        public static T GetAPIResult<T>(string apiURL)
        {
            string responseText = HTTPHelper.GetHTMLByURL(apiURL);
            T      payload      = Newtonsoft.Json.JsonConvert.DeserializeObject<T>(responseText);

            return payload;
        }