/// <summary>
/// Downloads the page at <paramref name="url"/> and maps every <c>li</c>
/// inside <c>ul.screening_left_ul</c> to a <c>HouseInfo</c>.
/// </summary>
/// <param name="url">Address of the listing page to download.</param>
/// <returns>
/// A deferred sequence with one entry per list item; an empty sequence when
/// the page contains no <c>ul.screening_left_ul</c> element.
/// </returns>
private IEnumerable<HouseInfo> GetRoomList(string url)
{
    var htmlResult = HTTPHelper.GetHTMLByURL(url);
    var page = new HtmlParser().Parse(htmlResult);

    // The lookup result used to be dereferenced directly, which threw a
    // NullReferenceException whenever the list container was absent.
    var listContainer = page.QuerySelector("ul.screening_left_ul");
    if (listContainer == null)
    {
        return Enumerable.Empty<HouseInfo>();
    }

    return listContainer.QuerySelectorAll("li").Select(room =>
    {
        var screening_time = room.QuerySelector("p.screening_time").TextContent;
        var screening_price = room.QuerySelector("h5").TextContent;
        var locationInfo = room.QuerySelector("a");

        // Keep the text before the first comma, then drop everything up to and
        // including the first "租" (nothing is removed when it does not occur).
        var locationContent = locationInfo.TextContent.Split(',').FirstOrDefault();
        var location = locationContent.Remove(0, locationContent.IndexOf("租") + 1);

        // The price stays 0 when the stripped text is not an integer.
        int housePrice = 0;
        int.TryParse(screening_price.Replace("¥", "").Replace("元/月", ""), out housePrice);
        var markBGType = LocationMarkBGType.SelectColor(housePrice / 1000);

        return new HouseInfo
        {
            Money = screening_price,
            HouseURL = "http://www.huzhumaifang.com" + locationInfo.GetAttribute("href"),
            HouseLocation = location,
            HouseTime = screening_time,
            HousePrice = housePrice,
            LocationMarkBG = markBGType,
        };
    });
}
/// <summary>
/// Downloads page 1 of the price-filtered <c>zufang</c> listing for the given
/// 58.com sub-domain and hands the HTML to <c>ParsePages</c>.
/// </summary>
/// <param name="costFrom">First number of the <c>minprice</c> query value.</param>
/// <param name="costTo">Second number of the <c>minprice</c> query value.</param>
/// <param name="cnName">Sub-domain placed in front of <c>.58.com</c>.</param>
/// <returns>Whatever <c>ParsePages</c> returns for the downloaded HTML.</returns>
private int GetPageNum(int costFrom, int costTo, string cnName)
{
    string firstPageUrl = $"http://{cnName}.58.com/zufang/pn1/?isreal=true&minprice={costFrom}_{costTo}";
    return ParsePages(HTTPHelper.GetHTMLByURL(firstPageUrl));
}
/// <summary>
/// Downloads <paramref name="indexURL"/> and converts the text of the first
/// <c>a.end</c> element to an integer.
/// </summary>
/// <param name="indexURL">Address of the index page to download.</param>
/// <returns>
/// The converted number, or 0 when there is no <c>a.end</c> element or its
/// text content is null.
/// </returns>
private int GetPageCount(string indexURL)
{
    var document = new HtmlParser().Parse(HTTPHelper.GetHTMLByURL(indexURL));
    var lastPageLink = document.QuerySelector("a.end");

    string pageText = lastPageLink == null ? null : lastPageLink.TextContent;
    if (pageText == null)
    {
        pageText = "0";
    }

    return Convert.ToInt32(pageText);
}
/// <summary>
/// Extracts movie data from an online index page: every link under
/// <c>div.co_content8</c> whose href starts with "/i/" and whose absolute URL
/// is not already present in <c>MovieDataContent.resource</c> is loaded through
/// <c>GetMovieInfoFromURL</c> and added; the changes are saved afterwards.
/// </summary>
/// <param name="indexURL">Address of the index page to download.</param>
/// <param name="movieType">
/// Not used by this method; every stored movie is tagged
/// <c>MovieType.Latest</c>.
/// </param>
private static void CrawlerMovieInfoFromOnline(string indexURL, int movieType)
{
    var htmlDoc = HTTPHelper.GetHTMLByURL(indexURL);
    var dom = htmlParser.Parse(htmlDoc);

    dom.QuerySelector("div.co_content8")
        ?.QuerySelectorAll("a")
        // Anchors without an href yield null from GetAttribute; the previous
        // unconditional StartsWith call threw on them.
        .Where(a => a.GetAttribute("href")?.StartsWith("/i/", System.StringComparison.Ordinal) == true)
        .ForEach(a =>
        {
            var onlineURL = "http://www.dy2018.com" + a.GetAttribute("href");
            if (MovieDataContent.resource.Any(mo => mo.OnlineUrl == onlineURL))
            {
                return; // already stored
            }

            var movieInfo = GetMovieInfoFromURL(onlineURL);
            if (movieInfo != null)
            {
                movieInfo.MovieType = MovieType.Latest;
                MovieDataContent.resource.Add(movieInfo);
            }
        });

    MovieDataContent.SaveChanges();
}
/// <summary>
/// Downloads page 1 of the unfiltered <c>zufang</c> listing for the given
/// 58.com sub-domain and hands the HTML to <c>ParsePages</c>.
/// </summary>
/// <param name="cnName">Sub-domain placed in front of <c>.58.com</c>.</param>
/// <returns>Whatever <c>ParsePages</c> returns for the downloaded HTML.</returns>
private int GetPageNumByIndex(string cnName)
{
    string firstPageUrl = $"http://{cnName}.58.com/zufang/pn1/?isreal=true";
    return ParsePages(HTTPHelper.GetHTMLByURL(firstPageUrl));
}
/// <summary>
/// Downloads one page of the <c>zufang</c> listing and converts each
/// <c>tr[logr]</c> row that contains a <c>b.pri</c> element into a
/// <c>HouseInfo</c>.
/// </summary>
/// <param name="cnName">Sub-domain placed in front of <c>.58.com</c>.</param>
/// <param name="index">Page number inserted after <c>pn</c> in the URL.</param>
/// <returns>
/// A deferred sequence of the rows whose location, title and price text are
/// all non-empty.
/// </returns>
private IEnumerable<HouseInfo> GetRoomListByIndex(string cnName, int index)
{
    var document = new HtmlParser().Parse(
        HTTPHelper.GetHTMLByURL($"http://{cnName}.58.com/zufang/pn{index}/?isreal=true"));

    var candidates = document.QuerySelectorAll("tr[logr]")
        .Where(row => row.QuerySelector("b.pri") != null)
        .Select(row =>
        {
            var priceNode = row.QuerySelector("b.pri");
            var primaryLocationNode = row.QuerySelector("span.f12");
            var secondaryLocationNode = row.QuerySelector("a.a_xq1");
            var titleNode = row.QuerySelector("a.t");

            // Stays 0 when the price text is not a decimal number.
            decimal rent;
            decimal.TryParse(priceNode.TextContent, out rent);

            // The rent in thousands selects the enum member, capped at Black.
            decimal thousands = rent / 1000;
            LocationMarkBGType marker;
            if (thousands > (int)LocationMarkBGType.Black)
            {
                marker = LocationMarkBGType.Black;
            }
            else
            {
                marker = (LocationMarkBGType)thousands;
            }

            // Prefer span.f12, fall back to a.a_xq1, otherwise leave empty.
            string location;
            if (primaryLocationNode != null && !string.IsNullOrEmpty(primaryLocationNode.TextContent))
            {
                location = primaryLocationNode.TextContent.Replace("租房", "");
            }
            else if (secondaryLocationNode != null && !string.IsNullOrEmpty(secondaryLocationNode.TextContent))
            {
                location = secondaryLocationNode.TextContent.Replace("租房", "");
            }
            else
            {
                location = "";
            }

            var house = new HouseInfo();
            house.HouseLocation = location;
            house.HouseTitle = titleNode != null ? titleNode.TextContent : "";
            house.Money = priceNode.TextContent;
            // The fourth '_'-separated token of the logr attribute goes into the detail URL.
            house.HouseURL = $"http://{cnName}.58.com/zufang/{row.GetAttribute("logr").Split('_')[3]}x.shtml";
            house.LocationMarkBG = marker.ToString() + ".png";
            return house;
        });

    return candidates.Where(house =>
        !(string.IsNullOrEmpty(house.HouseLocation)
          || string.IsNullOrEmpty(house.HouseTitle)
          || string.IsNullOrEmpty(house.Money)));
}
/// <summary>
/// Downloads the page at <paramref name="url"/> and maps every <c>li</c> of
/// the first element with class <c>screening_left_ul</c> to a
/// <c>HouseInfo</c>.
/// </summary>
/// <param name="url">Address of the listing page to download.</param>
/// <returns>
/// A deferred sequence with one entry per list item; an empty sequence when
/// no element with class <c>screening_left_ul</c> exists.
/// </returns>
private IEnumerable<HouseInfo> GetRoomList(string url)
{
    var htmlResult = HTTPHelper.GetHTMLByURL(url);
    var page = new AngleSharp.Parser.Html.HtmlParser().Parse(htmlResult);

    // FirstOrDefault() yields null when nothing matches; the result used to be
    // dereferenced unconditionally, throwing a NullReferenceException.
    var listContainer = page.GetElementsByClassName("screening_left_ul").FirstOrDefault();
    if (listContainer == null)
    {
        return Enumerable.Empty<HouseInfo>();
    }

    return listContainer.QuerySelectorAll("li").Select(element =>
    {
        var screening_time = element.QuerySelector("p.screening_time").TextContent;
        var screening_price = element.QuerySelector("h5").TextContent;
        var locationInfo = element.QuerySelectorAll("a").FirstOrDefault();

        // Keep the text before the first comma, then drop everything up to and
        // including the first "租" (nothing is removed when it does not occur).
        var locationContent = locationInfo.TextContent.Split(',')[0];
        var location = locationContent.Remove(0, locationContent.IndexOf("租") + 1);

        // The price stays 0 when the stripped text is not a decimal number.
        decimal housePrice = 0;
        decimal.TryParse(screening_price.Replace("¥", "").Replace("元/月", ""), out housePrice);

        // The price in thousands selects the enum member, capped at Black.
        var markBGType = (housePrice / 1000) > (int)LocationMarkBGType.Black
            ? LocationMarkBGType.Black
            : (LocationMarkBGType)(housePrice / 1000);

        return new HouseInfo()
        {
            Money = screening_price,
            HouseURL = "http://www.huzhumaifang.com" + locationInfo.GetAttribute("href"),
            HouseLocation = location,
            HouseTime = screening_time,
            HousePrice = housePrice,
            LocationMarkBG = markBGType.ToString() + ".PNG",
        };
    });
}
/// <summary>
/// Downloads one page of the price-filtered <c>zufang</c> listing and passes
/// the HTML to <c>ParseRoom</c>.
/// </summary>
/// <param name="costFrom">First number of the <c>minprice</c> query value.</param>
/// <param name="costTo">Second number of the <c>minprice</c> query value.</param>
/// <param name="cnName">Sub-domain placed in front of <c>.58.com</c>.</param>
/// <param name="index">Page number inserted after <c>pn</c> in the URL.</param>
/// <returns>Whatever <c>ParseRoom</c> returns for the downloaded HTML.</returns>
private IEnumerable<HouseInfo> GetRoomList(int costFrom, int costTo, string cnName, int index)
{
    string pageUrl = $"http://{cnName}.58.com/zufang/pn{index}/?isreal=true&minprice={costFrom}_{costTo}";
    return ParseRoom(HTTPHelper.GetHTMLByURL(pageUrl));
}
/// <summary>
/// Downloads one page of the unfiltered <c>zufang</c> listing and passes the
/// HTML to <c>ParseRoom</c>.
/// </summary>
/// <param name="cnName">Sub-domain placed in front of <c>.58.com</c>.</param>
/// <param name="index">Page number inserted after <c>pn</c> in the URL.</param>
/// <returns>Whatever <c>ParseRoom</c> returns for the downloaded HTML.</returns>
private IEnumerable<HouseInfo> GetRoomListByIndex(string cnName, int index)
{
    string pageUrl = $"http://{cnName}.58.com/zufang/pn{index}/?isreal=true";
    return ParseRoom(HTTPHelper.GetHTMLByURL(pageUrl));
}
/// <summary>
/// Downloads the price-filtered <c>pinpaigongyu</c> listing and converts the
/// text of the <c>em</c> inside the first <c>listsum</c> element to an integer.
/// </summary>
/// <param name="costFrom">First number of the <c>minprice</c> query value.</param>
/// <param name="costTo">Second number of the <c>minprice</c> query value.</param>
/// <param name="cnName">Sub-domain placed in front of <c>.58.com</c>.</param>
/// <returns>
/// The converted number, or 0 when the element is missing or its text
/// content is null.
/// </returns>
private int GetListSum(int costFrom, int costTo, string cnName)
{
    // "{1}" interpolates the constant 1, so the path segment is always "pn/1/".
    var document = new HtmlParser().Parse(
        HTTPHelper.GetHTMLByURL($"http://{cnName}.58.com/pinpaigongyu/pn/{1}/?minprice={costFrom}_{costTo}"));

    var summary = document.GetElementsByClassName("listsum").FirstOrDefault();
    var countNode = summary == null ? null : summary.QuerySelector("em");
    string countText = countNode == null ? null : countNode.TextContent;

    return Convert.ToInt32(countText ?? "0");
}
/// <summary>
/// Returns the values needed to sign a WeChat JS-SDK configuration for
/// <paramref name="url"/>. The jsapi ticket is cached in <c>_ticket</c> and
/// fetched again (access token first, then ticket) once it is missing or
/// older than 7200 seconds.
/// </summary>
/// <param name="url">Page URL that is included in the signed string.</param>
/// <param name="noncestr">
/// Nonce included in the signed string. When null or empty, the constant
/// "1234567890123456" is used instead.
/// </param>
/// <returns>
/// JSON with <c>issuccess = true</c> plus the SHA-1 value, timestamp, url,
/// appid, debug flag and ticket; or <c>issuccess = false</c> with an error
/// text when the token or the ticket could not be obtained.
/// </returns>
public ActionResult Info(string url, string noncestr)
{
    // The previous check used (_lastTimestamp - DateTime.Now).Milliseconds:
    // the difference was negative and .Milliseconds is only the 0-999
    // component, so the cached ticket was never considered expired.
    var ticketExpired = string.IsNullOrEmpty(_ticket)
        || _lastTimestamp == null
        || (DateTime.Now - _lastTimestamp).TotalSeconds > 7200;

    if (ticketExpired)
    {
        var resultString = HTTPHelper.GetHTMLByURL("https://api.weixin.qq.com/cgi-bin/token?grant_type=client_credential&appid=" + appid + "&secret=" + secret);
        dynamic resultValue = JsonConvert.DeserializeObject<dynamic>(resultString);
        if (resultValue == null || resultValue.access_token == null || resultValue.access_token.Value == null)
        {
            // AllowGet matches the success response; without it a GET request
            // to this action fails instead of receiving the error payload.
            return Json(new { issuccess = false, error = "获取token失败" }, JsonRequestBehavior.AllowGet);
        }

        var token = resultValue.access_token.Value;
        resultString = HTTPHelper.GetHTMLByURL("https://api.weixin.qq.com/cgi-bin/ticket/getticket?access_token=" + token + "&type=jsapi");
        dynamic ticketValue = JsonConvert.DeserializeObject<dynamic>(resultString);
        if (ticketValue == null || ticketValue.errcode == null || ticketValue.errcode.Value != 0 || ticketValue.ticket == null)
        {
            return Json(new { issuccess = false, error = "获取ticketValue失败" }, JsonRequestBehavior.AllowGet);
        }

        _ticket = ticketValue.ticket.Value;
        _lastTimestamp = DateTime.Now;
    }

    // The cached path used to sign a hard-coded nonce regardless of the
    // caller's value; both paths now sign the same nonce, keeping the old
    // constant only as a fallback for callers that send none.
    var nonce = string.IsNullOrEmpty(noncestr) ? "1234567890123456" : noncestr;
    var timestamp = GetTimeStamp();

    // The signed string must read "...&noncestr=..&timestamp=..&url=..";
    // the "&timestamp" key had been corrupted to "×tamp".
    var hexString = string.Format("jsapi_ticket={0}&noncestr={1}&timestamp={2}&url={3}", _ticket, nonce, timestamp, url);

    return Json(new
    {
        issuccess = true,
        sha1value = GetSHA1Value(hexString),
        timestamp = timestamp,
        url = url,
        appid = appid,
        debug = isDedug,
        tiket = _ticket // key spelling kept as-is: existing clients read it
    }, JsonRequestBehavior.AllowGet);
}
/// <summary>
/// Downloads page 1 of the price-filtered <c>zufang</c> listing and returns
/// the largest integer found in the <c>span</c> elements of <c>.pager</c>.
/// </summary>
/// <param name="costFrom">First number of the <c>minprice</c> query value.</param>
/// <param name="costTo">Second number of the <c>minprice</c> query value.</param>
/// <param name="cnName">Sub-domain placed in front of <c>.58.com</c>.</param>
/// <returns>
/// The largest parsed number; 0 when there is no pager, no span, or no span
/// whose text is an integer greater than 0.
/// </returns>
private int GetPageNum(int costFrom, int costTo, string cnName)
{
    var url = $"http://{cnName}.58.com/zufang/pn1/?isreal=true&minprice={costFrom}_{costTo}";
    var htmlResult = HTTPHelper.GetHTMLByURL(url);
    var dom = new HtmlParser().Parse(htmlResult);

    var pagerSpans = dom.QuerySelector(".pager")?.QuerySelectorAll("span");
    if (pagerSpans == null)
    {
        return 0;
    }

    // DefaultIfEmpty(0) covers the empty case in a single pass; the previous
    // Count()-then-Max() pair ran the deferred query (and every parse) twice.
    return pagerSpans
        .Select(page =>
        {
            int number;
            return int.TryParse(page.TextContent, out number) ? number : 0;
        })
        .DefaultIfEmpty(0)
        .Max();
}
/// <summary>
/// Follows a script-based redirect: when <paramref name="htmlDoc"/> contains
/// "window.location", the target path is taken from the first
/// <c>script</c> element and the page at that path on www.dy2018.com is
/// downloaded instead.
/// </summary>
/// <param name="htmlDoc">HTML of the page that may contain the redirect.</param>
/// <returns>
/// The HTML of the redirect target, or <paramref name="htmlDoc"/> unchanged
/// when it is null/empty, holds no "window.location", or has no
/// <c>script</c> element.
/// </returns>
private static string GetHTMLOnJumpWebPage(string htmlDoc)
{
    // A null document previously caused a NullReferenceException on Contains.
    if (string.IsNullOrEmpty(htmlDoc) || !htmlDoc.Contains("window.location"))
    {
        return htmlDoc;
    }

    var scriptDom = htmlParser.Parse(htmlDoc).QuerySelector("script");
    if (scriptDom == null)
    {
        // The marker text appeared outside a script element; nothing to follow.
        return htmlDoc;
    }

    // Strip the assignment, concatenation operators, quotes, blanks and the
    // terminator so that only the path of the redirect remains.
    var jumpPath = scriptDom.InnerHtml
        .Replace("window.location=", "")
        .Replace("+", "")
        .Replace("\"", "")
        .Replace(" ", "")
        .Replace(";", "");

    return HTTPHelper.GetHTMLByURL("http://www.dy2018.com" + jumpPath);
}
/// <summary>
/// Downloads one page of the price-filtered <c>zufang</c> listing and turns
/// each <c>tr[logr]</c> row into a <c>HouseInfo</c>.
/// </summary>
/// <param name="costFrom">First number of the <c>minprice</c> query value.</param>
/// <param name="costTo">Second number of the <c>minprice</c> query value.</param>
/// <param name="cnName">Sub-domain placed in front of <c>.58.com</c>.</param>
/// <param name="index">Page number inserted after <c>pn</c> in the URL.</param>
/// <returns>
/// A deferred sequence of the rows whose location, title and price text are
/// all non-empty.
/// </returns>
private IEnumerable<HouseInfo> GetRoomList(int costFrom, int costTo, string cnName, int index)
{
    var document = new HtmlParser().Parse(
        HTTPHelper.GetHTMLByURL($"http://{cnName}.58.com/zufang/pn{index}/?isreal=true&minprice={costFrom}_{costTo}"));

    return
        from row in document.QuerySelectorAll("tr[logr]")
        let house = new HouseInfo
        {
            HouseLocation = GetLocation(row),
            HouseTitle = row.QuerySelector("a.t")?.TextContent,
            Money = row.QuerySelector("b.pri")?.TextContent,
            // The fourth '_'-separated token of the logr attribute goes into the detail URL.
            HouseURL = $"http://{cnName}.58.com/zufang/{row.GetAttribute("logr").Split('_')[3]}x.shtml"
        }
        where !(string.IsNullOrEmpty(house.HouseLocation)
                || string.IsNullOrEmpty(house.HouseTitle)
                || string.IsNullOrEmpty(house.Money))
        select house;
}
/// <summary>
/// Downloads one page of the price-filtered <c>pinpaigongyu</c> listing and
/// turns every <c>li</c> that carries a <c>logr</c> attribute into a
/// <c>HouseInfo</c>.
/// </summary>
/// <param name="costFrom">First number of the <c>minprice</c> query value.</param>
/// <param name="costTo">Second number of the <c>minprice</c> query value.</param>
/// <param name="cnName">Sub-domain placed in front of <c>.58.com</c>.</param>
/// <param name="index">Page number inserted after <c>pn/</c> in the URL.</param>
/// <returns>A deferred sequence with one entry per matching list item.</returns>
private IEnumerable<HouseInfo> GetRoomList(int costFrom, int costTo, string cnName, int index)
{
    var url = $"http://{cnName}.58.com/pinpaigongyu/pn/{index}/?minprice={costFrom}_{costTo}";
    var htmlResult = HTTPHelper.GetHTMLByURL(url);
    var page = new HtmlParser().Parse(htmlResult);

    return page.QuerySelectorAll("li")
        .Where(element => element.HasAttribute("logr"))
        .Select(element =>
        {
            var houseTitle = element.QuerySelector("h2").TextContent;
            var houseInfoList = houseTitle.Split(' ');

            // The location is the first token when both keywords appear as
            // tokens of the title, otherwise the second token.
            var hasAllKeywords = new[] { "公寓", "青年社区" }.All(s => houseInfoList.Contains(s));

            // A title without any space has a single token; indexing [1]
            // threw IndexOutOfRangeException, so fall back to that token.
            var location = hasAllKeywords || houseInfoList.Length < 2
                ? houseInfoList[0]
                : houseInfoList[1];

            return new HouseInfo
            {
                HouseTitle = houseTitle,
                HouseURL = $"http://{cnName}.58.com" + element.QuerySelector("a").GetAttribute("href"),
                Money = element.QuerySelector("b").TextContent,
                HouseLocation = location
            };
        });
}
/// <summary>
/// Extracts movie data from an online detail page.
/// </summary>
/// <param name="onlineURL">Address of the movie detail page.</param>
/// <returns>
/// A populated <c>Movie</c>, or null when the page could not be downloaded or
/// any exception occurred while reading it.
/// </returns>
private static Movie GetMovieInfoFromURL(string onlineURL)
{
    try
    {
        var movieHTML = HTTPHelper.GetHTMLByURL(onlineURL);
        if (string.IsNullOrEmpty(movieHTML))
        {
            return null;
        }

        var movieDoc = htmlParser.Parse(movieHTML);
        var zoom = movieDoc.GetElementById("Zoom");
        var updatetime = movieDoc.QuerySelector("span.updatetime");

        // DateTime.TryParse resets its out argument to DateTime.MinValue on
        // failure, so parse into a scratch variable and keep "now" as the
        // default unless the parse actually succeeds.
        var pubDate = DateTime.Now;
        DateTime parsedDate;
        if (!string.IsNullOrEmpty(updatetime?.TextContent)
            && DateTime.TryParse(updatetime.TextContent.Replace("发布时间:", ""), out parsedDate))
        {
            pubDate = parsedDate;
        }

        // One entry per element with bgcolor '#fdfddf': the text of its first
        // link, or an empty string when it has none.
        var lstURL = movieDoc.QuerySelectorAll("[bgcolor='#fdfddf']")
            .Select(a => a.QuerySelector("a")?.TextContent ?? "");

        var movieName = movieDoc.QuerySelector("div.title_all")?.QuerySelector("h1");

        return new Movie()
        {
            // movieName may be null; without "?." the placeholder below was
            // unreachable and the movie was dropped via the catch block.
            ResourceName = movieName?.TextContent ?? "找不到影片信息...",
            OnlineUrl = onlineURL,
            MovieIntro = zoom?.TextContent ?? "暂无介绍...",
            DownLoadURLList = string.Join(";", lstURL),
            PubDate = pubDate.Date,
            CreateTime = DateTime.Now,
            SoureceDomain = SoureceDomainConsts.Dy2018Domain,
        };
    }
    catch (Exception)
    {
        // Best effort: a page that cannot be read is reported as "no movie".
        return null;
    }
}
/// <summary>
/// Crawls the dy2018.com home page on a background task: links starting with
/// "/i/" inside the first three <c>div.co_content222</c> elements that are
/// not yet in <c>MovieDataContent.resource</c> are loaded through
/// <c>GetMovieInfoFromURL</c>, tagged <c>MovieType.Latest</c> and added; the
/// changes are saved afterwards.
/// </summary>
public static void CrawlHotMovie()
{
    Task.Factory.StartNew(() =>
    {
        try
        {
            var htmlDoc = HTTPHelper.GetHTMLByURL("http://www.dy2018.com/");
            htmlDoc = GetHTMLOnJumpWebPage(htmlDoc);
            var dom = htmlParser.Parse(htmlDoc);

            dom.QuerySelectorAll("div.co_content222")
                ?.Take(3)
                // SelectMany flattens the per-div link lists; the former
                // Select + seedless Aggregate threw InvalidOperationException
                // when no div matched.
                .SelectMany(divInfo => divInfo.QuerySelectorAll("a"))
                // Anchors without an href yield null from GetAttribute.
                .Where(a => a.GetAttribute("href")?.StartsWith("/i/", System.StringComparison.Ordinal) == true)
                .ForEach(a =>
                {
                    var onlineURL = "http://www.dy2018.com" + a.GetAttribute("href");
                    if (MovieDataContent.resource.Any(mo => mo.OnlineUrl == onlineURL))
                    {
                        return; // already stored
                    }

                    var movieInfo = GetMovieInfoFromURL(onlineURL);
                    if (movieInfo != null)
                    {
                        movieInfo.MovieType = MovieType.Latest;
                        MovieDataContent.resource.Add(movieInfo);
                    }
                });

            MovieDataContent.SaveChanges();
        }
        catch (Exception)
        {
            // Exceptions are swallowed here, as before (the original logging
            // call was commented out). NOTE(review): consider logging.
        }
    });
}
/// <summary>
/// Downloads one page of the unfiltered <c>pinpaigongyu</c> listing and turns
/// every <c>li</c> that carries a <c>logr</c> attribute into a
/// <c>HouseInfo</c>.
/// </summary>
/// <param name="cnName">Sub-domain placed in front of <c>.58.com</c>.</param>
/// <param name="index">Page number inserted after <c>pn/</c> in the URL.</param>
/// <returns>A deferred sequence with one entry per matching list item.</returns>
private IEnumerable<HouseInfo> GetRoomListByIndex(string cnName, int index)
{
    var url = $"http://{cnName}.58.com/pinpaigongyu/pn/{index}";
    var htmlResult = HTTPHelper.GetHTMLByURL(url);
    var page = new HtmlParser().Parse(htmlResult);

    return page.QuerySelectorAll("li")
        .Where(element => element.HasAttribute("logr"))
        .Select(element =>
        {
            var houseTitle = element.QuerySelector("h2").TextContent;
            var houseInfoList = houseTitle.Split(' ');

            // Read the price text once; it feeds both Money and the marker.
            var priceText = element.QuerySelector("b").TextContent;
            int.TryParse(priceText, out var housePrice); // stays 0 when not an integer
            var markBGType = LocationMarkBGType.SelectColor(housePrice / 1000);

            // The location is the first token when both keywords appear as
            // tokens of the title, otherwise the second token.
            var hasAllKeywords = new[] { "公寓", "青年社区" }.All(s => houseInfoList.Contains(s));

            // A title without any space has a single token; indexing [1]
            // threw IndexOutOfRangeException, so fall back to that token.
            var location = hasAllKeywords || houseInfoList.Length < 2
                ? houseInfoList[0]
                : houseInfoList[1];

            return new HouseInfo
            {
                HouseTitle = houseTitle,
                HouseURL = $"http://{cnName}.58.com" + element.QuerySelector("a").GetAttribute("href"),
                Money = priceText,
                HouseLocation = location,
                LocationMarkBG = markBGType,
            };
        });
}
/// <summary>
/// Crawls the paged image index of www.girl13.com on a background task. The
/// page count is the largest number among the <c>.page-navigator</c> links;
/// for every page, each image under <c>#loop-square</c> whose URL is not yet
/// in <c>MovieDataContent.GirlsPics</c> is downloaded through
/// <c>HTTPHelper.SaveResourceByURL</c> and recorded. Changes are saved once
/// per page. Any exception ends the crawl and is logged.
/// </summary>
public static void CrawlHostMovieInfo()
{
    Task.Factory.StartNew(() =>
    {
        try
        {
            var indexURL = "http://www.girl13.com/page/1/";
            var html = HTTPHelper.GetHTMLByURL(indexURL, true);
            if (string.IsNullOrEmpty(html))
            {
                return;
            }

            var htmlDom = htmlParser.Parse(html);
            int totalImgNum = 0;
            int pageCount = 0;
            int pageIndexItem = 0;

            // The highest numeric link text in the navigator is the page count.
            htmlDom.QuerySelector(".page-navigator")
                .QuerySelectorAll("a")
                .ForEach(a =>
                {
                    if (int.TryParse(a.TextContent, out pageIndexItem) && pageCount < pageIndexItem)
                    {
                        pageCount = pageIndexItem;
                    }
                });
            Console.WriteLine(String.Format("找到美女页面{0}个", pageCount));

            for (var i = 1; i <= pageCount; i++)
            {
                // Page 1 is already parsed above; only later pages are fetched.
                if (i != 1)
                {
                    indexURL = String.Format("http://www.girl13.com/page/{0}/", i);
                    html = HTTPHelper.GetHTMLByURL(indexURL, true);
                    if (string.IsNullOrEmpty(html))
                    {
                        break;
                    }
                    htmlDom = htmlParser.Parse(html);
                }

                var imgInPageCount = 0;
                htmlDom.QuerySelector("#loop-square")
                    .QuerySelectorAll("img")
                    .ForEach(img =>
                    {
                        imgInPageCount++;
                        var onlineURL = img.GetAttribute("src");

                        // An <img> without a src attribute yields null; calling
                        // Contains on it threw and aborted the whole crawl.
                        if (onlineURL == null || onlineURL.Contains("weix2.gif"))
                        {
                            return;
                        }

                        MovieDataContent.Database.EnsureCreated();
                        if (MovieDataContent.GirlsPics.Any(mo => mo.PicOriginUrl == onlineURL))
                        {
                            return; // already stored
                        }

                        var girlInfo = new GirlsPics();
                        girlInfo.Id = Guid.NewGuid().ToString();
                        girlInfo.PicOriginUrl = onlineURL;
                        girlInfo.CreateTime = System.DateTime.Now;

                        var savedImgName = "";
                        HTTPHelper.SaveResourceByURL(girlInfo.PicOriginUrl, out savedImgName);
                        girlInfo.PicLocalUrl = savedImgName;

                        MovieDataContent.GirlsPics.Add(girlInfo);
                        Console.WriteLine($"{imgInPageCount}/{i}/{++totalImgNum}:{girlInfo.PicOriginUrl} | success.");
                    });

                MovieDataContent.SaveChanges();
                Console.WriteLine($"finished page {i}.");
                LogHelper.Info($"finished page {i}.");
            }
        }
        catch (Exception ex)
        {
            LogHelper.Error("Girl13 CrawlImg Exception", ex);
        }
    });
}
/// <summary>
/// Fetches data through an API endpoint and deserializes the JSON response.
/// </summary>
/// <typeparam name="T">Type the response is deserialized into.</typeparam>
/// <param name="apiURL">Address of the API endpoint.</param>
/// <returns>The response deserialized as <typeparamref name="T"/>.</returns>
public static T GetAPIResult<T>(string apiURL)
{
    return Newtonsoft.Json.JsonConvert.DeserializeObject<T>(HTTPHelper.GetHTMLByURL(apiURL));
}