/// <summary>
/// Fetches the HTML of a movie detail page via HTTPHelper.GetHTMLByURL and extracts
/// the movie name, publish date and (optionally) the introduction markup.
/// </summary>
/// <param name="onlineURL">URL of the movie detail page; also stored in the result.</param>
/// <param name="isContainIntro">
/// When true and the intro element exists, its inner HTML is copied into the result;
/// otherwise the intro is an empty string.
/// </param>
/// <returns>A populated MovieInfo, or null when the fetched HTML is null or empty.</returns>
public static MovieInfo GetMovieInfoByOnlineURL(string onlineURL, bool isContainIntro = false)
{
    var html = HTTPHelper.GetHTMLByURL(onlineURL);
    if (string.IsNullOrEmpty(html))
    {
        return null;
    }

    var htmlDom = htmlParser.Parse(html);
    var nameDom = htmlDom.QuerySelector("h1.font14");
    var introDom = htmlDom.QuerySelector("div.Drama_c");
    var infoTable = htmlDom.QuerySelectorAll("tr.CommonListCell");

    // Fallback publish date; only replaced when the page yields a parseable date.
    var pubDate = DateTime.Now;

    // NOTE(review): the guard requires more than two rows although only row 1 is read;
    // kept as in the original - confirm against the page layout.
    if (infoTable != null && infoTable.Length > 2)
    {
        var dateRow = infoTable[1];
        if (dateRow != null && !string.IsNullOrEmpty(dateRow.TextContent))
        {
            var dateText = dateRow.TextContent.Replace("发布时间", "").Replace("\n", "");

            // Parse into a temporary: TryParse writes DateTime.MinValue to its out
            // argument on failure, which would otherwise wipe the fallback above.
            DateTime parsedDate;
            if (DateTime.TryParse(dateText, out parsedDate))
            {
                pubDate = parsedDate;
            }
        }
    }

    return new MovieInfo()
    {
        MovieName = nameDom != null ? nameDom.InnerHtml : "获取名称失败...",
        Dy2018OnlineUrl = onlineURL,
        MovieIntro = introDom != null && isContainIntro ? introDom.InnerHtml : "",
        PubDate = pubDate,
    };
}
/// <summary>
/// Queues a task (Task.Factory.StartNew) that walks the "hotclick" list pages
/// p_1 .. p_endIndex and adds every entry not yet known to hotMoviceHelper.
/// The method returns immediately; the task is not awaited or returned.
/// </summary>
/// <param name="endIndex">Last list page index to request (inclusive); paging starts at 1.</param>
public static void CrawlHotClickMovieInfo(int endIndex = 10)
{
    Task.Factory.StartNew(() =>
    {
        try
        {
            LogHelper.Info("CrawlHotClickMovieInfo Start...");
            for (int index = 1; index <= endIndex; index++)
            {
                var indexURL = $"http://www.btdytt520.com/hotclick/p_{index}.html";
                var html = HTTPHelper.GetHTMLByURL(indexURL);
                if (string.IsNullOrEmpty(html))
                {
                    // An empty page stops the whole crawl, not just this page.
                    return;
                }

                var htmlDom = htmlParser.Parse(html);
                foreach (var li in htmlDom.QuerySelectorAll("li.newsli"))
                {
                    // The movie link is the first anchor carrying a non-empty "target" attribute.
                    var aDom = li.QuerySelectorAll("a")
                                 .FirstOrDefault(a => !string.IsNullOrEmpty(a.GetAttribute("target")));
                    if (aDom == null)
                    {
                        continue;
                    }

                    var onlineURL = "http://www.btdytt520.com/" + aDom.GetAttribute("href");
                    if (hotMoviceHelper.IsContainsMoive(onlineURL))
                    {
                        continue;
                    }

                    // Query the date cell once and reuse it for both the guard and the parse.
                    var dateDom = li.QuerySelector("li.phlidate");
                    if (dateDom == null)
                    {
                        continue;
                    }

                    // Keep "now" as the fallback: TryParse writes DateTime.MinValue to its
                    // out argument on failure, so parse into a temporary first.
                    var pubDate = DateTime.Now;
                    DateTime parsedDate;
                    if (DateTime.TryParse(dateDom.InnerHtml, out parsedDate))
                    {
                        pubDate = parsedDate;
                    }

                    hotMoviceHelper.AddToMovieDic(new MovieInfo()
                    {
                        Dy2018OnlineUrl = onlineURL,
                        MovieName = aDom.InnerHtml,
                        PubDate = pubDate,
                    });
                }
            }
        }
        catch (Exception ex)
        {
            LogHelper.Error("CrawlHotClickMovieInfo Exception", ex);
        }
        finally
        {
            // Logged on every exit path (success, early return, exception) so each
            // "Start..." entry has a matching "Finish." entry.
            LogHelper.Info("CrawlHotClickMovieInfo Finish.");
        }
    });
}
/// <summary>
/// Fetches the btdytt520 movie index page and projects every anchor inside
/// "div.index_Sidebar_cc" into a MovieInfo (URL and name only).
/// Returns without doing anything when the page HTML is empty or the sidebar block is missing.
/// </summary>
public static void CrawlLatestMovieInfo()
{
    var indexURL = "http://www.btdytt520.com/movie/";
    var html = HTTPHelper.GetHTMLByURL(indexURL);
    if (string.IsNullOrEmpty(html))
    {
        return;
    }

    var htmlDom = htmlParser.Parse(html);
    var divMovie = htmlDom.QuerySelector("div.index_Sidebar_cc");
    if (divMovie == null)
    {
        // Selector matched nothing (layout changed or error page) - nothing to crawl.
        return;
    }

    // NOTE(review): the resulting list is neither stored nor returned, so this method
    // currently has no observable effect beyond the HTTP request - confirm intent.
    var lstMovie = divMovie.QuerySelectorAll("a")
                           .Select(a => new MovieInfo()
                           {
                               Dy2018OnlineUrl = "http://www.btdytt520.com/" + a.GetAttribute("href"),
                               MovieName = a.InnerHtml
                           })
                           .ToList();
}
/// <summary>
/// Crawls movie data: queues a task (Task.Factory.StartNew) that reads the dy2018.com
/// home page, follows the "/i/" links found in the first three "div.co_content222"
/// blocks and adds every new movie that has at least one download URL to hotMovieList.
/// The method returns immediately; exceptions inside the task are logged, not rethrown.
/// </summary>
public static void CrawlHotMovie()
{
    Task.Factory.StartNew(() =>
    {
        try
        {
            LogHelper.Info("CrawlHotMovie Start...");
            var htmlDoc = HTTPHelper.GetHTMLByURL("http://www.dy2018.com/");

            // Skip parsing when nothing was fetched instead of handing null/empty text to the parser.
            if (!string.IsNullOrEmpty(htmlDoc))
            {
                var dom = htmlParser.Parse(htmlDoc);
                var lstDivInfo = dom.QuerySelectorAll("div.co_content222");
                if (lstDivInfo != null)
                {
                    // The first three DIVs hold the new movies.
                    foreach (var divInfo in lstDivInfo.Take(3))
                    {
                        foreach (var a in divInfo.QuerySelectorAll("a"))
                        {
                            // Anchors without an href would otherwise throw and abort the whole crawl.
                            var href = a.GetAttribute("href");
                            if (href == null || !href.Contains("/i/"))
                            {
                                continue;
                            }

                            var onlineURL = "http://www.dy2018.com" + href;
                            if (hotMovieList.IsContainsMoive(onlineURL))
                            {
                                continue;
                            }

                            MovieInfo movieInfo = Dy2018MoviceInfoHelper.GetMovieInfoFromOnlineURL(onlineURL);

                            // Only keep movies that actually expose at least one download URL.
                            if (movieInfo != null
                                && movieInfo.XunLeiDownLoadURLList != null
                                && movieInfo.XunLeiDownLoadURLList.Count != 0)
                            {
                                hotMovieList.AddToMovieDic(movieInfo);
                            }
                        }
                    }
                }
            }

            LogHelper.Info("CrawlHotMovie Finish...");
        }
        catch (Exception ex)
        {
            LogHelper.Error("CrawlHotMovie Exception", ex);
        }
    });
}
/// <summary>
/// Fetches the btdytt520 movie index page and, for every anchor inside
/// "div.index_Sidebar_cc" whose URL is not yet known to hotMoviceHelper,
/// loads the detail page and adds the resulting MovieInfo.
/// Returns without doing anything when the page HTML is empty or the sidebar block is missing.
/// </summary>
public static void CrawlHostMovieInfo()
{
    var indexURL = "http://www.btdytt520.com/movie/";
    var html = HTTPHelper.GetHTMLByURL(indexURL);
    if (string.IsNullOrEmpty(html))
    {
        return;
    }

    var htmlDom = htmlParser.Parse(html);
    var divMovie = htmlDom.QuerySelector("div.index_Sidebar_cc");
    if (divMovie == null)
    {
        // Selector matched nothing (layout changed or error page) - nothing to crawl.
        return;
    }

    foreach (var a in divMovie.QuerySelectorAll("a"))
    {
        var aURL = "http://www.btdytt520.com" + a.GetAttribute("href");
        if (hotMoviceHelper.IsContainsMoive(aURL))
        {
            continue;
        }

        // The detail lookup can yield null (e.g. the page could not be fetched);
        // skip those instead of handing null to the dictionary helper.
        var movieInfo = Btdytt520Helper.GetMovieInfoByOnlineURL(aURL);
        if (movieInfo != null)
        {
            hotMoviceHelper.AddToMovieDic(movieInfo);
        }
    }
}
/// <summary>
/// Extracts movie data from an online movie page: name, publish date, download links
/// and (optionally) the HTML-encoded introduction.
/// </summary>
/// <param name="onlineURL">URL of the movie page; also stored in the result.</param>
/// <param name="isContainIntro">
/// When true and the "Zoom" element exists, its inner HTML is HTML-encoded into the intro;
/// otherwise a placeholder text is used.
/// </param>
/// <returns>
/// A populated MovieInfo, or null when the fetched HTML is null/empty or any exception
/// occurs (the exception is logged, not rethrown).
/// </returns>
public static MovieInfo GetMovieInfoFromOnlineURL(string onlineURL, bool isContainIntro = false)
{
    try
    {
        var movieHTML = HTTPHelper.GetHTMLByURL(onlineURL);
        if (string.IsNullOrEmpty(movieHTML))
        {
            return null;
        }

        var movieDoc = htmlParser.Parse(movieHTML);
        var zoom = movieDoc.GetElementById("Zoom");
        var lstDownLoadURL = movieDoc.QuerySelectorAll("[bgcolor='#fdfddf']");
        var updatetime = movieDoc.QuerySelector("span.updatetime");

        // Fallback publish date; only replaced when the page yields a parseable date.
        var pubDate = DateTime.Now;
        if (updatetime != null && !string.IsNullOrEmpty(updatetime.InnerHtml))
        {
            // Parse into a temporary: TryParse writes DateTime.MinValue to its out
            // argument on failure, which would otherwise wipe the fallback above.
            DateTime parsedDate;
            if (DateTime.TryParse(updatetime.InnerHtml.Replace("发布时间:", ""), out parsedDate))
            {
                pubDate = parsedDate;
            }
        }

        // Each highlighted cell may contain one anchor; its inner HTML is taken as the download URL.
        var lstOnlineURL = lstDownLoadURL.Select(cell => cell.QuerySelector("a"))
                                         .Where(anchor => anchor != null)
                                         .Select(anchor => anchor.InnerHtml)
                                         .ToList();

        // Look the title heading up once instead of querying it twice.
        var titleBlock = movieDoc.QuerySelector("div.title_all");
        var titleHeading = titleBlock != null ? titleBlock.QuerySelector("h1") : null;

        var movieInfo = new MovieInfo()
        {
            MovieName = titleHeading != null ? titleHeading.InnerHtml : "找不到影片信息...",
            Dy2018OnlineUrl = onlineURL,
            MovieIntro = zoom != null && isContainIntro ? WebUtility.HtmlEncode(zoom.InnerHtml) : "暂无介绍...",
            XunLeiDownLoadURLList = lstOnlineURL,
            PubDate = pubDate,
        };
        return movieInfo;
    }
    catch (Exception ex)
    {
        LogHelper.Error("GetMovieInfoFromOnlineURL Exception", ex, new { OnloneURL = onlineURL });
        return null;
    }
}
/// <summary>
/// Extracts data from an online list page: follows the "/i/" links found in the first
/// "div.co_content8" block and adds every new movie that has at least one download URL
/// to latestMovieList. Returns without doing anything when the page HTML is empty or
/// the block is missing.
/// </summary>
/// <param name="indexURL">URL of the list page to read.</param>
private static void FillMovieFromOnline(string indexURL)
{
    var htmlDoc = HTTPHelper.GetHTMLByURL(indexURL);
    if (string.IsNullOrEmpty(htmlDoc))
    {
        return;
    }

    var dom = htmlParser.Parse(htmlDoc);
    var lstDivInfo = dom.QuerySelectorAll("div.co_content8");
    if (lstDivInfo == null)
    {
        return;
    }

    // FirstOrDefault yields null when the selector matched nothing; guard before dereferencing.
    var firstDiv = lstDivInfo.FirstOrDefault();
    if (firstDiv == null)
    {
        return;
    }

    foreach (var a in firstDiv.QuerySelectorAll("a"))
    {
        // Anchors without an href would otherwise throw on Contains.
        var href = a.GetAttribute("href");
        if (href == null || !href.Contains("/i/"))
        {
            continue;
        }

        var onlineURL = "http://www.dy2018.com" + href;
        if (latestMovieList.IsContainsMoive(onlineURL))
        {
            continue;
        }

        MovieInfo movieInfo = Dy2018MoviceInfoHelper.GetMovieInfoFromOnlineURL(onlineURL);

        // Only keep movies that actually expose at least one download URL.
        if (movieInfo != null
            && movieInfo.XunLeiDownLoadURLList != null
            && movieInfo.XunLeiDownLoadURLList.Count != 0)
        {
            latestMovieList.AddToMovieDic(movieInfo);
        }
    }
}