/// <summary>
/// Get the producer(s) with the longest interval between two consecutive awards.
/// </summary>
/// <param name="movies">List of winning films</param>
/// <returns>
/// One element per producer tied for the longest interval. Each element exposes a
/// <c>Max</c> property holding that producer's <see cref="MoviesInfoResult"/>.
/// The sequence is empty when no producer has more than one win.
/// </returns>
private IEnumerable<dynamic> ProducerLongestInterval(IEnumerable<MoviesInfo> movies)
{
    var longest = movies
        .GroupBy(m => m.Producer)
        .Where(g => g.Count() > 1)
        .Select(g =>
        {
            // Materialize the group once, in chronological order: "consecutive"
            // wins only make sense by year, and indexing a list avoids walking
            // the grouping again for every ElementAt call.
            List<MoviesInfo> wins = g.OrderBy(m => m.Year).ToList();

            MoviesInfoResult best = new MoviesInfoResult
            {
                MaxInterval = Int32.MinValue,
                Delta = 0,
                PreviousWin = 0,
                FollowingWin = 0,
                Producer = ""
            };

            for (int i = 1; i < wins.Count; i++)
            {
                MoviesInfo previous = wins[i - 1];
                MoviesInfo current = wins[i];

                // Non-negative because the wins are sorted by year.
                // Delta always holds the gap of the last pair examined.
                best.Delta = current.Year - previous.Year;

                if (best.Delta > best.MaxInterval)
                {
                    best.MaxInterval = best.Delta;
                    best.PreviousWin = previous.Year;
                    best.FollowingWin = current.Year;
                    best.Producer = current.Producer;
                }
            }

            // Callers read the result through the "Max" property; keep that name.
            return new { Max = best };
        })
        .ToList();

    // Enumerable.Max throws on an empty sequence; with no repeat winners
    // there is simply nothing to report.
    if (longest.Count == 0)
    {
        return Enumerable.Empty<dynamic>();
    }

    var biggerInterval = longest.Max(x => x.Max.MaxInterval);

    return longest.Where(x => x.Max.MaxInterval == biggerInterval);
}
/// <summary>
/// Get the producer(s) who won two consecutive awards in the shortest time.
/// </summary>
/// <param name="movies">List of winning films</param>
/// <returns>
/// One element per producer tied for the shortest interval. Each element exposes a
/// <c>Min</c> property holding that producer's <see cref="MoviesInfoResult"/>.
/// The sequence is empty when no producer has more than one win.
/// </returns>
private IEnumerable<dynamic> ProducerFastWinner(IEnumerable<MoviesInfo> movies)
{
    var faster = movies
        .GroupBy(m => m.Producer)
        .Where(g => g.Count() > 1)
        .Select(g =>
        {
            // Materialize the group once, in chronological order: "consecutive"
            // wins only make sense by year, and indexing a list avoids walking
            // the grouping again for every ElementAt call.
            List<MoviesInfo> wins = g.OrderBy(m => m.Year).ToList();

            MoviesInfoResult best = new MoviesInfoResult
            {
                MinInterval = Int32.MaxValue,
                Delta = 0,
                PreviousWin = 0,
                FollowingWin = 0,
                Producer = ""
            };

            for (int i = 1; i < wins.Count; i++)
            {
                MoviesInfo previous = wins[i - 1];
                MoviesInfo current = wins[i];

                // Non-negative because the wins are sorted by year.
                // Delta always holds the gap of the last pair examined.
                best.Delta = current.Year - previous.Year;

                if (best.Delta < best.MinInterval)
                {
                    best.MinInterval = best.Delta;
                    best.PreviousWin = previous.Year;
                    best.FollowingWin = current.Year;
                    best.Producer = current.Producer;
                }
            }

            // Callers read the result through the "Min" property; keep that name.
            return new { Min = best };
        })
        .ToList();

    // Enumerable.Min throws on an empty sequence; with no repeat winners
    // there is simply nothing to report.
    if (faster.Count == 0)
    {
        return Enumerable.Empty<dynamic>();
    }

    var shorterInterval = faster.Min(x => x.Min.MinInterval);

    return faster.Where(x => x.Min.MinInterval == shorterInterval);
}
/// <summary>
/// Builds the HTML fragment for one movie tile: a poster image and a title,
/// both linking to <c>VideoDetail?id=...</c>.
/// </summary>
/// <param name="view">Movie whose <c>Id</c>, <c>ImageUrl</c> and <c>Name</c> are rendered.</param>
/// <returns>The tile markup with all movie values HTML-encoded.</returns>
private string GetMovieDiv(MoviesInfo view)
{
    // The values are placed inside attributes and element text, so they must be
    // HTML-encoded: a name or image URL containing quotes or angle brackets would
    // otherwise break out of the markup.
    string id = System.Net.WebUtility.HtmlEncode(Convert.ToString(view.Id));
    string imageUrl = System.Net.WebUtility.HtmlEncode(view.ImageUrl);
    string name = System.Net.WebUtility.HtmlEncode(view.Name);

    // NOTE(review): the <img> tag carries two class attributes; browsers typically
    // honor only the first. Left untouched to avoid changing the rendered styling.
    return string.Format(
        "<div class=\"col-xs-6 col-sm-4 col-md-2\">\r\n" +
        "<div class=\"video-div\">\r\n" +
        "<div class=\"video-div-item\">\r\n" +
        "<div class=\"item-overlay opacity r r-2x bg-black\">\r\n" +
        "<div class=\"center text-center m-t-n\">\r\n" +
        "<a href=\"VideoDetail?id={0}\"><i class=\"fa fa-play-circle i-2x\"></i></a>\r\n" +
        "</div>" +
        "</div>" +
        // alt is quoted so that titles containing spaces stay inside the attribute.
        "<a href=\"VideoDetail?id={0}\"><img class=\"video-div-img\" src=\"{1}\" alt=\"{2}\" class=\"r r-2x img-full\"></a>\r\n" +
        "</div>\r\n" +
        "<div class=\"padder-v\">\r\n" +
        "<a href=\"VideoDetail?id={0}\" class=\"text-ellipsis\">{2}</a>\r\n" +
        "</div>" +
        "</div>" +
        "</div>",
        id,
        imageUrl,
        name);
}
/// <summary>
/// Crawls Hao123 movie pages one URL at a time, blocking until each fetch completes.
/// In list mode every <c>li.card&gt;a</c> link found on the page is crawled again in
/// detail mode. In detail mode the page is parsed into a <see cref="MoviesInfo"/> plus
/// its <see cref="UrlSource"/> entries, which replace any previously stored movie with
/// the same name and image URL.
/// </summary>
/// <param name="urlList">Page addresses to fetch; each one is passed to <c>new Uri(...)</c>.</param>
/// <param name="isDetial">
/// <c>true</c> when <paramref name="urlList"/> contains movie detail pages,
/// <c>false</c> (default) when it contains listing pages.
/// </param>
private void Hao123MoviesCrawler(List<string> urlList, bool isDetial = false)
{
    HtmlParser htmlParser = new HtmlParser();
    string resource = Const.SourcesType.Hao123;

    foreach (string pageUrl in urlList)
    {
        var crawler = new SimpleCrawler();

        crawler.OnStart += (s, e) =>
        {
            Console.WriteLine("爬虫开始抓取地址:" + e.Uri.ToString());
        };

        crawler.OnError += (s, e) =>
        {
            Console.WriteLine("爬虫抓取出现错误:" + e.Uri.ToString() + ",异常消息:" + e.Exception.Message);
        };

        crawler.OnCompleted += (s, e) =>
        {
            var dom = htmlParser.ParseDocument(e.PageSource);

            if (!isDetial)
            {
                // Listing page: collect the detail links and crawl each of them.
                // Links without an href are dropped because new Uri(null) would throw.
                var movieUrlList = dom.QuerySelectorAll("li.card>a")
                    .Select(card => card.GetAttribute("href"))
                    .Where(href => !string.IsNullOrEmpty(href))
                    .ToList();

                Hao123MoviesCrawler(movieUrlList, true);
                return;
            }

            var moviesInfo = new MoviesInfo();
            var urlSourceList = new List<UrlSource>();
            moviesInfo.Id = GuidExtend.NewGuid();
            moviesInfo.Resource = resource;
            moviesInfo.CreateTime = DateTime.Now;

            // Movie title; a page without a poster link is not a movie page.
            var posterLink = dom.QuerySelectorAll("div.poster>a").FirstOrDefault();
            if (posterLink == null)
            {
                return;
            }
            moviesInfo.Name = posterLink.GetAttribute("title");

            // Joins the distinct link texts under the first element whose "monkey"
            // attribute equals the given value; null when no such element exists,
            // so the matching property is left unassigned, as before.
            Func<string, string> linkTextFor = monkey =>
            {
                var holder = dom.All.FirstOrDefault(el => el.GetAttribute("monkey") == monkey);
                return holder == null
                    ? null
                    : string.Join(",", holder.QuerySelectorAll("a").Select(x => x.InnerHtml).Distinct());
            };

            string stars = linkTextFor("actor");
            if (stars != null)
            {
                moviesInfo.Stars = stars;
            }

            string type = linkTextFor("category");
            if (type != null)
            {
                moviesInfo.Type = type;
            }

            string area = linkTextFor("area");
            if (area != null)
            {
                moviesInfo.Area = area;
            }

            string year = linkTextFor("decade");
            if (year != null)
            {
                moviesInfo.Year = year;
            }

            // Poster image.
            var img = dom.QuerySelectorAll("div.poster>a>img").FirstOrDefault();
            if (img != null)
            {
                moviesInfo.ImageUrl = img.GetAttribute("src");
            }

            var des = dom.QuerySelectorAll("p.abstract>em").FirstOrDefault();
            if (des != null)
            {
                moviesInfo.Description = des.InnerHtml;
            }

            // Primary play button.
            var playButton = dom.QuerySelectorAll("div.source>a.play-btn").FirstOrDefault();
            if (playButton != null)
            {
                var urlSource = new UrlSource();
                urlSource.Url = playButton.GetAttribute("href");
                urlSource.VideoSource = playButton.GetAttribute("alog-text");
                urlSource.Id = GuidExtend.NewGuid();
                urlSource.MovieId = moviesInfo.Id;
                urlSource.Resource = resource;
                urlSourceList.Add(urlSource);
            }

            // Additional sources listed under the first div.source. The block may be
            // missing, so it is checked before use. AddRange enumerates the projection
            // exactly once, so each entry gets a single generated id.
            var sourceBlock = dom.QuerySelectorAll("div.source").FirstOrDefault();
            if (sourceBlock != null)
            {
                urlSourceList.AddRange(
                    sourceBlock.QuerySelectorAll("ul>li>a").Select(link => new UrlSource
                    {
                        Id = GuidExtend.NewGuid(),
                        MovieId = moviesInfo.Id,
                        Url = link.GetAttribute("href"),
                        VideoSource = link.TextContent,
                        Resource = resource
                    }));
            }

            if (!string.IsNullOrEmpty(moviesInfo.Name) && urlSourceList.Count > 0)
            {
                var oldData = _repository.All<MoviesInfo>(sl => sl.Name == moviesInfo.Name && sl.ImageUrl == moviesInfo.ImageUrl);

                // Remove the dependent UrlSource rows BEFORE the movies they point to:
                // the filter is a subquery over oldData, so once the movies are deleted
                // it matches nothing and the stale sources would be left behind.
                // NOTE(review): assumes oldData is a deferred query, as the use of
                // DeleteFromQuery suggests - confirm against the repository.
                _repository.DeleteByExpression<UrlSource>(sl => oldData.Select(m => m.Id).Contains(sl.MovieId));
                oldData.DeleteFromQuery();

                _repository.Insert(moviesInfo, true);
                _repository.BulkInsert<UrlSource>(urlSourceList);
            }
        };

        // Blocks until this page has been fetched and handled before moving on.
        crawler.Start(new Uri(pageUrl)).Wait();
    }
}