Пример #1
0
        /// <summary>
        /// Get the producer(s) with the longest interval between two consecutive awards
        /// </summary>
        /// <remarks>
        /// Only producers with more than one winning film are considered. Each producer's wins
        /// are sorted by year before neighbouring wins are compared, so the outcome does not
        /// depend on the order of <paramref name="movies"/>. Producers tied for the longest
        /// interval are all returned.
        /// </remarks>
        /// <param name="movies">List of winning films</param>
        /// <returns>
        /// One item per producer with the longest interval; each item exposes a <c>Max</c>
        /// property holding the <c>MoviesInfoResult</c> computed for that producer. Empty when
        /// no producer has won more than once.
        /// </returns>
        private IEnumerable <dynamic> ProducerLongestInterval(IEnumerable <MoviesInfo> movies)
        {
            var longest = movies
                          .GroupBy(m => m.Producer)
                          // Materialize each producer's wins once, in chronological order, so the
                          // loop below can index them directly instead of re-enumerating the group.
                          .Select(g => g.OrderBy(m => m.Year).ToList())
                          .Where(wins => wins.Count > 1)
                          .Select(wins =>
            {
                var result = new MoviesInfoResult
                {
                    MaxInterval  = Int32.MinValue,
                    Delta        = 0,
                    PreviousWin  = 0,
                    FollowingWin = 0,
                    Producer     = ""
                };

                for (int i = 1; i < wins.Count; i++)
                {
                    MoviesInfo previous = wins[i - 1];
                    MoviesInfo current  = wins[i];

                    result.Delta = Math.Abs(current.Year - previous.Year);

                    // Strict comparison: on a tie the earliest pair of wins is kept.
                    if (result.Delta > result.MaxInterval)
                    {
                        result.MaxInterval  = result.Delta;
                        result.PreviousWin  = previous.Year;
                        result.FollowingWin = current.Year;
                        result.Producer     = current.Producer;
                    }
                }

                // Callers read the result through the "Max" property of the returned item.
                return new { Max = result };
            })
                          .ToList();

            // Max() throws on an empty sequence, so bail out when nobody won more than once.
            if (longest.Count == 0)
            {
                return Enumerable.Empty<dynamic>();
            }

            var biggerInterval         = longest.Max(x => x.Max.MaxInterval);
            var biggerIntervalProducer = longest.Where(x => x.Max.MaxInterval == biggerInterval);

            return biggerIntervalProducer;
        }
Пример #2
0
        /// <summary>
        /// Get the producer(s) who won two consecutive awards in the shortest time
        /// </summary>
        /// <remarks>
        /// Only producers with more than one winning film are considered. Each producer's wins
        /// are sorted by year before neighbouring wins are compared, so the outcome does not
        /// depend on the order of <paramref name="movies"/>. Producers tied for the shortest
        /// interval are all returned.
        /// </remarks>
        /// <param name="movies">List of winning films</param>
        /// <returns>
        /// One item per producer with the shortest interval; each item exposes a <c>Min</c>
        /// property holding the <c>MoviesInfoResult</c> computed for that producer. Empty when
        /// no producer has won more than once.
        /// </returns>
        private IEnumerable <dynamic> ProducerFastWinner(IEnumerable <MoviesInfo> movies)
        {
            var faster = movies
                         .GroupBy(m => m.Producer)
                         // Materialize each producer's wins once, in chronological order, so the
                         // loop below can index them directly instead of re-enumerating the group.
                         .Select(g => g.OrderBy(m => m.Year).ToList())
                         .Where(wins => wins.Count > 1)
                         .Select(wins =>
            {
                var result = new MoviesInfoResult
                {
                    MinInterval  = Int32.MaxValue,
                    Delta        = 0,
                    PreviousWin  = 0,
                    FollowingWin = 0,
                    Producer     = ""
                };

                for (int i = 1; i < wins.Count; i++)
                {
                    MoviesInfo previous = wins[i - 1];
                    MoviesInfo current  = wins[i];

                    result.Delta = Math.Abs(current.Year - previous.Year);

                    // Strict comparison: on a tie the earliest pair of wins is kept.
                    if (result.Delta < result.MinInterval)
                    {
                        result.MinInterval  = result.Delta;
                        result.PreviousWin  = previous.Year;
                        result.FollowingWin = current.Year;
                        result.Producer     = current.Producer;
                    }
                }

                // Callers read the result through the "Min" property of the returned item.
                return new { Min = result };
            })
                         .ToList();

            // Min() throws on an empty sequence, so bail out when nobody won more than once.
            if (faster.Count == 0)
            {
                return Enumerable.Empty<dynamic>();
            }

            var shorterInterval         = faster.Min(x => x.Min.MinInterval);
            var shorterIntervalProducer = faster.Where(x => x.Min.MinInterval == shorterInterval);

            return shorterIntervalProducer;
        }
Пример #3
0
 /// <summary>
 /// Builds the HTML card for one movie: a poster image with a play overlay and a title
 /// link, all pointing at <c>VideoDetail?id=...</c>.
 /// </summary>
 /// <param name="view">Movie whose <c>Id</c>, <c>ImageUrl</c> and <c>Name</c> are rendered.</param>
 /// <returns>The card markup with every inserted value escaped for the place it is written to.</returns>
 private string GetMovieDiv(MoviesInfo view)
 {
     // The values are written into attributes and element content, so they must be escaped;
     // otherwise a title or URL containing quotes or angle brackets could break the markup
     // or inject script.
     string id       = Uri.EscapeDataString(Convert.ToString(view.Id) ?? string.Empty);
     string imageUrl = System.Net.WebUtility.HtmlEncode(view.ImageUrl);
     string name     = System.Net.WebUtility.HtmlEncode(view.Name);

     // The alt attribute is quoted so titles containing spaces stay a single attribute value.
     // The img previously carried a second class attribute ("r r-2x img-full"); HTML parsers
     // ignore a duplicated attribute, so it is dropped here without changing what is rendered.
     return(string.Format("<div class=\"col-xs-6 col-sm-4 col-md-2\">\r\n" +
                          "<div class=\"video-div\">\r\n" +
                          "<div class=\"video-div-item\">\r\n" +
                          "<div class=\"item-overlay opacity r r-2x bg-black\">\r\n" +
                          "<div class=\"center text-center m-t-n\">\r\n" +
                          "<a href=\"VideoDetail?id={0}\"><i class=\"fa fa-play-circle i-2x\"></i></a>\r\n" +
                          "</div>" +
                          "</div>" +
                          "<a href=\"VideoDetail?id={0}\"><img class=\"video-div-img\" src=\"{1}\" alt=\"{2}\"></a>\r\n" +
                          "</div>\r\n" +
                          "<div class=\"padder-v\">\r\n" +
                          "<a href=\"VideoDetail?id={0}\" class=\"text-ellipsis\">{2}</a>\r\n" +
                          "</div>" +
                          "</div>" +
                          "</div>", id, imageUrl, name));
 }
Пример #4
0
        /// <summary>
        /// Crawls the given hao123 pages one after another, waiting for each page to finish
        /// before starting the next.
        /// </summary>
        /// <remarks>
        /// In list mode (<paramref name="isDetial"/> is <c>false</c>) the <c>li.card&gt;a</c> links
        /// of each page are collected and crawled recursively in detail mode. In detail mode the
        /// page is parsed into one <c>MoviesInfo</c> plus its <c>UrlSource</c> play links, which
        /// replace any stored movie with the same name and image URL. A detail page without a
        /// poster title or without any play link is skipped.
        /// </remarks>
        /// <param name="urlList">URLs to crawl; each one is passed to <c>new Uri(string)</c>.</param>
        /// <param name="isDetial">
        /// <c>true</c> when the URLs are movie detail pages, <c>false</c> when they are listing pages.
        /// </param>
        private void Hao123MoviesCrawler(List <string> urlList, bool isDetial = false)
        {
            HtmlParser htmlParser = new HtmlParser();
            string     resource   = Const.SourcesType.Hao123;

            for (var i = 0; i < urlList.Count; i++)
            {
                var crawler = new SimpleCrawler();

                crawler.OnStart += (s, e) =>
                {
                    Console.WriteLine("爬虫开始抓取地址:" + e.Uri.ToString());
                };
                crawler.OnError += (s, e) =>
                {
                    Console.WriteLine("爬虫抓取出现错误:" + e.Uri.ToString() + ",异常消息:" + e.Exception.Message);
                };
                crawler.OnCompleted += (s, e) =>
                {
                    var dom = htmlParser.ParseDocument(e.PageSource);

                    if (!isDetial)
                    {
                        // Listing page: follow every movie card into its detail page.
                        // Links without an href are skipped because new Uri(null) would throw.
                        var movieUrlList = dom.QuerySelectorAll("li.card>a")
                                           .Select(a => a.GetAttribute("href"))
                                           .Where(href => !string.IsNullOrEmpty(href))
                                           .ToList();

                        Hao123MoviesCrawler(movieUrlList, true);
                        return;
                    }

                    // Detail page: the poster link carries the movie title; without it there is nothing to store.
                    var poster = dom.QuerySelectorAll("div.poster>a").FirstOrDefault();
                    if (poster == null)
                    {
                        return;
                    }

                    var moviesInfo    = new MoviesInfo();
                    var urlSourceList = new List <UrlSource>();

                    moviesInfo.Id         = GuidExtend.NewGuid();
                    moviesInfo.Resource   = resource;
                    moviesInfo.CreateTime = DateTime.Now;
                    moviesInfo.Name       = poster.GetAttribute("title"); // movie title

                    // Joins the distinct link texts under the first element whose "monkey" attribute
                    // equals the given value; returns null when the page has no such element.
                    Func <string, string> joinLinkTexts = monkey =>
                    {
                        var holder = dom.All.FirstOrDefault(el => el.GetAttribute("monkey") == monkey);

                        return holder == null
                               ? null
                               : string.Join(",", holder.QuerySelectorAll("a").Select(x => x.InnerHtml).Distinct());
                    };

                    // A missing section leaves the property at whatever value it already had.
                    moviesInfo.Stars = joinLinkTexts("actor") ?? moviesInfo.Stars;
                    moviesInfo.Type  = joinLinkTexts("category") ?? moviesInfo.Type;
                    moviesInfo.Area  = joinLinkTexts("area") ?? moviesInfo.Area;
                    moviesInfo.Year  = joinLinkTexts("decade") ?? moviesInfo.Year;

                    var img = dom.QuerySelectorAll("div.poster>a>img").FirstOrDefault();
                    if (img != null)
                    {
                        moviesInfo.ImageUrl = img.GetAttribute("src"); // poster image
                    }

                    var des = dom.QuerySelectorAll("p.abstract>em").FirstOrDefault();
                    if (des != null)
                    {
                        moviesInfo.Description = des.InnerHtml;
                    }

                    // Main play button.
                    var playButton = dom.QuerySelectorAll("div.source>a.play-btn").FirstOrDefault();
                    if (playButton != null)
                    {
                        urlSourceList.Add(new UrlSource
                        {
                            Url         = playButton.GetAttribute("href"),
                            VideoSource = playButton.GetAttribute("alog-text"),
                            Id          = GuidExtend.NewGuid(),
                            MovieId     = moviesInfo.Id,
                            Resource    = resource
                        });
                    }

                    // Additional play links listed under the first source block. The block is looked up
                    // with a null check because a page without "div.source" must not fail on an index.
                    var sourceBlock = dom.QuerySelectorAll("div.source").FirstOrDefault();
                    if (sourceBlock != null)
                    {
                        urlSourceList.AddRange(sourceBlock.QuerySelectorAll("ul>li>a").Select(x => new UrlSource
                        {
                            Id          = GuidExtend.NewGuid(),
                            MovieId     = moviesInfo.Id,
                            Url         = x.GetAttribute("href"),
                            VideoSource = x.TextContent,
                            Resource    = resource
                        }));
                    }

                    if (!string.IsNullOrEmpty(moviesInfo.Name) && urlSourceList.Count > 0)
                    {
                        var oldData = _repository.All <MoviesInfo>(sl => sl.Name == moviesInfo.Name && sl.ImageUrl == moviesInfo.ImageUrl);

                        // Delete the old play links BEFORE the old movies: the filter below finds the
                        // movies through oldData, so once they are gone it would match nothing and the
                        // links would be left behind.
                        // NOTE(review): assumes oldData is a lazily evaluated query - confirm against the repository.
                        _repository.DeleteByExpression <UrlSource>(sl => oldData.Select(m => m.Id).Contains(sl.MovieId));

                        oldData.DeleteFromQuery();

                        _repository.Insert(moviesInfo, true);
                        _repository.BulkInsert <UrlSource>(urlSourceList);
                    }
                };

                // Wait for this page before moving on, so pages are handled one at a time.
                crawler.Start(new Uri(urlList[i])).Wait();
            }
        }