Exemple #1
0
        /// <summary>
        /// Walks the genre listing page by page, loads every movie detail page it links to,
        /// and stores the movie together with its genres and cast through the injected services.
        /// </summary>
        /// <param name="page">Listing page to start from. Values below 1 skip the crawl entirely.</param>
        /// <param name="hrefInit">
        /// Optional detail-page URL to resume from: on a listing page that contains it,
        /// the entries preceding it are dropped before processing.
        /// </param>
        /// <remarks>
        /// The crawl ends when <c>GetAllUrlsInSpecifiedGenre</c> returns <c>null</c>.
        /// A movie whose detail page has no <c>ratingValue</c> element is skipped without being stored.
        /// </remarks>
        public void StoreAllMovieInGenres(int page = 1, string hrefInit = null)
        {
            const string baseUrl = "http://www.imdb.com";

            while (page > 0)
            {
                HtmlWeb htmlWeb;
                var allHrefs = GetAllUrlsInSpecifiedGenre(baseUrl, out htmlWeb, page);

                // A null result is the only signal that there are no more pages.
                if (allHrefs == null)
                {
                    break;
                }

                var allHrefList = allHrefs.ToList();

                // Resume support: drop everything that comes before the requested start URL.
                var resumeIndex = hrefInit == null ? -1 : allHrefList.IndexOf(hrefInit);

                if (resumeIndex > 0)
                {
                    allHrefList.RemoveRange(0, resumeIndex);
                }

                foreach (var href in allHrefList)
                {
                    var      movieParams  = new MovieParams();
                    HtmlNode documentNode = htmlWeb.Load(href).DocumentNode;

                    movieParams.Title         = GetMovieTitle(documentNode);
                    movieParams.Year          = GetMovieDate(documentNode);
                    movieParams.OriginalTitle = GetMovieOriginalTitle(documentNode);
                    movieParams.Status        = Status.Active;
                    movieParams.UserType      = UserTypes.Dashboard;

                    // Star names are read from markup shaped like:
                    //   <span itemprop="actors" itemtype="http://schema.org/Person" itemscope="">
                    //       <a href="/name/nm1785339?ref_=tt_ov_st_sm" itemprop="url">
                    //           <span class="itemprop" itemprop="name">Rami Malek</span>
                    //       </a>
                    //   </span>
                    var creditNodes = documentNode.SelectNodes(".//div[@class='credit_summary_item']");
                    var stars       = creditNodes == null
                        ? new List<string>()
                        : creditNodes
                          .SelectMany(credit => credit.Descendants("span")
                                                .Where(span => span.GetAttributeValue("class", "") == "itemprop" &&
                                                               span.GetAttributeValue("itemprop", "") == "name")
                                                .Select(span => span.InnerText))
                          .ToList();

                    var ratingNode = documentNode.SelectSingleNode(".//span[@itemprop='ratingValue']");

                    // Unrated titles are not stored at all.
                    if (ratingNode == null)
                    {
                        continue;
                    }

                    // Normalise the decimal separator and parse with the invariant culture so the
                    // result does not depend on the machine's regional settings (e.g. "8.5" must
                    // never turn into 85 on a dot-decimal culture).
                    movieParams.Rating = double.Parse(
                        ratingNode.InnerText.Trim().Replace(',', '.'),
                        System.Globalization.NumberStyles.Float,
                        System.Globalization.CultureInfo.InvariantCulture);

                    var ratingCount = documentNode.SelectSingleNode(".//span[@itemprop='ratingCount']")?.InnerText;

                    // The vote count element may be absent; leave NumberOfVotes at its default
                    // in that case instead of failing on a null input.
                    if (!string.IsNullOrWhiteSpace(ratingCount))
                    {
                        movieParams.NumberOfVotes = int.Parse(
                            ratingCount.Replace(",", ""),
                            System.Globalization.NumberStyles.Integer,
                            System.Globalization.CultureInfo.InvariantCulture);
                    }

                    movieParams.Country = GetCountry(documentNode);

                    // The title wrapper may be missing; in that case MovieType keeps its default.
                    var titleWrapper = documentNode.SelectSingleNode("//div[@class='title_wrapper']");
                    var typeAnchor   = titleWrapper?.SelectSingleNode("div/a[last()]");

                    if (typeAnchor != null)
                    {
                        var typeText = typeAnchor.InnerText;

                        // Order matters: MiniSeries is tested before Series
                        // (presumably because the mini-series label contains the series label - verify).
                        if (typeText.Contains(MovieType.MiniSeries.GetEnumDescription <DisplayAttribute>().Name))
                        {
                            movieParams.MovieType = MovieType.MiniSeries;
                        }
                        else if (typeText.Contains(MovieType.Series.GetEnumDescription <DisplayAttribute>().Name))
                        {
                            movieParams.MovieType = MovieType.Series;
                        }
                        else if (typeText.Contains(MovieType.TvMovie.GetEnumDescription <DisplayAttribute>().Name))
                        {
                            movieParams.MovieType = MovieType.TvMovie;
                        }
                        else
                        {
                            movieParams.MovieType = MovieType.Movie;
                        }
                    }

                    var genreList = GetMovieCategories(documentNode)?.Select(x => x.Name).ToList();

                    // Without a usable crew link there is nothing to load, so the director and
                    // cast lookups are skipped rather than requesting a malformed URL.
                    var crewAnchor = documentNode.SelectSingleNode("//a[@class='quicklink']");
                    var crewPath   = crewAnchor?.GetAttributeValue("href", "");
                    var crewHref   = string.IsNullOrEmpty(crewPath) ? null : baseUrl + crewPath;

                    var crewNameCharacterPairs = crewHref != null
                        ? GetCrewNamesOfMovie(crewHref, htmlWeb, stars)
                        : null;

                    if (crewHref != null)
                    {
                        movieParams.Director = GetDirectors(crewHref, htmlWeb);
                    }

                    // The movie is saved first; the genre and actor calls below read
                    // movieParams.BaseEntityId afterwards.
                    _movieService.CreateOrUpdate(movieParams);

                    if (genreList != null)
                    {
                        _genreService.CreateGenres(genreList, movieParams.BaseEntityId);
                    }

                    if (crewNameCharacterPairs != null)
                    {
                        _actorService.CreateActors(crewNameCharacterPairs, movieParams.BaseEntityId);
                    }
                }

                page++;
            }
        }