Ejemplo n.º 1
0
        public void TVShowWithNoEndTest()
        {
            // IMDb ID requested by this test; the assertions below expect it to resolve to a TV series without an end year.
            const long tvSeriesID = 48861;

            IIMDbDataProvider imdbDataProvider = new IMDbScraperDataProvider();
            Production        production       = imdbDataProvider.GetProduction(tvSeriesID, new FullProductionDataFetchSettings());

            Assert.IsNotNull(production);
            Assert.AreEqual(tvSeriesID, production.IMDbID);
            Assert.AreEqual(ProductionTypeEnum.TVSeries, production.ProductionType);

            // Cast once and assert on the result, so an unexpected runtime type is reported as a
            // failed assertion instead of surfacing as a NullReferenceException.
            TVSeries tvSeries = production as TVSeries;
            Assert.IsNotNull(tvSeries);
            Assert.IsTrue(tvSeries.Year != default(int));
            Assert.IsFalse(tvSeries.EndYear.HasValue);
        }
Ejemplo n.º 2
0
        public void DetailedProductionScraping()
        {
            // A single provider instance serves every ID listed in productionIDsTotest.
            IIMDbDataProvider provider = new IMDbScraperDataProvider();

            foreach (long id in productionIDsTotest)
            {
                // Each request gets its own full-detail settings object.
                var        fetchSettings = new FullProductionDataFetchSettings();
                Production fetched       = provider.GetProduction(id, fetchSettings);

                // The provider must return something, and it must be the production that was asked for.
                Assert.IsNotNull(fetched);
                Assert.AreEqual(id, fetched.IMDbID);
            }
        }
Ejemplo n.º 3
0
        public void DetailedPersonScraping()
        {
            // A single provider instance serves every ID listed in personIDsToTest.
            IIMDbDataProvider provider = new IMDbScraperDataProvider();

            foreach (long id in personIDsToTest)
            {
                // Each request gets its own full-detail settings object.
                var    fetchSettings = new FullPersonDataFetchSettings();
                Person fetched       = provider.GetPerson(id, fetchSettings);

                // The person must exist, carry a non-empty full name and match the requested ID.
                Assert.IsNotNull(fetched);
                Assert.IsFalse(string.IsNullOrEmpty(fetched.FullName));
                Assert.AreEqual(id, fetched.IMDbID);
            }
        }
Ejemplo n.º 4
0
        static void Main(string[] args)
        {
            // Configuration sources are registered in this order: base JSON file, optional
            // environment-specific JSON file, environment variables, command line arguments.
            var builder = new ConfigurationBuilder()
                          .AddJsonFile("appsettings.json", optional: false, reloadOnChange: true)
                          .AddJsonFile($"appsettings.{EnvironmentUtilities.GetEnvironmentName()}.json", optional: true, reloadOnChange: true)
                          .AddEnvironmentVariables()
                          .AddCommandLine(args);

            BaseJobConfiguration configuration = builder.Build().Get<BaseJobConfiguration>();

            InitializationHelper.Initialize(configuration);
            var services = new ServiceCollection();

            services.AddOptions();

            // Fall back to the default batch size when none was configured.
            if (configuration.MaxRecordCount == default(int))
            {
                configuration.MaxRecordCount = ConfigurationConstants.PersisterRecordCountPerRun;
            }

            // The built service provider is disposable; scope it so it is released when the run ends.
            using (var serviceProvider = services.BuildServiceProvider())
            using (JMoviesEntities entities = new JMoviesEntities())
            {
                IIMDbDataProvider imdbDataProvider = new IMDbScraperDataProvider();

                // Determine the start ID when none was configured, or always when updating internal data.
                if (configuration.StartRecordID == default(long) || configuration.WorkingType == PersisterWorkingTypeEnum.UpdateInternalData)
                {
                    configuration.StartRecordID = PersisterHelper.DetermineTheStartID(EntityType, DataSource, configuration.WorkingType, configuration.StartRecordID, entities);
                }

                long dataID = configuration.StartRecordID;
                for (int i = 0; i < configuration.MaxRecordCount; i++)
                {
                    // The first iteration processes the start ID itself; later iterations advance from the previous ID.
                    if (i != 0)
                    {
                        dataID = PersisterHelper.GetNextID(EntityType, DataSource, configuration.WorkingType, entities, dataID);
                    }

                    // No usable ID for this iteration: nothing to fetch or record.
                    if (dataID == default(long))
                    {
                        continue;
                    }

                    // Wrap around to the first ID once the configured maximum is exceeded.
                    if (dataID > ConfigurationConstants.IMDBMaxID)
                    {
                        dataID = 1;
                    }

                    try
                    {
                        Production production = imdbDataProvider.GetProduction(dataID, ProductionDataFetchSettings);
                        DbContextOptionsBuilder<JMoviesEntities> dbContextOptionsBuilder = new DbContextOptionsBuilder<JMoviesEntities>();
                        dbContextOptionsBuilder.UseLazyLoadingProxies(true);

                        // The production is persisted through its own short-lived context, separate from the history context.
                        using (JMoviesEntities productionPersistanceEntities = new JMoviesEntities(dbContextOptionsBuilder.Options))
                        {
                            ProductionPersistanceManager.Persist(productionPersistanceEntities, production);
                        }
                        PersisterHelper.SavePersisterHistory(entities, dataID, DataSource, EntityType, string.Empty);
                    }
                    catch (Exception exception)
                    {
                        // A failure for one record is written to the history with the exception text; the loop keeps going.
                        PersisterHelper.SavePersisterHistory(entities, dataID, DataSource, EntityType, exception.ToString());
                    }
                    entities.SaveChanges();
                }
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Main Parse method of the Movie Page
        /// </summary>
        /// <param name="providerInstance">Instance reference of the IMDbScraperDataProvider</param>
        /// <param name="movie">Movie instance that is populated; replaced by a new TVSeries instance (keeping the IMDbID) when the page's og:type equals the TV series constant</param>
        /// <param name="documentNode">Document Node of the movie page</param>
        /// <param name="moviePageUrl">URL of the movie page</param>
        /// <param name="settings">Object containing Data Fetch settings</param>
        /// <returns>False when the title wrapper or the plot summary wrapper is missing from the page, true otherwise</returns>
        public static bool Parse(IMDbScraperDataProvider providerInstance, ref Movie movie, HtmlNode documentNode, string moviePageUrl, ProductionDataFetchSettings settings)
        {
            HtmlNode titleTypeTag = documentNode.QuerySelector("meta[property='og:type']");

            //GetAttributeValue tolerates a meta tag without a content attribute
            if (titleTypeTag != null && titleTypeTag.GetAttributeValue("content", string.Empty) == IMDbConstants.TVSeriesOgType)
            {
                //Initialize movie as TV Series
                movie = new TVSeries
                {
                    IMDbID = movie.IMDbID
                };
            }

            //Parse Title
            HtmlNode titleWrapper = documentNode.QuerySelector(".title_wrapper");

            if (titleWrapper == null)
            {
                return false;
            }

            movie.Title = titleWrapper.QuerySelector("h1").InnerText.Prepare();

            //The heading may carry the year next to the title; split them when the pattern matches
            Match titleYearMatch = IMDbConstants.MovieYearRegex.Match(movie.Title);
            if (titleYearMatch.Success)
            {
                movie.Year  = titleYearMatch.Groups[2].Value.Trim().ToInteger();
                movie.Title = titleYearMatch.Groups[1].Value.Trim();
            }

            HtmlNode originalTitleNode = titleWrapper.QuerySelector(".originalTitle");
            if (originalTitleNode != null)
            {
                movie.OriginalTitle = originalTitleNode.InnerText.Prepare();
            }

            foreach (HtmlNode titleLink in titleWrapper.QuerySelectorAll("a"))
            {
                if (!titleLink.OuterHtml.Contains("/releaseinfo"))
                {
                    continue;
                }

                Match yearMatch = IMDbConstants.MovieYearRegex.Match(titleLink.InnerText.Prepare());
                if (!yearMatch.Success)
                {
                    continue;
                }

                movie.Year = yearMatch.Groups[2].Value.Trim().ToInteger();

                //An end year only applies to TV series; skip it for other productions instead of dereferencing a failed cast
                TVSeries tvSeries = movie as TVSeries;
                if (tvSeries != null && yearMatch.Groups.Count > 3)
                {
                    string endYearString = yearMatch.Groups[3].Value.Trim();
                    if (!string.IsNullOrEmpty(endYearString))
                    {
                        tvSeries.EndYear = endYearString.ToInteger();
                    }
                }
            }

            //Parse Poster
            HtmlNode posterNode = documentNode.QuerySelector(".poster img");

            if (posterNode != null)
            {
                movie.Poster = new Image
                {
                    Title = posterNode.GetAttributeValue("title", string.Empty),
                    URL   = IMDBImageHelper.NormalizeImageUrl(posterNode.GetAttributeValue("src", string.Empty))
                };
                if (settings.FetchImageContents)
                {
                    movie.Poster.Content = IMDBImageHelper.GetImageContent(movie.Poster.URL);
                }
            }

            //Parse Summary
            HtmlNode summaryWrapper = documentNode.QuerySelector(".plot_summary_wrapper");

            if (summaryWrapper == null)
            {
                return false;
            }

            List<Credit> credits     = new List<Credit>();
            HtmlNode     summaryText = summaryWrapper.QuerySelector(".summary_text");
            if (summaryText != null)
            {
                movie.PlotSummary = summaryText.FirstChild.InnerText.Prepare();

                //Placeholder text is a fixed constant, so compare ordinally rather than by current culture
                if (movie.PlotSummary.StartsWith(IMDbConstants.EmptyPlotText, System.StringComparison.Ordinal))
                {
                    movie.PlotSummary = string.Empty;
                }
            }

            foreach (HtmlNode creditSummaryNode in summaryWrapper.QuerySelectorAll(".credit_summary_item"))
            {
                List<Credit> summaryCredits = SummaryCastHelper.GetCreditInfo(creditSummaryNode);
                if (summaryCredits != null && summaryCredits.Count > 0)
                {
                    credits.AddRange(summaryCredits);
                }
            }

            //Parse Story Line
            HtmlNode storyLineSection = documentNode.QuerySelector("#titleStoryLine");

            if (storyLineSection != null)
            {
                SummaryStorylineHelper.Parse(movie, storyLineSection);
            }

            //Parse Details Section
            HtmlNode detailsSection = documentNode.QuerySelector("#titleDetails");

            if (detailsSection != null)
            {
                MoviePageDetailsHelper.ParseDetailsSection(movie, detailsSection);
            }

            if (!settings.FetchDetailedCast)
            {
                //Parse Cast Table
                //NOTE(review): movie.Credits is not assigned on this path in this method; presumably ParseCastList takes care of it - confirm
                HtmlNode castListNode = documentNode.QuerySelector(".cast_list");
                ParseCastList(movie, credits, castListNode);
            }
            else
            {
                //Fetch credits through full credits page
                HtmlDocument creditsPageDocument         = LoadHtmlDocumentFromUrl(moviePageUrl + "/" + IMDbConstants.FullCreditsPath);
                HtmlNode     fullCreditsPageCastListNode = creditsPageDocument.DocumentNode.QuerySelector(".cast_list");
                ParseCastList(movie, credits, fullCreditsPageCastListNode);
                movie.Credits = credits;
            }

            #region Parse Release Info Page
            HtmlDocument releaseInfoPageDocument = LoadHtmlDocumentFromUrl(moviePageUrl + "/" + IMDbConstants.ReleaseInfoPath);
            ReleaseInfoPageHelper.Parse(movie, releaseInfoPageDocument);
            #endregion

            #region Parse Ratings
            HtmlNode ratingsWrapper = documentNode.QuerySelector(".imdbRating");
            if (ratingsWrapper != null)
            {
                HtmlNode ratingNode      = ratingsWrapper.QuerySelector("span[itemprop='ratingValue']");
                HtmlNode ratingCountNode = ratingsWrapper.QuerySelector("span[itemprop='ratingCount']");

                //Leave the rating unset when either value node is missing, like the other optional sections
                if (ratingNode != null && ratingCountNode != null)
                {
                    //Normalize the decimal separator to '.' and parse with the invariant culture,
                    //so the result does not depend on the culture of the running thread
                    string ratingText = ratingNode.InnerText.Prepare().Replace(',', '.');

                    movie.Rating           = new Rating(DataSourceTypeEnum.IMDb, movie);
                    movie.Rating.Value     = double.Parse(ratingText, System.Globalization.CultureInfo.InvariantCulture);
                    movie.Rating.RateCount = ratingCountNode.InnerText.Prepare().Replace(",", string.Empty).ToLong();
                }
            }
            #endregion

            #region Parse Photo Gallery Page
            if (settings.MediaImagesFetchCount > 0)
            {
                HtmlDocument photoGalleryPageDocument = LoadHtmlDocumentFromUrl(moviePageUrl + "/" + IMDbConstants.PhotoGalleryPath);
                PhotoGalleryPageHelper.Parse(movie, photoGalleryPageDocument.DocumentNode, settings);
            }
            #endregion

            return true;
        }

        /// <summary>
        /// Requests the given URL and loads the response stream into a new HTML document using UTF-8
        /// </summary>
        /// <param name="url">URL of the page to load</param>
        /// <returns>HTML document loaded from the response</returns>
        private static HtmlDocument LoadHtmlDocumentFromUrl(string url)
        {
            WebRequest   request  = HttpHelper.InitializeWebRequest(url);
            HtmlDocument document = HtmlHelper.GetNewHtmlDocument();
            using (Stream stream = HttpHelper.GetResponseStream(request))
            {
                document.Load(stream, Encoding.UTF8);
            }
            return document;
        }