/// <summary>
/// Fills the media image list of a movie from its photo gallery page
/// </summary>
/// <param name="movie">Movie instance whose MediaImages list is re-created and populated</param>
/// <param name="documentNode">Document Node of the photo gallery page; when null the movie is left untouched</param>
/// <param name="settings">Object containing Data Fetch settings (image count limit and whether image contents are downloaded)</param>
public static void Parse(Movie movie, HtmlNode documentNode, ProductionDataFetchSettings settings)
{
    if (documentNode == null)
    {
        return;
    }

    HtmlNode galleryNode = documentNode.QuerySelector("#media_index_content");

    // The list is reset even when the gallery container is missing
    movie.MediaImages = new List<Image>();
    if (galleryNode == null)
    {
        return;
    }

    HtmlNode[] thumbnails = galleryNode.QuerySelectorAll("img").ToArray();
    if (thumbnails == null || thumbnails.Length == 0)
    {
        return;
    }

    // Read at most MediaImagesFetchCount images, or all of them when fewer are available
    int limit = settings.MediaImagesFetchCount < thumbnails.Length
        ? settings.MediaImagesFetchCount
        : thumbnails.Length;

    foreach (HtmlNode thumbnail in thumbnails.Take(limit))
    {
        Image galleryImage = new Image();
        galleryImage.Title = thumbnail.GetAttributeValue("title", string.Empty);
        galleryImage.URL = IMDBImageHelper.NormalizeImageUrl(thumbnail.GetAttributeValue("src", string.Empty));

        if (settings.FetchImageContents)
        {
            galleryImage.Content = IMDBImageHelper.GetImageContent(galleryImage.URL);
        }

        movie.MediaImages.Add(galleryImage);
    }
}
/// <summary>
/// Clears the image contents of a production when the settings do not ask for them
/// </summary>
/// <param name="settings">Object containing Data Fetch settings; FetchImageContents decides whether anything is cleared</param>
/// <param name="production">Production whose poster, media images and (for movies) credited persons are cleaned; may be null</param>
private static void CleanupImageContents(ProductionDataFetchSettings settings, Production production)
{
    // Nothing to do when contents were requested or there is no production
    if (settings.FetchImageContents || production == null)
    {
        return;
    }

    if (production.Poster != null)
    {
        production.Poster.Content = null;
    }

    if (production.MediaImages != null)
    {
        foreach (Image mediaImage in production.MediaImages)
        {
            mediaImage.Content = null;
        }
    }

    // Only movies carry credits in this method
    Movie movie = production as Movie;
    if (movie == null || movie.Credits == null)
    {
        return;
    }

    foreach (Credit credit in movie.Credits)
    {
        RemoveImageContentsFromPerson(credit.Person);
    }
}
/// <summary>
/// Gets Production information irrespective of its type by calling the GetProductionDetails action
/// </summary>
/// <param name="id">ID of the production</param>
/// <param name="settings">Object containing Data Fetch settings, forwarded with the request</param>
/// <returns>Production instance taken from the action response</returns>
public Production GetProduction(long id, ProductionDataFetchSettings settings)
{
    GetProductionDetailsRequest detailsRequest = new GetProductionDetailsRequest();
    detailsRequest.ID = id;
    detailsRequest.Settings = settings;

    GetProductionDetailsResponse detailsResponse =
        jmAppClientProvider.CallAction<GetProductionDetailsResponse>(ActionNameConstants.GetProductionDetails, detailsRequest);

    return detailsResponse.Production;
}
/// <summary>
/// Gets Production information irrespective of its type from the JMoviesEntities context
/// </summary>
/// <param name="id">ID of the production</param>
/// <param name="settings">Object containing Data Fetch settings (image count, cast count, image contents)</param>
/// <returns>Production instance containing retrieved information, or null when no production has the given ID</returns>
public Production GetProduction(long id, ProductionDataFetchSettings settings)
{
    using (JMoviesEntities entities = new JMoviesEntities())
    {
        Production production = entities.Production.FirstOrDefault(e => e.ID == id);

        // A fetch count of exactly 0 skips poster and media images altogether
        if (settings.MediaImagesFetchCount != 0 && production != null)
        {
            //fetch poster
            production.Poster = entities.Image.FirstOrDefault(e => e.ID == production.PosterID);

            //fetch images
            int imageCount = settings.MediaImagesFetchCount;
            if (imageCount <= 0)
            {
                //default value (only reachable for negative counts, 0 is handled above)
                imageCount = 5;
            }

            production.MediaImages = entities.Image.Where(e => e.ProductionID == production.ID).Take(imageCount).ToArray();
        }

        Movie movie = production as Movie;
        if (movie != null)
        {
            int castCount = settings.CastFetchCount;
            if (castCount <= 0)
            {
                //default value
                castCount = 5;
            }

            movie.Credits = entities.Credit.Include(e => (e as ActingCredit).Characters)
                            .Where(e => e.ProductionID == production.ID).Take(castCount).ToArray();

            if (movie.Credits != null)
            {
                foreach (Credit credit in movie.Credits)
                {
                    credit.Person = entities.Person.FirstOrDefault(e => e.ID == credit.PersonID);

                    // FirstOrDefault yields null when no person row matches; only look up
                    // the primary image when there is a person to attach it to.
                    // NOTE(review): CleanupImageContents forwards credit.Person to
                    // RemoveImageContentsFromPerson - confirm that helper tolerates null.
                    if (credit.Person != null)
                    {
                        credit.Person.PrimaryImage = entities.Image.FirstOrDefault(e => e.ID == credit.Person.PrimaryImageID);
                    }
                }
            }

            movie.Genres = entities.Genre.Where(e => e.ProductionID == production.ID).ToArray();
        }

        CleanupImageContents(settings, production);
        return production;
    }
}
/// <summary>
/// Gets Production information irrespective of its type by downloading and parsing the IMDb title page
/// </summary>
/// <param name="id">ID of the production</param>
/// <param name="settings">Object containing Data Fetch settings</param>
/// <returns>Production instance containing retrieved information, or null when the page has no pageId meta tag</returns>
/// <exception cref="JMException">Thrown when the id has its default value or settings is null</exception>
public Production GetProduction(long id, ProductionDataFetchSettings settings)
{
    if (id == default(long))
    {
        throw new JMException("IMDbIDEmpty");
    }

    if (settings == null)
    {
        throw new JMException("SettingsEmpty");
    }

    Movie movie = new Movie();

    // The numeric id is left-padded with zeros to the configured length
    string paddedId = id.ToString().PadLeft(IMDbConstants.IMDbIDLength, '0');
    string url = IMDbConstants.BaseURL + IMDbConstants.MoviesPath + IMDbConstants.MovieIDPrefix + paddedId;

    HtmlDocument htmlDocument = HtmlHelper.GetNewHtmlDocument();
    WebRequest webRequest = HttpHelper.InitializeWebRequest(url);
    using (Stream responseStream = HttpHelper.GetResponseStream(webRequest))
    {
        htmlDocument.Load(responseStream, Encoding.UTF8);
    }

    HtmlNode documentNode = htmlDocument.DocumentNode;

    //Parse and verify IMDb ID Meta Tag
    HtmlNode idMetaTag = documentNode.QuerySelector("meta[property='pageId']");
    if (idMetaTag == null)
    {
        return null;
    }

    string pageId = idMetaTag.Attributes["content"].Value;
    movie.IMDbID = Regex.Replace(pageId, IMDbConstants.MovieIDPrefix, string.Empty).ToLong();

    // movie is passed by ref, so the helper may swap in a different instance;
    // the boolean result of the helper is not inspected here
    MoviePageDetailsHelper.Parse(this, ref movie, documentNode, url, settings);

    return movie;
}
/// <summary>
/// Gets TV Series information
/// </summary>
/// <param name="id">ID of the TV Series</param>
/// <param name="settings">Object containing Data Fetch settings</param>
/// <returns>TV Series instance containing retrieved information, or null when the production is not a TV Series</returns>
public TVSeries GetTvSeries(long id, ProductionDataFetchSettings settings)
{
    Production production = GetProduction(id, settings);
    return production as TVSeries;
}
/// <summary>
/// Gets Movie information
/// </summary>
/// <param name="id">ID of the movie</param>
/// <param name="settings">Object containing Data Fetch settings</param>
/// <returns>Movie instance containing retrieved information, or null when the production is not a Movie</returns>
public Movie GetMovie(long id, ProductionDataFetchSettings settings)
{
    Production production = GetProduction(id, settings);
    return production as Movie;
}
/// <summary>
/// Main Parse method of the Movie Page
/// </summary>
/// <param name="providerInstance">Instance reference of the IMDbScraperDataProvider</param>
/// <param name="movie">Movie instance that is populated; replaced by a new TVSeries (keeping the IMDbID) when the page's og:type meta tag matches IMDbConstants.TVSeriesOgType</param>
/// <param name="documentNode">Document Node of the movie page</param>
/// <param name="moviePageUrl">URL of the movie page; the credits, release info and photo gallery URLs are built from it</param>
/// <param name="settings">Object containing Data Fetch settings</param>
/// <returns>False when the title wrapper or the plot summary wrapper is missing from the page, true otherwise</returns>
public static bool Parse(IMDbScraperDataProvider providerInstance, ref Movie movie, HtmlNode documentNode, string moviePageUrl, ProductionDataFetchSettings settings)
{
    HtmlNode titleTypeTag = documentNode.QuerySelector("meta[property='og:type']");
    if (titleTypeTag != null && titleTypeTag.Attributes["content"].Value == IMDbConstants.TVSeriesOgType)
    {
        //Initialize movie as TV Series
        movie = new TVSeries
        {
            IMDbID = movie.IMDbID
        };
    }

    //Parse Title
    HtmlNode titleWrapper = documentNode.QuerySelector(".title_wrapper");
    if (titleWrapper == null)
    {
        return false;
    }

    movie.Title = titleWrapper.QuerySelector("h1").InnerText.Prepare();

    // When the heading matches the year pattern, split it into title (group 1) and year (group 2)
    Match titleYearMatch = IMDbConstants.MovieYearRegex.Match(movie.Title);
    if (titleYearMatch.Success)
    {
        movie.Year = titleYearMatch.Groups[2].Value.Trim().ToInteger();
        movie.Title = titleYearMatch.Groups[1].Value.Trim();
    }

    HtmlNode originalTitleNode = titleWrapper.QuerySelector(".originalTitle");
    if (originalTitleNode != null)
    {
        movie.OriginalTitle = originalTitleNode.InnerText.Prepare();
    }

    foreach (HtmlNode titleLink in titleWrapper.QuerySelectorAll("a"))
    {
        if (!titleLink.OuterHtml.Contains("/releaseinfo"))
        {
            continue;
        }

        Match yearMatch = IMDbConstants.MovieYearRegex.Match(titleLink.InnerText.Prepare());
        if (!yearMatch.Success)
        {
            continue;
        }

        movie.Year = yearMatch.Groups[2].Value.Trim().ToInteger();
        if (yearMatch.Groups.Count > 3)
        {
            string endYearString = yearMatch.Groups[3].Value.Trim();
            if (!string.IsNullOrEmpty(endYearString))
            {
                // Only a TVSeries has an EndYear; a plain Movie whose link happens to
                // show a year range must not cause a NullReferenceException.
                TVSeries series = movie as TVSeries;
                if (series != null)
                {
                    series.EndYear = endYearString.ToInteger();
                }
            }
        }
    }

    //Parse Poster
    HtmlNode posterNode = documentNode.QuerySelector(".poster img");
    if (posterNode != null)
    {
        movie.Poster = new Image
        {
            Title = posterNode.GetAttributeValue("title", string.Empty),
            URL = IMDBImageHelper.NormalizeImageUrl(posterNode.GetAttributeValue("src", string.Empty))
        };
        if (settings.FetchImageContents)
        {
            movie.Poster.Content = IMDBImageHelper.GetImageContent(movie.Poster.URL);
        }
    }

    //Parse Summary
    HtmlNode summaryWrapper = documentNode.QuerySelector(".plot_summary_wrapper");
    List<Credit> credits = new List<Credit>();
    if (summaryWrapper == null)
    {
        return false;
    }

    HtmlNode summaryText = summaryWrapper.QuerySelector(".summary_text");
    if (summaryText != null)
    {
        movie.PlotSummary = summaryText.FirstChild.InnerText.Prepare();
        if (movie.PlotSummary.StartsWith(IMDbConstants.EmptyPlotText))
        {
            movie.PlotSummary = string.Empty;
        }
    }

    foreach (HtmlNode creditSummaryNode in summaryWrapper.QuerySelectorAll(".credit_summary_item"))
    {
        List<Credit> summaryCredits = SummaryCastHelper.GetCreditInfo(creditSummaryNode);
        if (summaryCredits != null && summaryCredits.Count > 0)
        {
            credits.AddRange(summaryCredits);
        }
    }

    //Parse Story Line
    HtmlNode storyLineSection = documentNode.QuerySelector("#titleStoryLine");
    if (storyLineSection != null)
    {
        SummaryStorylineHelper.Parse(movie, storyLineSection);
    }

    //Parse Details Section
    HtmlNode detailsSection = documentNode.QuerySelector("#titleDetails");
    if (detailsSection != null)
    {
        MoviePageDetailsHelper.ParseDetailsSection(movie, detailsSection);
    }

    if (!settings.FetchDetailedCast)
    {
        //Parse Cast Table
        // NOTE(review): movie.Credits is only assigned in the detailed branch below;
        // confirm ParseCastList assigns it for this branch.
        HtmlNode castListNode = documentNode.QuerySelector(".cast_list");
        ParseCastList(movie, credits, castListNode);
    }
    else
    {
        //Fetch credits through full credits page
        string fullCreditsUrl = moviePageUrl + "/" + IMDbConstants.FullCreditsPath;
        WebRequest fullCreditsPageRequest = HttpHelper.InitializeWebRequest(fullCreditsUrl);
        HtmlDocument creditsPageDocument = HtmlHelper.GetNewHtmlDocument();
        using (Stream stream = HttpHelper.GetResponseStream(fullCreditsPageRequest))
        {
            creditsPageDocument.Load(stream, Encoding.UTF8);
        }
        HtmlNode fullCreditsPageDocumentNode = creditsPageDocument.DocumentNode;
        HtmlNode fullCreditsPageCastListNode = fullCreditsPageDocumentNode.QuerySelector(".cast_list");
        ParseCastList(movie, credits, fullCreditsPageCastListNode);
        movie.Credits = credits;
    }

    #region Parse Release Info Page
    string releaseInfoURL = moviePageUrl + "/" + IMDbConstants.ReleaseInfoPath;
    WebRequest releaseInfoPageRequest = HttpHelper.InitializeWebRequest(releaseInfoURL);
    HtmlDocument releaseInfoPageDocument = HtmlHelper.GetNewHtmlDocument();
    using (Stream stream = HttpHelper.GetResponseStream(releaseInfoPageRequest))
    {
        releaseInfoPageDocument.Load(stream, Encoding.UTF8);
    }
    ReleaseInfoPageHelper.Parse(movie, releaseInfoPageDocument);
    #endregion

    #region Parse Ratings
    HtmlNode ratingsWrapper = documentNode.QuerySelector(".imdbRating");
    if (ratingsWrapper != null)
    {
        HtmlNode ratingNode = ratingsWrapper.QuerySelector("span[itemprop='ratingValue']");
        HtmlNode ratingCountNode = ratingsWrapper.QuerySelector("span[itemprop='ratingCount']");

        // The rating is optional: skip it when either span is missing instead of failing
        if (ratingNode != null && ratingCountNode != null)
        {
            // Normalise the decimal separator to '.' and parse with the invariant culture so
            // the value does not depend on the machine's regional settings (a "8,5" string
            // is read as 85 by cultures that treat ',' as a group separator).
            string ratingText = ratingNode.InnerText.Prepare().Replace(',', '.');

            movie.Rating = new Rating(DataSourceTypeEnum.IMDb, movie);
            movie.Rating.Value = double.Parse(
                ratingText,
                System.Globalization.NumberStyles.Float,
                System.Globalization.CultureInfo.InvariantCulture);
            movie.Rating.RateCount = ratingCountNode.InnerText.Prepare().Replace(",", string.Empty).ToLong();
        }
    }
    #endregion

    #region Parse Photo Gallery Page
    if (settings.MediaImagesFetchCount > 0)
    {
        string photoGalleryURL = moviePageUrl + "/" + IMDbConstants.PhotoGalleryPath;
        WebRequest photoGalleryPageRequest = HttpHelper.InitializeWebRequest(photoGalleryURL);
        HtmlDocument photoGalleryPageDocument = HtmlHelper.GetNewHtmlDocument();
        using (Stream stream = HttpHelper.GetResponseStream(photoGalleryPageRequest))
        {
            photoGalleryPageDocument.Load(stream, Encoding.UTF8);
        }
        PhotoGalleryPageHelper.Parse(movie, photoGalleryPageDocument?.DocumentNode, settings);
    }
    #endregion

    return true;
}