/// <summary>
/// Method responsible for persisting the characters of the acting credits of a movie
/// </summary>
/// <param name="entities">Database context used for the lookups and for saving</param>
/// <param name="production">Production whose credits carry the characters to be persisted; nothing is done unless it is a Movie with credits</param>
/// <param name="savedProduction">Previously saved production used to find characters that already exist; may be null or a non-movie</param>
private static void HandleCharacters(JMoviesEntities entities, Production production, Production savedProduction)
{
    Movie movie = production as Movie;
    if (movie == null || movie.Credits == null)
    {
        return;
    }

    Movie savedMovie = savedProduction as Movie;

    // IDs of the saved movie's acting credits. They do not change while the characters are
    // processed, so they are resolved lazily once instead of once per character.
    long[] existingCreditIDs = null;

    foreach (Credit credit in movie.Credits.ToArray())
    {
        ActingCredit actingCredit = credit as ActingCredit;

        // Only acting credits carry characters; an acting credit without a character list has nothing to save.
        if (actingCredit == null || actingCredit.Characters == null)
        {
            continue;
        }

        foreach (Character character in actingCredit.Characters.ToArray())
        {
            bool saved = false;

            if (savedMovie != null)
            {
                if (savedMovie.Credits == null)
                {
                    savedMovie.Credits = entities.Credit.Where(e => e.ProductionID == savedMovie.ID).ToArray();
                }

                if (existingCreditIDs == null && savedMovie.Credits != null)
                {
                    existingCreditIDs = savedMovie.Credits
                        .Where(x => x.RoleType == CreditRoleType.Acting)
                        .Select(x => x.ID)
                        .ToArray();
                }

                Character savedCharacter = null;
                if (existingCreditIDs != null)
                {
                    // A character counts as already stored when one of the saved acting credits
                    // owns a character with the same name and IMDb ID.
                    savedCharacter = entities.Character.FirstOrDefault(currentCharacter =>
                        existingCreditIDs.Contains(currentCharacter.CreditID)
                        && currentCharacter.Name == character.Name
                        && currentCharacter.IMDbID == character.IMDbID);
                }

                if (savedCharacter != null)
                {
                    // Reuse the stored ID so the existing row is updated instead of a new one being inserted.
                    character.ID = savedCharacter.ID;
                    CommonDBHelper.MarkEntityAsUpdated(entities, character, new string[] { "CharacterType" });
                    saved = true;
                }
            }

            character.CreditID = actingCredit.ID;

            if (!saved)
            {
                character.ID = CommonDBHelper.GetNewID<Character>(entities, e => e.ID);
                entities.Character.Add(character);
            }

            // Each character is saved on its own and the context is detached before the next one.
            entities.SaveChanges();
            CommonDBHelper.DetachAllEntries(entities);
        }
    }
}
/// <summary>
/// Method responsible for parsing the cast list of the movie
/// </summary>
/// <param name="movie">Movie instance to be populated</param>
/// <param name="credits">Credits list to be filled</param>
/// <param name="castListNode">Html node that holds the cast list; when null nothing is parsed and the movie is left untouched</param>
private static void ParseCastList(Movie movie, List<Credit> credits, HtmlNode castListNode)
{
    if (castListNode == null)
    {
        return;
    }

    foreach (HtmlNode castNode in castListNode.QuerySelectorAll("tr"))
    {
        IEnumerable<HtmlNode> castColumnsQuery = castNode.QuerySelectorAll("td");
        if (castColumnsQuery == null)
        {
            continue;
        }

        // Materialize once: the selector result would otherwise be enumerated again for the
        // count and for every column access.
        HtmlNode[] castColumns = castColumnsQuery.ToArray();

        // Only rows with exactly four cells are treated as cast rows.
        if (castColumns.Length != 4)
        {
            continue;
        }

        HtmlNode personNode = castColumns[1];
        HtmlNode charactersNode = castColumns[3];

        ActingCredit actingCredit = new ActingCredit();
        actingCredit.Person = new Actor();

        // A person cell without a link (or a link without href) is handled like a link whose
        // URL does not match: the credit is kept but the person stays unidentified.
        HtmlNode personLink = personNode.QuerySelector("a");
        var personHref = personLink != null ? personLink.Attributes["href"] : null;
        if (personHref != null)
        {
            Match personIDMatch = IMDbConstants.PersonIDURLMatcher.Match(personHref.Value);
            if (personIDMatch.Success && personIDMatch.Groups.Count > 1)
            {
                actingCredit.Person.IMDbID = personIDMatch.Groups[1].Value.ToLong();
                actingCredit.Person.FullName = personNode.InnerText.Prepare();
            }
        }

        List<Character> characters = new List<Character>();
        foreach (HtmlNode characterNode in charactersNode.QuerySelectorAll("a"))
        {
            Character character = GetCharacter(characterNode, movie);
            if (character != null)
            {
                characters.Add(character);
            }
        }

        // No linked characters: fall back to the first child node of the cell, if there is one.
        if (characters.Count == 0 && charactersNode.FirstChild != null)
        {
            Character character = GetCharacter(charactersNode.FirstChild, movie);
            if (character != null && (!string.IsNullOrEmpty(character.Name) || character.IMDbID != null))
            {
                characters.Add(character);
            }
        }

        actingCredit.Characters = characters;
        credits.Add(actingCredit);
    }

    movie.Credits = credits;
}
/// <summary>
/// Method responsible for parsing the person page
/// </summary>
/// <param name="person">Person to be populated</param>
/// <param name="documentNode">HTML Node containing the person page</param>
/// <param name="settings">Object containing Data Fetch settings</param>
/// <exception cref="ArgumentNullException">Thrown when any of the arguments is null</exception>
public static void Parse(Person person, HtmlNode documentNode, PersonDataFetchSettings settings)
{
    if (person == null)
    {
        throw new ArgumentNullException(nameof(person));
    }

    if (documentNode == null)
    {
        throw new ArgumentNullException(nameof(documentNode));
    }

    if (settings == null)
    {
        throw new ArgumentNullException(nameof(settings));
    }

    #region Main Details Parsing
    HtmlNode mainDetailsElement = documentNode.QuerySelector(".maindetails_center");
    if (mainDetailsElement != null)
    {
        HtmlNode nameOverviewWidget = mainDetailsElement.QuerySelector(".name-overview-widget");
        if (nameOverviewWidget != null)
        {
            HtmlNode nameContainer = nameOverviewWidget.QuerySelector("h1.header .itemprop");
            if (nameContainer != null)
            {
                // Prepared the same way as the fallback header below so both paths yield the same kind of name.
                person.FullName = nameContainer.InnerText.Prepare();
            }

            HtmlNode primaryImageElement = nameOverviewWidget.QuerySelector("#img_primary .image a img");
            Image primaryImage = BuildImageFromNode(primaryImageElement, "src", settings);
            if (primaryImage != null)
            {
                person.PrimaryImage = primaryImage;
            }

            HtmlNode jobCategoriesContainer = nameOverviewWidget.QuerySelector("div#name-job-categories");
            if (jobCategoriesContainer != null)
            {
                List<CreditRoleType> roles = new List<CreditRoleType>();
                foreach (HtmlNode jobCategoryLink in jobCategoriesContainer.QuerySelectorAll("a"))
                {
                    CreditRoleType role = CreditRoleType.Undefined;
                    string roleText = jobCategoryLink.InnerText.Prepare();

                    // The parse result is deliberately ignored: a category that is not a known role is still added.
                    Enum.TryParse(roleText, out role);
                    roles.Add(role);
                }

                person.Roles = roles;
            }

            List<Image> photos = new List<Image>();
            HtmlNode mediaStripContainer = nameOverviewWidget.QuerySelector(".mediastrip_container");
            if (mediaStripContainer != null)
            {
                HtmlNode[] allImageNodes = mediaStripContainer.QuerySelectorAll(".mediastrip a").ToArray();

                // Fetch at most MediaImagesFetchCount photos.
                int endIndex = allImageNodes.Length;
                if (settings.MediaImagesFetchCount < endIndex)
                {
                    endIndex = settings.MediaImagesFetchCount;
                }

                for (int i = 0; i < endIndex; i++)
                {
                    // Links without an img child or without a "loadlate" URL are skipped.
                    Image photo = BuildImageFromNode(allImageNodes[i].Element("img"), "loadlate", settings);
                    if (photo != null)
                    {
                        photos.Add(photo);
                    }
                }
            }

            person.Photos = photos;
        }
        else
        {
            HtmlNode nameHeader = documentNode.QuerySelector(".header");
            if (nameHeader != null)
            {
                person.FullName = nameHeader.InnerText.Prepare();
            }
        }
    }
    #endregion

    #region Bio Page Parsing
    if (settings.FetchBioPage)
    {
        BioPageHelper.ParseBioPage(person);
    }
    #endregion

    #region Filmography Parsing
    HtmlNode[] filmogpaphyCategories = documentNode.QuerySelectorAll(".filmo-category-section").ToArray();
    DetectGender(person, filmogpaphyCategories);
    foreach (HtmlNode filmographyCategorySection in filmogpaphyCategories)
    {
        HtmlNode categoryHeader = filmographyCategorySection.NodesBeforeSelf().FirstOrDefault(e => e.Name == "div");
        var categoryAttribute = categoryHeader != null ? categoryHeader.Attributes["data-category"] : null;
        if (categoryAttribute == null)
        {
            // Section without a category header: nothing to classify.
            continue;
        }

        // e.g. "some_category" -> "Some Category" -> "SomeCategory", then matched against the enum names.
        string categoryName = CultureInfo.InvariantCulture.TextInfo.ToTitleCase(categoryAttribute.Value.Replace("_", " "));
        string categoryTypeString = categoryName.Replace(" ", string.Empty);
        CreditRoleType creditRoleType = CreditRoleType.Undefined;
        Enum.TryParse(categoryTypeString, out creditRoleType);

        // NOTE(review): the parsed role type is not used yet; no filmography credits are built from the sections.
    }
    #endregion

    #region Known For Parsing
    HtmlNode knownForElement = documentNode.QuerySelector("#knownfor");
    if (knownForElement != null)
    {
        List<ProductionCredit> knowForCredits = new List<ProductionCredit>();
        foreach (HtmlNode knownForTitleNode in knownForElement.QuerySelectorAll(".knownfor-title"))
        {
            HtmlNode roleElement = knownForTitleNode.QuerySelector(".knownfor-title-role");
            HtmlNode movieLink = roleElement != null ? roleElement.Element("a") : null;
            HtmlNode roleLabel = roleElement != null ? roleElement.Element("span") : null;
            var movieHref = movieLink != null ? movieLink.Attributes["href"] : null;

            // Without the title link or the role label no credit can be built for this entry.
            if (movieHref == null || roleLabel == null)
            {
                continue;
            }

            int titleYear = default(int);
            int? titleEndYear = null;
            HtmlNode titleYearElement = knownForTitleNode.QuerySelector(".knownfor-year");
            if (titleYearElement != null)
            {
                Match parenthesisMatch = GeneralRegexConstants.PharantesisRegex.Match(titleYearElement.InnerText);
                if (parenthesisMatch.Success)
                {
                    Match creditYearMatch = IMDbConstants.CreditYearRegex.Match(parenthesisMatch.Groups[1].Value);
                    if (creditYearMatch.Success)
                    {
                        titleYear = creditYearMatch.Groups[1].Value.ToInteger();

                        // Groups.Count reflects the groups defined by the pattern, not the ones that
                        // took part in this match, so the end-year group must also be checked for success.
                        if (creditYearMatch.Groups.Count >= 4 && creditYearMatch.Groups[3].Success)
                        {
                            titleEndYear = creditYearMatch.Groups[3].Value.ToInteger();
                        }
                    }
                }
            }

            ProductionCredit knownFor = new ProductionCredit();

            // An end year marks the title as a TV series; everything else is treated as a movie.
            if (titleEndYear != null)
            {
                knownFor.Production = new TVSeries { EndYear = (int)titleEndYear };
            }
            else
            {
                knownFor.Production = new Movie();
            }

            knownFor.Production.IMDbID = (long)IMDBIDHelper.GetIDFromUrl(movieHref.Value);
            knownFor.Production.Title = movieLink.InnerText.Prepare();
            knownFor.Production.Year = titleYear;

            string role = roleLabel.InnerText.Prepare();
            CreditRoleType roleType = CreditRoleType.Undefined;
            if (!Enum.TryParse<CreditRoleType>(role, out roleType))
            {
                // The label is not a role name, so it is taken as a character name and the credit
                // becomes an acting credit, refined by the person's gender when it is known.
                roleType = CreditRoleType.Acting;
                if (person.Gender == GenderEnum.Male)
                {
                    roleType = CreditRoleType.Actor;
                }
                else if (person.Gender == GenderEnum.Female)
                {
                    roleType = CreditRoleType.Actress;
                }
            }

            knownFor.Credit = new CreditFactory().Build(roleType);
            knownFor.Credit.RoleType = roleType;
            knownFor.Credit.Person = person;

            if (roleType == CreditRoleType.Actor || roleType == CreditRoleType.Actress || roleType == CreditRoleType.Acting)
            {
                ActingCredit actingCredit = (ActingCredit)knownFor.Credit;
                actingCredit.Characters = new Character[] { new Character { Name = role } };
            }

            knowForCredits.Add(knownFor);
        }

        person.KnownFor = knowForCredits;
    }
    #endregion
}

/// <summary>
/// Builds an image from an img node, reading the title from the "title" attribute and the URL from the given attribute
/// </summary>
/// <param name="imageNode">The img node; may be null</param>
/// <param name="urlAttributeName">Name of the attribute that holds the image URL</param>
/// <param name="settings">Object containing Data Fetch settings; decides whether the image content is fetched</param>
/// <returns>The image, or null when the node or its URL attribute is missing</returns>
private static Image BuildImageFromNode(HtmlNode imageNode, string urlAttributeName, PersonDataFetchSettings settings)
{
    if (imageNode == null)
    {
        return null;
    }

    var urlAttribute = imageNode.Attributes[urlAttributeName];
    if (urlAttribute == null)
    {
        return null;
    }

    // The title is optional; an image without one is still returned.
    var titleAttribute = imageNode.Attributes["title"];
    Image image = new Image
    {
        Title = titleAttribute != null ? titleAttribute.Value.Prepare() : null,
        URL = IMDBImageHelper.NormalizeImageUrl(urlAttribute.Value)
    };

    if (settings.FetchImageContents)
    {
        image.Content = IMDBImageHelper.GetImageContent(image.URL);
    }

    return image;
}