/// <summary>
/// Internal Method responsible to parse a credits summary section from its related HTML Node
/// </summary>
/// <remarks>
/// The role of the section is taken from its <c>h4</c> heading, which is tested against the
/// directors, stars, writers and creators summary regexes of <c>IMDbConstants</c>, in that order.
/// When no heading exists or none of the regexes match, the credits are returned with
/// <c>CreditRoleType.Undefined</c>. Sections resolved as acting always yield an empty list.
/// </remarks>
/// <param name="creditSummaryNode">HTML Node that contains the credits summary section</param>
/// <returns>
/// Parsed credits list; empty when <paramref name="creditSummaryNode"/> is null, when the section
/// is an acting section, or when no link carries a person ID.
/// </returns>
internal static List<Credit> GetCreditInfo(HtmlNode creditSummaryNode)
{
    List<Credit> credits = new List<Credit>();
    if (creditSummaryNode == null)
    {
        return credits;
    }

    CreditRoleType roleType = CreditRoleType.Undefined;
    HtmlNode headingNode = creditSummaryNode.QuerySelector("h4");
    if (headingNode != null)
    {
        string role = headingNode.InnerText.Prepare();
        if (IMDbConstants.DirectorsSummaryRegex.IsMatch(role))
        {
            roleType = CreditRoleType.Director;
        }
        else if (IMDbConstants.StarsSummaryRegex.IsMatch(role))
        {
            roleType = CreditRoleType.Acting;
        }
        else if (IMDbConstants.WritersSummaryRegex.IsMatch(role))
        {
            roleType = CreditRoleType.Writer;
        }
        else if (IMDbConstants.CreatorsSummaryRegex.IsMatch(role))
        {
            roleType = CreditRoleType.Creator;
        }
    }

    // Ignore acting credits, they are covered from cast summary.
    // Decided once up front instead of once per link.
    if (roleType == CreditRoleType.Acting)
    {
        return credits;
    }

    foreach (HtmlNode creditNode in creditSummaryNode.QuerySelectorAll("a"))
    {
        // Anchors without an href yield an empty string instead of a null dereference.
        string href = creditNode.GetAttributeValue("href", string.Empty);
        Match personIDMatch = IMDbConstants.PersonIDURLMatcher.Match(href);

        // Groups.Count only reflects how the pattern is written; Success tells whether
        // group 1 actually captured an ID for this particular link.
        Group personIDGroup = personIDMatch.Groups[1];
        if (!personIDMatch.Success || !personIDGroup.Success)
        {
            continue;
        }

        Credit credit = new Credit
        {
            Person = new Person
            {
                IMDbID = personIDGroup.Value.ToLong(),
                FullName = creditNode.InnerText.Prepare()
            },
            RoleType = roleType
        };
        credits.Add(credit);
    }

    return credits;
}
/// <summary>
/// Method responsible for parsing the person page
/// </summary>
/// <remarks>
/// Runs four steps in order: the main details section (name, primary image, job categories,
/// media strip photos), the optional bio page, the filmography categories, and the
/// "Known For" section. Page elements that are missing are skipped rather than failing the
/// whole parse.
/// </remarks>
/// <param name="person">Person to be populated</param>
/// <param name="documentNode">HTML Node containing the person page</param>
/// <param name="settings">Object containing Data Fetch settings</param>
/// <exception cref="ArgumentNullException">Thrown when any of the arguments is null.</exception>
public static void Parse(Person person, HtmlNode documentNode, PersonDataFetchSettings settings)
{
    if (person == null)
    {
        throw new ArgumentNullException("person");
    }

    if (documentNode == null)
    {
        throw new ArgumentNullException("documentNode");
    }

    if (settings == null)
    {
        throw new ArgumentNullException("settings");
    }

    ParseMainDetailsSection(person, documentNode, settings);

    if (settings.FetchBioPage)
    {
        BioPageHelper.ParseBioPage(person);
    }

    // Must run before the Known For step: that step reads person.Gender, which
    // DetectGender (called inside) presumably sets - NOTE(review): confirm.
    ParseFilmographySection(person, documentNode);

    ParseKnownForSection(person, documentNode);
}

/// <summary>
/// Populates name, primary image, roles and photos from the ".maindetails_center" element.
/// </summary>
private static void ParseMainDetailsSection(Person person, HtmlNode documentNode, PersonDataFetchSettings settings)
{
    HtmlNode mainDetailsElement = documentNode.QuerySelector(".maindetails_center");
    if (mainDetailsElement == null)
    {
        return;
    }

    HtmlNode nameOverviewWidget = mainDetailsElement.QuerySelector(".name-overview-widget");
    if (nameOverviewWidget == null)
    {
        // Without the overview widget only the name can be recovered, from the page header.
        HtmlNode nameHeader = documentNode.QuerySelector(".header");
        if (nameHeader != null)
        {
            person.FullName = nameHeader.InnerText.Prepare();
        }

        return;
    }

    HtmlNode nameContainer = nameOverviewWidget.QuerySelector("h1.header .itemprop");
    if (nameContainer != null)
    {
        // Passed through Prepare() like the header fallback above, so both paths
        // produce the name in the same form.
        person.FullName = nameContainer.InnerText.Prepare();
    }

    HtmlNode primaryImageElement = nameOverviewWidget.QuerySelector("#img_primary .image a img");
    Image primaryImage = BuildImageFromNode(primaryImageElement, "src", settings);
    if (primaryImage != null)
    {
        person.PrimaryImage = primaryImage;
    }

    HtmlNode jobCategoriesContainer = nameOverviewWidget.QuerySelector("div#name-job-categories");
    if (jobCategoriesContainer != null)
    {
        List<CreditRoleType> roles = new List<CreditRoleType>();
        foreach (HtmlNode jobCategoryLink in jobCategoriesContainer.QuerySelectorAll("a"))
        {
            // TryParseRoleName leaves Undefined in place for unrecognised text; a raw
            // Enum.TryParse would overwrite the variable with default(CreditRoleType).
            CreditRoleType role;
            TryParseRoleName(jobCategoryLink.InnerText.Prepare(), out role);
            roles.Add(role);
        }

        person.Roles = roles;
    }

    person.Photos = ParseMediaStripPhotos(nameOverviewWidget, settings);
}

/// <summary>
/// Builds the photo list from the first settings.MediaImagesFetchCount links of the media strip.
/// </summary>
private static List<Image> ParseMediaStripPhotos(HtmlNode nameOverviewWidget, PersonDataFetchSettings settings)
{
    List<Image> photos = new List<Image>();
    HtmlNode mediaStripContainer = nameOverviewWidget.QuerySelector(".mediastrip_container");
    if (mediaStripContainer == null)
    {
        return photos;
    }

    HtmlNode[] allImageLinks = mediaStripContainer.QuerySelectorAll(".mediastrip a").ToArray();
    int endIndex = Math.Min(allImageLinks.Length, settings.MediaImagesFetchCount);
    for (int i = 0; i < endIndex; i++)
    {
        // Links without an img child, or without a "loadlate" URL, are skipped.
        Image photo = BuildImageFromNode(allImageLinks[i].Element("img"), "loadlate", settings);
        if (photo != null)
        {
            photos.Add(photo);
        }
    }

    return photos;
}

/// <summary>
/// Creates an Image from an img node, reading the URL from the given attribute; returns null
/// when the node is null or the URL attribute is missing or empty.
/// </summary>
private static Image BuildImageFromNode(HtmlNode imageNode, string urlAttributeName, PersonDataFetchSettings settings)
{
    if (imageNode == null)
    {
        return null;
    }

    string rawUrl = imageNode.GetAttributeValue(urlAttributeName, string.Empty);
    if (string.IsNullOrEmpty(rawUrl))
    {
        return null;
    }

    Image image = new Image
    {
        // A missing title attribute yields an empty title instead of a null dereference.
        Title = imageNode.GetAttributeValue("title", string.Empty).Prepare(),
        URL = IMDBImageHelper.NormalizeImageUrl(rawUrl)
    };

    if (settings.FetchImageContents)
    {
        image.Content = IMDBImageHelper.GetImageContent(image.URL);
    }

    return image;
}

/// <summary>
/// Runs gender detection over the filmography category sections and resolves each
/// section's category to a credit role type.
/// </summary>
private static void ParseFilmographySection(Person person, HtmlNode documentNode)
{
    HtmlNode[] filmographyCategories = documentNode.QuerySelectorAll(".filmo-category-section").ToArray();
    DetectGender(person, filmographyCategories);

    foreach (HtmlNode filmographyCategorySection in filmographyCategories)
    {
        // The category name lives on the first div yielded by NodesBeforeSelf().
        HtmlNode categoryHeader = filmographyCategorySection.NodesBeforeSelf().FirstOrDefault(e => e.Name == "div");
        if (categoryHeader == null)
        {
            continue;
        }

        string categoryName = categoryHeader.GetAttributeValue("data-category", string.Empty);
        if (categoryName.Length == 0)
        {
            continue;
        }

        // "some_category" -> "Some Category" -> "SomeCategory", the form an enum member name takes.
        categoryName = CultureInfo.InvariantCulture.TextInfo.ToTitleCase(categoryName.Replace("_", " "));
        string categoryTypeString = categoryName.Replace(" ", string.Empty);

        // NOTE(review): the resolved role type is not stored anywhere yet, so this loop has
        // no observable effect; filmography credits are not populated by this method.
        CreditRoleType creditRoleType;
        TryParseRoleName(categoryTypeString, out creditRoleType);
    }
}

/// <summary>
/// Populates person.KnownFor from the "#knownfor" element; does nothing when it is absent.
/// </summary>
private static void ParseKnownForSection(Person person, HtmlNode documentNode)
{
    HtmlNode knownForElement = documentNode.QuerySelector("#knownfor");
    if (knownForElement == null)
    {
        return;
    }

    List<ProductionCredit> knownForCredits = new List<ProductionCredit>();
    foreach (HtmlNode knownForTitleNode in knownForElement.QuerySelectorAll(".knownfor-title"))
    {
        ProductionCredit knownFor = ParseKnownForCredit(person, knownForTitleNode);
        if (knownFor != null)
        {
            knownForCredits.Add(knownFor);
        }
    }

    person.KnownFor = knownForCredits;
}

/// <summary>
/// Parses a single ".knownfor-title" node; returns null when the node has no usable title link.
/// </summary>
private static ProductionCredit ParseKnownForCredit(Person person, HtmlNode knownForTitleNode)
{
    HtmlNode roleElement = knownForTitleNode.QuerySelector(".knownfor-title-role");
    HtmlNode movieLink = roleElement == null ? null : roleElement.Element("a");
    if (movieLink == null)
    {
        return null;
    }

    string movieUrl = movieLink.GetAttributeValue("href", string.Empty);
    if (movieUrl.Length == 0)
    {
        return null;
    }

    int titleYear = default(int);
    int? titleEndYear = null;
    HtmlNode titleYearElement = knownForTitleNode.QuerySelector(".knownfor-year");
    if (titleYearElement != null)
    {
        Match parenthesisMatch = GeneralRegexConstants.PharantesisRegex.Match(titleYearElement.InnerText);
        if (parenthesisMatch.Success)
        {
            Match yearMatch = IMDbConstants.CreditYearRegex.Match(parenthesisMatch.Groups[1].Value);
            if (yearMatch.Success)
            {
                titleYear = yearMatch.Groups[1].Value.ToInteger();

                // Groups.Count is fixed by the pattern, so it cannot tell a year range from a
                // single year. Success reports whether group 3 captured text in this match.
                Group endYearGroup = yearMatch.Groups[3];
                if (endYearGroup.Success && endYearGroup.Value.Length > 0)
                {
                    titleEndYear = endYearGroup.Value.ToInteger();
                }
            }
        }
    }

    ProductionCredit knownFor = new ProductionCredit();
    if (titleEndYear.HasValue)
    {
        // An end year is what marks the title as a series here.
        knownFor.Production = new TVSeries { EndYear = titleEndYear.Value };
    }
    else
    {
        knownFor.Production = new Movie();
    }

    knownFor.Production.IMDbID = (long)IMDBIDHelper.GetIDFromUrl(movieUrl);
    knownFor.Production.Title = movieLink.InnerText.Prepare();
    knownFor.Production.Year = titleYear;

    HtmlNode roleTextElement = roleElement.Element("span");
    string role = roleTextElement == null ? string.Empty : roleTextElement.InnerText.Prepare();

    CreditRoleType roleType;
    if (!TryParseRoleName(role, out roleType))
    {
        // Text that is not a role name is treated as a character name, i.e. an acting
        // credit, specialised by gender when the gender is known.
        roleType = CreditRoleType.Acting;
        if (person.Gender == GenderEnum.Male)
        {
            roleType = CreditRoleType.Actor;
        }
        else if (person.Gender == GenderEnum.Female)
        {
            roleType = CreditRoleType.Actress;
        }
    }

    knownFor.Credit = new CreditFactory().Build(roleType);
    knownFor.Credit.RoleType = roleType;
    knownFor.Credit.Person = person;

    if (roleType == CreditRoleType.Actor || roleType == CreditRoleType.Actress || roleType == CreditRoleType.Acting)
    {
        ActingCredit actingCredit = (ActingCredit)knownFor.Credit;
        if (string.IsNullOrEmpty(role))
        {
            // No role text on the page: record no characters rather than a nameless one.
            actingCredit.Characters = new Character[0];
        }
        else
        {
            actingCredit.Characters = new Character[] { new Character { Name = role } };
        }
    }

    return knownFor;
}

/// <summary>
/// Parses text as the name of a defined CreditRoleType member. On failure returns false and
/// sets <paramref name="roleType"/> to CreditRoleType.Undefined.
/// </summary>
private static bool TryParseRoleName(string text, out CreditRoleType roleType)
{
    roleType = CreditRoleType.Undefined;
    if (text == null)
    {
        return false;
    }

    string candidate = text.Trim();
    if (candidate.Length == 0)
    {
        return false;
    }

    // Enum.TryParse also accepts numeric text (for example "9") and converts it to whatever
    // member has that underlying value; only names are meaningful here.
    char first = candidate[0];
    if (char.IsDigit(first) || first == '-' || first == '+')
    {
        return false;
    }

    CreditRoleType parsed;
    if (!Enum.TryParse<CreditRoleType>(candidate, out parsed))
    {
        return false;
    }

    // Rejects values that are not declared members, e.g. from comma-separated input.
    if (!Enum.IsDefined(typeof(CreditRoleType), parsed))
    {
        return false;
    }

    roleType = parsed;
    return true;
}