Beispiel #1
0
        /// <summary>
        /// Parses one credits summary section of a title page into a list of credits.
        /// </summary>
        /// <remarks>
        /// The role type is derived from the text of the section's <c>h4</c> heading; a section whose
        /// heading is missing or unrecognised yields credits with <c>CreditRoleType.Undefined</c>.
        /// Sections recognised as the stars summary produce no credits, because acting credits are
        /// covered by the cast summary. Anchors without an <c>href</c>, or whose <c>href</c> does not
        /// yield a person ID, are skipped.
        /// </remarks>
        /// <param name="creditSummaryNode">HTML node that contains the credits summary section</param>
        /// <returns>Parsed credits; empty when the section holds no usable person links</returns>
        internal static List<Credit> GetCreditInfo(HtmlNode creditSummaryNode)
        {
            List<Credit> credits = new List<Credit>();

            // A section without a heading cannot be classified: fall back to an empty heading so the
            // role type stays Undefined instead of dereferencing a null node.
            HtmlNode headingNode = creditSummaryNode.QuerySelector("h4");
            string role = headingNode == null ? string.Empty : headingNode.InnerText.Prepare();

            CreditRoleType roleType = CreditRoleType.Undefined;
            if (IMDbConstants.DirectorsSummaryRegex.IsMatch(role))
            {
                roleType = CreditRoleType.Director;
            }
            else if (IMDbConstants.StarsSummaryRegex.IsMatch(role))
            {
                roleType = CreditRoleType.Acting;
            }
            else if (IMDbConstants.WritersSummaryRegex.IsMatch(role))
            {
                roleType = CreditRoleType.Writer;
            }
            else if (IMDbConstants.CreatorsSummaryRegex.IsMatch(role))
            {
                roleType = CreditRoleType.Creator;
            }

            if (roleType == CreditRoleType.Acting)
            {
                // Ignore acting credits, they are covered from cast summary.
                return credits;
            }

            foreach (HtmlNode creditNode in creditSummaryNode.QuerySelectorAll("a"))
            {
                // Anchors without a target cannot identify a person.
                var hrefAttribute = creditNode.Attributes["href"];
                if (hrefAttribute == null)
                {
                    continue;
                }

                // Group 1 carries the person ID. Groups.Count only reflects the shape of the pattern,
                // so test whether the group itself took part in the match.
                Match personIDMatch = IMDbConstants.PersonIDURLMatcher.Match(hrefAttribute.Value);
                if (!personIDMatch.Success || !personIDMatch.Groups[1].Success)
                {
                    continue;
                }

                credits.Add(new Credit
                {
                    Person = new Person
                    {
                        IMDbID   = personIDMatch.Groups[1].Value.ToLong(),
                        FullName = creditNode.InnerText.Prepare()
                    },
                    RoleType = roleType
                });
            }

            return credits;
        }
Beispiel #2
0
        /// <summary>
        /// Populates a person from the HTML of the person page.
        /// </summary>
        /// <remarks>
        /// Reads name, primary image, job roles and media-strip photos from the main details section,
        /// optionally delegates to the bio page parser, runs gender detection over the filmography
        /// category sections and finally builds the "known for" credits. Page elements that are
        /// missing are skipped rather than treated as errors.
        /// </remarks>
        /// <param name="person">Person to be populated</param>
        /// <param name="documentNode">HTML node containing the person page</param>
        /// <param name="settings">Object containing data fetch settings</param>
        public static void Parse(Person person, HtmlNode documentNode, PersonDataFetchSettings settings)
        {
            ParseMainDetails(person, documentNode, settings);

            if (settings.FetchBioPage)
            {
                BioPageHelper.ParseBioPage(person);
            }

            // Keep this ahead of the known-for parsing: the acting fallback there reads person.Gender,
            // which DetectGender presumably sets (its body is not visible here - confirm).
            // NOTE: per-category filmography credits are not produced. The former loop over these
            // sections resolved a CreditRoleType per section and discarded it, so its only possible
            // effect was a NullReferenceException on sections lacking a preceding div[data-category].
            HtmlNode[] filmographyCategories = documentNode.QuerySelectorAll(".filmo-category-section").ToArray();
            DetectGender(person, filmographyCategories);

            ParseKnownFor(person, documentNode);
        }

        /// <summary>
        /// Reads name, primary image, job roles and photos from the ".maindetails_center" section.
        /// </summary>
        /// <param name="person">Person to be populated</param>
        /// <param name="documentNode">HTML node containing the person page</param>
        /// <param name="settings">Object containing data fetch settings</param>
        private static void ParseMainDetails(Person person, HtmlNode documentNode, PersonDataFetchSettings settings)
        {
            HtmlNode mainDetailsElement = documentNode.QuerySelector(".maindetails_center");
            if (mainDetailsElement == null)
            {
                return;
            }

            HtmlNode nameOverviewWidget = mainDetailsElement.QuerySelector(".name-overview-widget");
            if (nameOverviewWidget == null)
            {
                // No overview widget: only the name is read, from a ".header" element of the document.
                HtmlNode nameHeader = documentNode.QuerySelector(".header");
                if (nameHeader != null)
                {
                    person.FullName = nameHeader.InnerText.Prepare();
                }
                return;
            }

            HtmlNode nameContainer = nameOverviewWidget.QuerySelector("h1.header .itemprop");
            if (nameContainer != null)
            {
                // Passed through Prepare() like every other text value read in this parser.
                person.FullName = nameContainer.InnerText.Prepare();
            }

            HtmlNode primaryImageElement = nameOverviewWidget.QuerySelector("#img_primary .image a img");
            if (primaryImageElement != null)
            {
                Image primaryImage = BuildImage(primaryImageElement, "src", settings);
                if (primaryImage != null)
                {
                    person.PrimaryImage = primaryImage;
                }
            }

            HtmlNode jobCategoriesContainer = nameOverviewWidget.QuerySelector("div#name-job-categories");
            if (jobCategoriesContainer != null)
            {
                List<CreditRoleType> roles = new List<CreditRoleType>();
                foreach (HtmlNode jobCategoryLink in jobCategoriesContainer.QuerySelectorAll("a"))
                {
                    // Unrecognised categories are kept in the list as Undefined.
                    CreditRoleType role;
                    TryParseRoleType(jobCategoryLink.InnerText.Prepare(), out role);
                    roles.Add(role);
                }

                person.Roles = roles;
            }

            List<Image> photos = new List<Image>();
            HtmlNode mediaStripContainer = nameOverviewWidget.QuerySelector(".mediastrip_container");
            if (mediaStripContainer != null)
            {
                HtmlNode[] allImageNodes = mediaStripContainer.QuerySelectorAll(".mediastrip a").ToArray();
                int endIndex = allImageNodes.Length;
                if (settings.MediaImagesFetchCount < endIndex)
                {
                    endIndex = settings.MediaImagesFetchCount;
                }

                for (int i = 0; i < endIndex; i++)
                {
                    HtmlNode imageNode = allImageNodes[i].Element("img");
                    if (imageNode == null)
                    {
                        continue;
                    }

                    // The media strip carries the image URL in the "loadlate" attribute.
                    Image photo = BuildImage(imageNode, "loadlate", settings);
                    if (photo != null)
                    {
                        photos.Add(photo);
                    }
                }
            }
            person.Photos = photos;
        }

        /// <summary>
        /// Builds an image from an <c>img</c> node, reading the URL from the given attribute.
        /// </summary>
        /// <param name="imageNode">The <c>img</c> node</param>
        /// <param name="urlAttributeName">Name of the attribute that holds the image URL</param>
        /// <param name="settings">Settings deciding whether the image content is fetched as well</param>
        /// <returns>The image, or null when the node has no such URL attribute</returns>
        private static Image BuildImage(HtmlNode imageNode, string urlAttributeName, PersonDataFetchSettings settings)
        {
            var urlAttribute = imageNode.Attributes[urlAttributeName];
            if (urlAttribute == null)
            {
                return null;
            }

            Image image = new Image();

            // The title is optional; it is left unset when the attribute is absent.
            var titleAttribute = imageNode.Attributes["title"];
            if (titleAttribute != null)
            {
                image.Title = titleAttribute.Value.Prepare();
            }

            image.URL = IMDBImageHelper.NormalizeImageUrl(urlAttribute.Value);
            if (settings.FetchImageContents)
            {
                image.Content = IMDBImageHelper.GetImageContent(image.URL);
            }

            return image;
        }

        /// <summary>
        /// Parses text into a declared <c>CreditRoleType</c> member.
        /// </summary>
        /// <param name="text">Text to parse</param>
        /// <param name="roleType">The parsed member, or <c>CreditRoleType.Undefined</c> on failure</param>
        /// <returns>True when the text names a declared member</returns>
        private static bool TryParseRoleType(string text, out CreditRoleType roleType)
        {
            // Enum.TryParse also succeeds for numeric strings and yields values that are not declared
            // members, and on failure it leaves default(T) in the out argument. Enum.IsDefined rejects
            // the former; the explicit assignment below pins the latter to Undefined.
            if (Enum.TryParse<CreditRoleType>(text, out roleType) && Enum.IsDefined(typeof(CreditRoleType), roleType))
            {
                return true;
            }

            roleType = CreditRoleType.Undefined;
            return false;
        }

        /// <summary>
        /// Builds the "known for" credits from the "#knownfor" section, when the page has one.
        /// </summary>
        /// <param name="person">Person to be populated</param>
        /// <param name="documentNode">HTML node containing the person page</param>
        private static void ParseKnownFor(Person person, HtmlNode documentNode)
        {
            HtmlNode knownForElement = documentNode.QuerySelector("#knownfor");
            if (knownForElement == null)
            {
                return;
            }

            List<ProductionCredit> knownForCredits = new List<ProductionCredit>();
            foreach (HtmlNode knownForTitleNode in knownForElement.QuerySelectorAll(".knownfor-title"))
            {
                ProductionCredit knownFor = ParseKnownForTitle(person, knownForTitleNode);
                if (knownFor != null)
                {
                    knownForCredits.Add(knownFor);
                }
            }
            person.KnownFor = knownForCredits;
        }

        /// <summary>
        /// Builds one "known for" credit from a ".knownfor-title" node.
        /// </summary>
        /// <param name="person">Person the credit belongs to; its gender selects Actor or Actress</param>
        /// <param name="knownForTitleNode">The ".knownfor-title" node</param>
        /// <returns>The credit, or null when the node has no title link to identify the production</returns>
        private static ProductionCredit ParseKnownForTitle(Person person, HtmlNode knownForTitleNode)
        {
            HtmlNode roleElement = knownForTitleNode.QuerySelector(".knownfor-title-role");
            HtmlNode movieLink = roleElement == null ? null : roleElement.Element("a");
            if (movieLink == null || movieLink.Attributes["href"] == null)
            {
                // Without the title link there is neither an ID nor a title to record.
                return null;
            }

            int titleYear = default(int);
            int? titleEndYear = null;
            HtmlNode titleYearElement = knownForTitleNode.QuerySelector(".knownfor-year");
            if (titleYearElement != null)
            {
                Match parenthesisMatch = GeneralRegexConstants.PharantesisRegex.Match(titleYearElement.InnerText);
                if (parenthesisMatch.Success)
                {
                    Match yearMatch = IMDbConstants.CreditYearRegex.Match(parenthesisMatch.Groups[1].Value);
                    if (yearMatch.Success)
                    {
                        titleYear = yearMatch.Groups[1].Value.ToInteger();

                        // Groups.Count is fixed by the pattern, not by the input, so it cannot tell
                        // whether an end year was present. Group 3 holds the end year only when it
                        // actually took part in the match.
                        Group endYearGroup = yearMatch.Groups[3];
                        if (endYearGroup.Success && endYearGroup.Length > 0)
                        {
                            titleEndYear = endYearGroup.Value.ToInteger();
                        }
                    }
                }
            }

            ProductionCredit knownFor = new ProductionCredit();
            if (titleEndYear.HasValue)
            {
                // A year range is treated as a TV series.
                knownFor.Production = new TVSeries
                {
                    EndYear = titleEndYear.Value
                };
            }
            else
            {
                knownFor.Production = new Movie();
            }

            knownFor.Production.IMDbID = (long)IMDBIDHelper.GetIDFromUrl(movieLink.Attributes["href"].Value);
            knownFor.Production.Title  = movieLink.InnerText.Prepare();
            knownFor.Production.Year   = titleYear;

            HtmlNode roleTextElement = roleElement.Element("span");
            string role = roleTextElement == null ? string.Empty : roleTextElement.InnerText.Prepare();

            CreditRoleType roleType;
            if (!TryParseRoleType(role, out roleType))
            {
                // Text that is not a role name becomes an acting credit; the text itself is stored
                // as the character name below.
                roleType = CreditRoleType.Acting;
                if (person.Gender == GenderEnum.Male)
                {
                    roleType = CreditRoleType.Actor;
                }
                else if (person.Gender == GenderEnum.Female)
                {
                    roleType = CreditRoleType.Actress;
                }
            }

            knownFor.Credit          = new CreditFactory().Build(roleType);
            knownFor.Credit.RoleType = roleType;
            knownFor.Credit.Person   = person;
            if (roleType == CreditRoleType.Actor || roleType == CreditRoleType.Actress || roleType == CreditRoleType.Acting)
            {
                ActingCredit actingCredit = (ActingCredit)knownFor.Credit;
                actingCredit.Characters = new Character[]
                {
                    new Character
                    {
                        Name = role
                    }
                };
            }

            return knownFor;
        }