示例#1
0
        /// <summary>
        /// Filmography and bio: downloads the page referenced by <paramref name="url"/> and fills a new
        /// <see cref="IMDBActor"/> with the person's id, name, photo, birth/death data, biography and
        /// the movies found for the actor, writer and director sections.
        /// </summary>
        /// <param name="url">Entry whose URL is fetched; its Title is used as the name when none can be parsed.</param>
        /// <param name="actor">Always assigned a new instance; fields are filled as far as parsing succeeds.</param>
        /// <returns>
        /// <c>true</c> when the page was fetched and processed; <c>false</c> when the parser strings are
        /// missing or do not contain exactly 46 entries, when the page is null or empty, or when an
        /// exception was thrown (the exception is logged).
        /// </returns>
        public bool GetActorDetails(IMDBUrl url, out IMDBActor actor)
        {
            actor = new IMDBActor();

            string[] vdbParserStr = VdbParserStringActorDetails();

            // Every index 0..45 is used below, so anything but exactly 46 entries is unusable.
            if (vdbParserStr == null || vdbParserStr.Length != 46)
            {
                return false;
            }

            try
            {
                string absoluteUri;
                string strBody = GetPage(url.URL, "utf-8", out absoluteUri);

                if (string.IsNullOrEmpty(strBody))
                {
                    return false;
                }

                #region Actor imdb id

                // IMDBActorID: the 9 characters starting at the last "nm" in the URL, with any
                // slash removed. The id is left untouched when the URL holds no such span; this is
                // checked explicitly instead of relying on a swallowed Substring exception.
                string actorUrl = url.URL;

                if (actorUrl != null)
                {
                    int pos = actorUrl.LastIndexOf("nm", StringComparison.Ordinal);

                    if (pos >= 0 && actorUrl.Length - pos >= 9)
                    {
                        actor.IMDBActorID = actorUrl.Substring(pos, 9).Replace("/", string.Empty);
                    }
                }

                #endregion

                HTMLParser parser   = new HTMLParser(strBody);
                string     strThumb = string.Empty;
                string     value    = string.Empty;
                string     value2   = string.Empty;

                #region Actor name

                // Actor name
                if ((parser.skipToEndOf(vdbParserStr[0])) &&        // <title>
                    (parser.extractTo(vdbParserStr[1], ref value))) // - IMDb</title>
                {
                    value      = new HTMLUtil().ConvertHTMLToAnsi(value);
                    value      = Util.Utils.RemoveParenthesis(value).Trim();
                    actor.Name = HttpUtility.HtmlDecode(value.Trim());
                }

                // Fall back to the title carried by the url entry when no name was parsed
                if (actor.Name == string.Empty)
                {
                    actor.Name = url.Title;
                }

                #endregion

                // Photo
                string parserTxt  = parser.Content;
                string photoBlock = string.Empty;

                #region Actor photo

                if (parser.skipToStartOf(vdbParserStr[2]) &&             // <td id="img_primary"
                    (parser.extractTo(vdbParserStr[3], ref photoBlock))) // </td>
                {
                    // Search for the image inside the photo block only, then restore the full page
                    parser.Content = photoBlock;

                    if ((parser.skipToEndOf(vdbParserStr[4])) &&           // <img src="
                        (parser.extractTo(vdbParserStr[5], ref strThumb))) // "
                    {
                        actor.ThumbnailUrl = strThumb;
                    }
                    parser.Content = parserTxt;
                }

                #endregion

                #region Actor birth date

                // Birth date
                if ((parser.skipToEndOf(vdbParserStr[6])) &&          // >Born:</h4>
                    (parser.skipToEndOf(vdbParserStr[7])) &&          // birth_monthday=
                    (parser.skipToEndOf(vdbParserStr[8])) &&          // >
                    (parser.extractTo(vdbParserStr[9], ref value)) && // <
                    (parser.skipToEndOf(vdbParserStr[10])) &&         // year=
                    (parser.extractTo(vdbParserStr[11], ref value2))) // "
                {
                    actor.DateOfBirth = value + " " + value2;
                }

                #endregion

                #region Actor death date

                // Death date
                if ((parser.skipToEndOf(vdbParserStr[12])) &&          // >Died:</h4>
                    (parser.skipToEndOf(vdbParserStr[13])) &&          // death_monthday="
                    (parser.skipToEndOf(vdbParserStr[14])) &&          // >
                    (parser.extractTo(vdbParserStr[15], ref value)) && // <
                    (parser.skipToEndOf(vdbParserStr[16])) &&          // death_date="
                    (parser.extractTo(vdbParserStr[17], ref value2)))  // "
                {
                    actor.DateOfDeath = value + " " + value2;
                }

                #endregion

                parser.resetPosition();

                #region Actor birth place

                // Birth place
                if ((parser.skipToEndOf(vdbParserStr[18])) &&        // birth_place=
                    (parser.skipToEndOf(vdbParserStr[19])) &&        // >
                    (parser.extractTo(vdbParserStr[20], ref value))) // <
                {
                    actor.PlaceOfBirth = HttpUtility.HtmlDecode(value);
                }

                #endregion

                #region Actor death place

                // Death place
                if ((parser.skipToEndOf(vdbParserStr[21])) &&        // death_place=
                    (parser.skipToEndOf(vdbParserStr[22])) &&        // >
                    (parser.extractTo(vdbParserStr[23], ref value))) // <
                {
                    actor.PlaceOfDeath = HttpUtility.HtmlDecode(value);
                }

                #endregion

                // Mini biography
                parser.resetPosition();

                #region Actor biography

                if ((parser.skipToEndOf(vdbParserStr[24])) &&        // <td id="overview-top">
                    (parser.skipToEndOf(vdbParserStr[25])) &&        // <p>
                    (parser.extractTo(vdbParserStr[26], ref value))) // See full bio</a>
                {
                    value = new HTMLUtil().ConvertHTMLToAnsi(value);
                    actor.MiniBiography = Util.Utils.stripHTMLtags(value);
                    actor.MiniBiography = actor.MiniBiography.Replace(vdbParserStr[45], string.Empty).Trim(); // See full bio »
                    actor.MiniBiography = HttpUtility.HtmlDecode(actor.MiniBiography);                        // Remove HTML entities like &#189;

                    // The full biography page is only requested when a mini biography was found
                    if (actor.MiniBiography != string.Empty)
                    {
                        // Build the biography URL from the address the first request resolved to
                        string bioURL = absoluteUri;

                        if (!bioURL.EndsWith(vdbParserStr[27])) // /
                        {
                            bioURL += vdbParserStr[28];         // /bio
                        }
                        else
                        {
                            bioURL += vdbParserStr[29]; // bio
                        }

                        string strBioBody = GetPage(bioURL, "utf-8", out absoluteUri);

                        if (!string.IsNullOrEmpty(strBioBody))
                        {
                            HTMLParser parser1 = new HTMLParser(strBioBody);

                            if (parser1.skipToEndOf(vdbParserStr[30]) &&        // <h5>Mini Biography</h5>
                                parser1.skipToEndOf(vdbParserStr[31]) &&        // <div class="wikipedia_bio">
                                parser1.extractTo(vdbParserStr[32], ref value)) // </div>
                            {
                                // Turn the <h5> headings into plain-text line breaks before stripping tags
                                value           = new HTMLUtil().ConvertHTMLToAnsi(value);
                                value           = Regex.Replace(value, @"</h5>\s<h5>", "\n\r");
                                value           = Regex.Replace(value, @"<h5>", "\n\r\n\r");
                                value           = Regex.Replace(value, @"</h5>", ":\n\r");
                                actor.Biography = Util.Utils.stripHTMLtags(value).Trim();
                                actor.Biography = HttpUtility.HtmlDecode(actor.Biography);
                            }
                            else
                            {
                                // Second attempt from the top of the bio page with the alternative markers
                                parser1.resetPosition();

                                if (parser1.skipToEndOf(vdbParserStr[33]) &&        // <h5>Mini Biography</h5>
                                    parser1.extractTo(vdbParserStr[34], ref value)) // </p>
                                {
                                    value           = new HTMLUtil().ConvertHTMLToAnsi(value);
                                    actor.Biography = Util.Utils.stripHTMLtags(value).Trim();
                                    actor.Biography = HttpUtility.HtmlDecode(actor.Biography);
                                }
                            }
                        }
                    }
                }

                #endregion

                // Person is movie director or an actor/actress
                bool isActorPass    = false;
                bool isDirectorPass = false;
                bool isWriterPass   = false;

                parser.resetPosition();

                HTMLParser dirParser = new HTMLParser(); // HTML body for Director
                HTMLParser wriParser = new HTMLParser(); // HTML body for Writers

                #region Check person role in movie (actor, director or writer)

                // The page content is copied into separate parsers for the director and writer
                // passes before the actor pass below hands 'parser' to GetActorMovies.
                if ((parser.skipToEndOf(vdbParserStr[35])) && // name="Director">Director</a>
                    (parser.skipToEndOf(vdbParserStr[36])))   // </div>
                {
                    isDirectorPass    = true;
                    dirParser.Content = parser.Content;
                }

                parser.resetPosition();

                if ((parser.skipToEndOf(vdbParserStr[37])) && // name="Writer">Writer</a>
                    (parser.skipToEndOf(vdbParserStr[38])))   // </div>
                {
                    isWriterPass      = true;
                    wriParser.Content = parser.Content;
                }

                parser.resetPosition();

                if (parser.skipToEndOf(vdbParserStr[39]) || // name="Actress">Actress</a>
                    parser.skipToEndOf(vdbParserStr[40]))   // name="Actor">Actor</a>
                {
                    isActorPass = true;
                }

                #endregion

                #region Get movies for every role

                // Get filmography Actor ('parser' is positioned just after the actor/actress marker)
                if (isActorPass)
                {
                    GetActorMovies(actor, parser, false, false);
                }

                // Get filmography for writers
                if (isWriterPass)
                {
                    parser = wriParser;
                    parser.resetPosition();

                    if ((parser.skipToEndOf(vdbParserStr[41])) && // name="Writer">Writer</a>
                        (parser.skipToEndOf(vdbParserStr[42])))   // </div>
                    {
                        GetActorMovies(actor, parser, false, true);
                    }
                }

                // Get filmography Director
                if (isDirectorPass)
                {
                    parser = dirParser;
                    parser.resetPosition();

                    if (parser.skipToEndOf(vdbParserStr[43]) && // name="Director">Director</a>
                        parser.skipToEndOf(vdbParserStr[44]))   // </div>
                    {
                        GetActorMovies(actor, parser, true, false);
                    }
                }

                #endregion

                // Sort the collected filmography when at least one movie was added
                if (actor.Count > 0)
                {
                    actor.SortActorMoviesByYear();
                }

                return true;
            }
            catch (Exception ex)
            {
                Log.Error("IMDB.GetActorDetails({0} exception:{1} {2} {3}", url.URL, ex.Message, ex.Source, ex.StackTrace);
            }
            return false;
        }
示例#2
0
        /// <summary>
        /// Fetches the name search page at <paramref name="strURL"/> and appends one
        /// <see cref="IMDBUrl"/> per person found to <c>_elements</c>.
        /// </summary>
        /// <remarks>
        /// When the page title differs from the name-search title (parser string 2) a single entry
        /// built from the resolved address and the page title is added. Otherwise the
        /// "Popular names" table and then the "Exact Matches" table (or, failing that, the
        /// "Approx Matches" table) are scanned row by row. Nothing is added when the parser strings
        /// are missing or do not contain exactly 29 entries, or when the page is null or empty.
        /// Exceptions are logged and not rethrown.
        /// </remarks>
        /// <param name="strURL">Address of the search page to download.</param>
        private void FindIMDBActor(string strURL)
        {
            string[] vdbParserStr = VdbParserStringActor();

            // Exactly 29 entries are required, so the valid indices below are 0..28.
            if (vdbParserStr == null || vdbParserStr.Length != 29)
            {
                return;
            }

            try
            {
                string absoluteUri;
                // NOTE(review): the original comment here said "UTF-8 have problem with special
                // country chars, default IMDB enc is used", yet the call requests "utf-8" - confirm.
                string strBody = GetPage(strURL, "utf-8", out absoluteUri);

                // Same guard as the GetActorDetails methods: nothing to parse without a page
                if (string.IsNullOrEmpty(strBody))
                {
                    return;
                }

                string     value  = string.Empty;
                HTMLParser parser = new HTMLParser(strBody);

                // A title other than the search-page title means a single person page was returned
                if ((parser.skipToEndOf(vdbParserStr[0])) &&           // <title>
                    (parser.extractTo(vdbParserStr[1], ref value)) &&  // </title>
                    !value.ToLowerInvariant().Equals(vdbParserStr[2])) // imdb name search
                {
                    value = new HTMLUtil().ConvertHTMLToAnsi(value);
                    value = Util.Utils.RemoveParenthesis(value).Trim();
                    IMDBUrl oneUrl = new IMDBUrl(absoluteUri, value, "IMDB");
                    _elements.Add(oneUrl);
                    return;
                }

                parser.resetPosition();

                string popularBody = string.Empty;
                string exactBody   = string.Empty;
                string url         = string.Empty;
                string name        = string.Empty;
                string role        = string.Empty;

                if (parser.skipToStartOfNoCase(vdbParserStr[3]))        // Popular names
                {
                    parser.skipToEndOf(vdbParserStr[4]);                // <table>
                    parser.extractTo(vdbParserStr[5], ref popularBody); // </table>

                    // Restrict the row scan to the extracted table
                    parser = new HTMLParser(popularBody);

                    while (parser.skipToStartOf(vdbParserStr[6]))     // href="/name/
                    {
                        parser.skipToEndOf(vdbParserStr[7]);          // href="
                        parser.extractTo(vdbParserStr[8], ref url);   // "
                        parser.skipToEndOf(vdbParserStr[9]);          // Image()).src='/rg/find-name-
                        parser.skipToEndOf(vdbParserStr[10]);         // ';">
                        parser.extractTo(vdbParserStr[11], ref name); // </a>
                        parser.skipToEndOf(vdbParserStr[12]);         // <small>(
                        parser.extractTo(vdbParserStr[13], ref role); // ,

                        if (role != string.Empty)
                        {
                            name += " - " + role;
                        }

                        name = new HTMLUtil().ConvertHTMLToAnsi(name);
                        name = Util.Utils.RemoveParenthesis(name).Trim();
                        IMDBUrl newUrl = new IMDBUrl("http://www.imdb.com" + url, name, "IMDB");
                        _elements.Add(newUrl);
                        parser.skipToEndOf(vdbParserStr[14]); // </tr>
                    }
                }

                // Start over on the full page for the exact / approximate matches
                parser = new HTMLParser(strBody);

                if (parser.skipToStartOfNoCase(vdbParserStr[15]))      // Exact Matches
                {
                    parser.skipToEndOf(vdbParserStr[16]);              // <table>
                    parser.extractTo(vdbParserStr[17], ref exactBody); // </table>
                }
                else if (parser.skipToStartOfNoCase(vdbParserStr[18])) // Approx Matches
                {
                    parser.skipToEndOf(vdbParserStr[19]);              // <table>
                    parser.extractTo(vdbParserStr[20], ref exactBody); // </table>
                }
                else
                {
                    return;
                }

                parser = new HTMLParser(exactBody);
                url    = string.Empty;
                name   = string.Empty;
                role   = string.Empty;

                while (parser.skipToStartOf(vdbParserStr[21]))    // href="/name/
                {
                    parser.skipToEndOf(vdbParserStr[22]);         // href="
                    parser.extractTo(vdbParserStr[23], ref url);  // "
                    parser.skipToEndOf(vdbParserStr[24]);         // Image()).src='/rg/find-name-
                    parser.skipToEndOf(vdbParserStr[25]);         // ';">
                    parser.extractTo(vdbParserStr[26], ref name); // </a>
                    parser.skipToEndOf(vdbParserStr[27]);         // <small>(
                    parser.extractTo(vdbParserStr[28], ref role); // ,

                    if (role != string.Empty)
                    {
                        name += " - " + role;
                    }

                    name = new HTMLUtil().ConvertHTMLToAnsi(name);
                    name = Util.Utils.RemoveParenthesis(name).Trim();
                    IMDBUrl newUrl = new IMDBUrl("http://www.imdb.com" + url, name, "IMDB");
                    _elements.Add(newUrl);

                    // The row terminator is the entry at index 14 (the one the popular-names loop
                    // uses). Index 29 does not exist in a 29-entry array and reading it threw
                    // IndexOutOfRangeException after the first match, ending the scan early.
                    // NOTE(review): assumes both tables share the same row-end token - confirm
                    // against the parser string definitions.
                    parser.skipToEndOf(vdbParserStr[14]); // </tr>
                }
            }
            catch (Exception ex)
            {
                Log.Error("exception for imdb lookup of {0} err:{1} stack:{2}", strURL, ex.Message, ex.StackTrace);
            }
        }
示例#3
0
        /// <summary>
        /// Downloads the page referenced by <paramref name="url"/> and fills a new
        /// <see cref="IMDBActor"/> with the person's id, name, photo, birth/death data, biography
        /// and filmography. All fields are parsed from the page HTML (changed when IMDB changed
        /// its HTML code).
        /// </summary>
        /// <param name="url">Entry whose URL is fetched; its Title is used as the name when none can be parsed.</param>
        /// <param name="director">
        /// <c>true</c> to read the "Director" section (every movie gets the role "Director");
        /// <c>false</c> to read the "Actress"/"Actor" section and parse a role per movie.
        /// </param>
        /// <param name="actor">Always assigned a new instance; fields are filled as far as parsing succeeds.</param>
        /// <returns>
        /// <c>true</c> when the page was fetched and processed; <c>false</c> when the page is null or
        /// empty or when an exception was thrown (the exception is logged).
        /// </returns>
        public bool GetActorDetails(IMDBUrl url, bool director, out IMDBActor actor)
        {
            actor = new IMDBActor();
            try
            {
                string absoluteUri;
                string strBody = GetPage(url.URL, "utf-8", out absoluteUri);
                if (string.IsNullOrEmpty(strBody))
                {
                    return false;
                }

                // IMDBActorID: the 9 characters starting at the last "nm" in the URL, with any
                // slash removed. The id is left untouched when the URL holds no such span; this is
                // checked explicitly instead of relying on a swallowed Substring exception.
                string actorUrl = url.URL;
                if (actorUrl != null)
                {
                    int pos = actorUrl.LastIndexOf("nm", StringComparison.Ordinal);
                    if (pos >= 0 && actorUrl.Length - pos >= 9)
                    {
                        actor.IMDBActorID = actorUrl.Substring(pos, 9).Replace("/", string.Empty);
                    }
                }

                HTMLParser parser   = new HTMLParser(strBody);
                string     strThumb = string.Empty;
                string     value    = string.Empty;
                string     value2   = string.Empty;
                // Actor name
                if ((parser.skipToEndOf("<title>")) &&
                    (parser.extractTo("- IMDb</title>", ref value)))
                {
                    value      = new HTMLUtil().ConvertHTMLToAnsi(value);
                    value      = Util.Utils.RemoveParenthesis(value).Trim();
                    actor.Name = HttpUtility.HtmlDecode(value.Trim());
                }
                // Fall back to the title carried by the url entry when no name was parsed
                if (actor.Name == string.Empty)
                {
                    actor.Name = url.Title;
                }
                // Photo
                string parserTxt  = parser.Content;
                string photoBlock = string.Empty;
                if (parser.skipToStartOf("<td id=\"img_primary\"") &&
                    (parser.extractTo("</td>", ref photoBlock)))
                {
                    // Search for the image inside the photo block only, then restore the full page
                    parser.Content = photoBlock;
                    if ((parser.skipToEndOf("<img src=\"")) &&
                        (parser.extractTo("\"", ref strThumb)))
                    {
                        actor.ThumbnailUrl = strThumb;
                    }
                    parser.Content = parserTxt;
                }
                // Birth date
                if ((parser.skipToEndOf("Born:")) &&
                    (parser.skipToEndOf("birth_monthday=")) &&
                    (parser.skipToEndOf(">")) &&
                    (parser.extractTo("<", ref value)) &&
                    (parser.skipToEndOf("year=")) &&
                    (parser.extractTo("\"", ref value2)))
                {
                    actor.DateOfBirth = value + " " + value2;
                }
                // Death date: appended to DateOfBirth as "<birth> ~ <death>", with "?" standing in
                // for an unknown birth date
                if ((parser.skipToEndOf(">Died:</h4>")) &&
                    (parser.skipToEndOf("deaths\">")) &&
                    (parser.extractTo("<", ref value)) &&
                    (parser.skipToEndOf("death_date=")) &&
                    (parser.extractTo("\"", ref value2)))
                {
                    if (actor.DateOfBirth == string.Empty)
                    {
                        actor.DateOfBirth = "?";
                    }
                    actor.DateOfBirth += " ~ " + value + " " + value2;
                }

                parser.resetPosition();
                // Birth place
                if ((parser.skipToEndOf("birth_place=")) &&
                    (parser.skipToEndOf(">")) &&
                    (parser.extractTo("<", ref value)))
                {
                    actor.PlaceOfBirth = HttpUtility.HtmlDecode(value);
                }
                // Mini biography
                parser.resetPosition();
                if ((parser.skipToEndOf("<td id=\"overview-top\">")) &&
                    (parser.skipToEndOf("<p>")) &&
                    (parser.extractTo("See full bio</a>", ref value)))
                {
                    value = new HTMLUtil().ConvertHTMLToAnsi(value);
                    actor.MiniBiography = Util.Utils.stripHTMLtags(value);
                    actor.MiniBiography = actor.MiniBiography.Replace("See full bio »", string.Empty).Trim();
                    actor.MiniBiography = HttpUtility.HtmlDecode(actor.MiniBiography); // Remove HTML entities like &#189;
                    // The full biography page is only requested when a mini biography was found
                    if (actor.MiniBiography != string.Empty)
                    {
                        // Build the biography URL from the address the first request resolved to
                        string bioURL = absoluteUri;
                        if (!bioURL.EndsWith("/"))
                        {
                            bioURL += "/bio";
                        }
                        else
                        {
                            bioURL += "bio";
                        }
                        string strBioBody = GetPage(bioURL, "utf-8", out absoluteUri);
                        if (!string.IsNullOrEmpty(strBioBody))
                        {
                            HTMLParser parser1 = new HTMLParser(strBioBody);
                            if (parser1.skipToEndOf("<h5>Mini Biography</h5>") &&
                                parser1.extractTo("</p>", ref value))
                            {
                                value           = new HTMLUtil().ConvertHTMLToAnsi(value);
                                actor.Biography = Util.Utils.stripHTMLtags(value).Trim();
                                actor.Biography = HttpUtility.HtmlDecode(actor.Biography); // Remove HTML entities like &#189;
                            }
                        }
                    }
                }
                // Person is movie director or an actor/actress
                bool isActorPass    = false;
                bool isDirectorPass = false;
                parser.resetPosition();

                if (director)
                {
                    if ((parser.skipToEndOf("name=\"Director\">Director</a>")) &&
                        (parser.skipToEndOf("</div>")))
                    {
                        isDirectorPass = true;
                    }
                }
                else
                {
                    if (parser.skipToEndOf("name=\"Actress\">Actress</a>") || parser.skipToEndOf("name=\"Actor\">Actor</a>"))
                    {
                        isActorPass = true;
                    }
                }
                // Get filmography ('parser' is positioned just after the section marker found above)
                if (isDirectorPass || isActorPass)
                {
                    string movies = string.Empty;
                    // Get films and roles block
                    if (parser.extractTo("<div id", ref movies))
                    {
                        parser.Content = movies;
                    }
                    // Parse block for every film and get year, title and its imdbID and role
                    while (parser.skipToStartOf("<span class=\"year_column\""))
                    {
                        string movie = string.Empty;
                        if (parser.extractTo("<div class", ref movie))
                        {
                            movie += "</li>";
                            HTMLParser movieParser = new HTMLParser(movie);
                            string     title       = string.Empty;
                            string     strYear     = string.Empty;
                            string     role        = string.Empty;
                            string     imdbID      = string.Empty;
                            // IMDBid
                            movieParser.skipToEndOf("title/");
                            movieParser.extractTo("/", ref imdbID);
                            // Title
                            movieParser.resetPosition();
                            movieParser.skipToEndOf("<a");
                            movieParser.skipToEndOf(">");
                            movieParser.extractTo("<br/>", ref title);
                            title = Util.Utils.stripHTMLtags(title);
                            title = title.Replace("\n", " ").Replace("\r", string.Empty);
                            title = HttpUtility.HtmlDecode(title.Trim()); // Remove HTML entities like &#189;
                            // Year: look for text starting with "20" first, then with "19"
                            movieParser.resetPosition();
                            if (movieParser.skipToStartOf(">20") &&
                                movieParser.skipToEndOf(">"))
                            {
                                movieParser.extractTo("<", ref strYear);
                            }
                            else if (movieParser.skipToStartOf(">19") &&
                                     movieParser.skipToEndOf(">"))
                            {
                                movieParser.extractTo("<", ref strYear);
                            }
                            // Roles
                            if ((director == false) && (movieParser.skipToEndOf("<br/>"))) // Role case 1, no character link
                            {
                                movieParser.extractTo("<", ref role);
                                role = Util.Utils.stripHTMLtags(role).Trim();
                                role = HttpUtility.HtmlDecode(role.Replace("\n", " ")
                                                              .Replace("\r", string.Empty).Trim());
                                if (role == string.Empty) // Role case 2, with character link
                                {
                                    movieParser.resetPosition();
                                    movieParser.skipToEndOf("<br/>");
                                    movieParser.extractTo("</a>", ref role);
                                    role = Util.Utils.stripHTMLtags(role).Trim();
                                    role = HttpUtility.HtmlDecode(role.Replace("\n", " ")
                                                                  .Replace("\r", string.Empty).Trim());
                                }
                            }
                            else
                            {
                                // Just director
                                if (director)
                                {
                                    role = "Director";
                                }
                            }

                            // Year value: the first four characters of the extracted text; 1900
                            // when they are missing or not a number (no exception-driven fallback).
                            int year;
                            if (strYear == null || strYear.Length < 4 ||
                                !Int32.TryParse(strYear.Substring(0, 4), out year))
                            {
                                year = 1900;
                            }
                            IMDBActor.IMDBActorMovie actorMovie = new IMDBActor.IMDBActorMovie();
                            actorMovie.MovieTitle = title;
                            actorMovie.Role       = role;
                            actorMovie.Year       = year;
                            actorMovie.imdbID     = imdbID;
                            actor.Add(actorMovie);
                        }
                    }
                }
                return true;
            }
            catch (Exception ex)
            {
                Log.Error("IMDB.GetActorDetails({0} exception:{1} {2} {3}", url.URL, ex.Message, ex.Source, ex.StackTrace);
            }
            return false;
        }
示例#4
0
        private void GetActorMovies(IMDBActor actor, HTMLParser parser, bool director, bool writer)
        {
            string[] vdbParserStr = VdbParserStringActorMovies();

            if (vdbParserStr == null || vdbParserStr.Length != 19)
            {
                return;
            }

            string movies = string.Empty;

            // Get films and roles block
            if (parser.extractTo(vdbParserStr[0], ref movies)) // <div id
            {
                parser.Content = movies;
            }

            // Parse block for evey film and get year, title and it's imdbID and role
            while (parser.skipToStartOf(vdbParserStr[1])) // <span class="year_column"
            {
                string movie = string.Empty;

                if (parser.extractTo(vdbParserStr[2], ref movie)) // <div class
                {
                    movie += vdbParserStr[3];                     // </li>

                    HTMLParser movieParser = new HTMLParser(movie);
                    string     title       = string.Empty;
                    string     strYear     = string.Empty;
                    string     role        = string.Empty;
                    string     imdbID      = string.Empty;

                    // IMDBid
                    movieParser.skipToEndOf(vdbParserStr[4]);           // title/
                    movieParser.extractTo(vdbParserStr[5], ref imdbID); // /

                    // Title
                    movieParser.resetPosition();
                    movieParser.skipToEndOf(vdbParserStr[6]);          // <a
                    movieParser.skipToEndOf(vdbParserStr[7]);          // >
                    movieParser.extractTo(vdbParserStr[8], ref title); // <br/>
                    title = CleanCrlf(title);

                    if (!SkipNoMovies(title))
                    {
                        // Year
                        movieParser.resetPosition();

                        if (movieParser.skipToStartOf(vdbParserStr[9]) &&         // year_column">20
                            movieParser.skipToEndOf(vdbParserStr[10]))            // >
                        {
                            movieParser.extractTo(vdbParserStr[11], ref strYear); // <
                        }
                        else
                        {
                            movieParser.resetPosition();

                            if (movieParser.skipToStartOf(vdbParserStr[12]) &&        // year_column">19
                                movieParser.skipToEndOf(vdbParserStr[13]))            // >
                            {
                                movieParser.extractTo(vdbParserStr[14], ref strYear); // <
                            }
                        }

                        strYear = strYear.Trim();

                        if (strYear.Length > 4)
                        {
                            strYear = strYear.Substring(0, 4);
                        }

                        // Roles actor
                        if (!director && !writer)
                        {
                            // Role case 1, no character link
                            if (movieParser.skipToEndOf(vdbParserStr[15]))         // <br/>
                            {
                                movieParser.extractTo(vdbParserStr[16], ref role); // <
                                role = CleanCrlf(role);

                                // Role case 2, with character link
                                if (role == string.Empty)
                                {
                                    movieParser.resetPosition();
                                    movieParser.skipToEndOf(vdbParserStr[17]);         // <br/>
                                    movieParser.extractTo(vdbParserStr[18], ref role); // </a>
                                    role = CleanCrlf(role);
                                }
                            }
                        }
                        else if (director)
                        {
                            role = GUILocalizeStrings.Get(199).Replace(":", string.Empty);
                        }
                        else // Writer
                        {
                            string wRole = string.Empty;

                            if (title != null)
                            {
                                // Check for cases like "(movie type)(role)" and use "(role)" only
                                MatchCollection mc = Regex.Matches(title, @"\([^)]+\)");

                                if (mc.Count > 0)
                                {
                                    if (mc.Count > 1)
                                    {
                                        wRole = mc[mc.Count - 1].Value;
                                    }
                                    else
                                    {
                                        wRole = mc[0].Value;
                                    }
                                }
                                else
                                {
                                    continue;
                                }

                                if (!string.IsNullOrEmpty(wRole))
                                {
                                    // Remove parentheses (leave text inside)
                                    wRole = Regex.Replace(wRole, "([(]|[)])", string.Empty);
                                    role  = GUILocalizeStrings.Get(200) + " " + wRole;
                                }
                                else
                                {
                                    role = GUILocalizeStrings.Get(200).Replace(":", string.Empty);
                                }
                            }
                        }

                        int year = 0;
                        // Set near future for movies without year (99% it's a future project)
                        if (!Int32.TryParse(strYear, out year))
                        {
                            year = DateTime.Today.Year + 3;
                        }

                        IMDBActor.IMDBActorMovie actorMovie = new IMDBActor.IMDBActorMovie();
                        title = Util.Utils.RemoveParenthesis(title).Trim();
                        role  = Util.Utils.RemoveParenthesis(role).Trim();
                        actorMovie.MovieTitle  = title;
                        actorMovie.Role        = role;
                        actorMovie.Year        = year;
                        actorMovie.MovieImdbID = imdbID;
                        // Check if director/writer movie exists in actors movies, concatenate role
                        // to already fetched actor movie (no duplicate movie entries)
                        bool skipAdd = false;

                        if (writer)
                        {
                            for (int i = 0; i < actor.Count; i++)
                            {
                                if (actor[i].MovieImdbID == imdbID)
                                {
                                    if (actor[i].Role != string.Empty)
                                    {
                                        actor[i].Role = role + ", " + actor[i].Role;
                                    }
                                    else
                                    {
                                        actor[i].Role = role;
                                    }

                                    skipAdd = true;
                                    break;
                                }
                            }
                        }

                        if (director)
                        {
                            for (int i = 0; i < actor.Count; i++)
                            {
                                if (actor[i].MovieImdbID == imdbID)
                                {
                                    if (actor[i].Role != string.Empty)
                                    {
                                        actor[i].Role = role + ", " + actor[i].Role;
                                    }
                                    else
                                    {
                                        actor[i].Role = role;
                                    }
                                    skipAdd = true;
                                    break;
                                }
                            }
                        }

                        if (!skipAdd)
                        {
                            actor.Add(actorMovie);
                        }
                    }
                }
            }
        }
示例#5
0
        // Changed - IMDB changed HTML code
        // Downloads the IMDB name-search page at strURL and appends the actor links found on it
        // to _elements.
        //  - When the page title is anything other than "IMDb Name Search", a single entry is added
        //    that points at the URI reported by GetPage, titled with the cleaned-up page title.
        //  - Otherwise the "Exact Matches" and then the "Popular Names" sections are scanned for
        //    "/name/..." links.
        // Exceptions are not propagated to the caller; they are written to the log.
        private void FindIMDBActor(string strURL)
        {
            try
            {
                string absoluteUri;
                // NOTE(review): an earlier remark here claimed UTF-8 has problems with national
                // characters and that the default IMDB encoding is used, yet the page is requested
                // as "utf-8" - confirm which one is intended.
                string strBody = GetPage(strURL, "utf-8", out absoluteUri);

                // Nothing to parse (same null/empty guard GetActorDetails applies to GetPage results)
                if (string.IsNullOrEmpty(strBody))
                {
                    Log.Error("imdb lookup of {0} returned no page content", strURL);
                    return;
                }

                string     value  = string.Empty;
                HTMLParser parser = new HTMLParser(strBody);

                // Ordinal, case-insensitive comparison: a culture-sensitive ToLower() breaks the match
                // on cultures with special casing rules for 'I' (e.g. tr-TR)
                if (parser.skipToEndOf("<title>") &&
                    parser.extractTo("</title>", ref value) &&
                    !string.Equals(value, "imdb name search", StringComparison.OrdinalIgnoreCase))
                {
                    value = new HTMLUtil().ConvertHTMLToAnsi(value);
                    value = Util.Utils.RemoveParenthesis(value).Trim();
                    IMDBUrl oneUrl = new IMDBUrl(absoluteUri, value, "IMDB");
                    _elements.Add(oneUrl);
                    return;
                }

                AddIMDBActorsFromSection(parser, "Exact Matches");
                // Maybe more actors with the similar name
                AddIMDBActorsFromSection(parser, "Popular Names");
            }
            catch (Exception ex)
            {
                Log.Error("exception for imdb lookup of {0} err:{1} stack:{2}", strURL, ex.Message, ex.StackTrace);
            }
        }

        // Rewinds parser to the start of the page and, for every occurrence of sectionHeading found
        // with skipToEndOfNoCase, adds the next "/name/..." link (url + link text) to _elements.
        private void AddIMDBActorsFromSection(HTMLParser parser, string sectionHeading)
        {
            parser.resetPosition();

            while (parser.skipToEndOfNoCase(sectionHeading))
            {
                string url  = string.Empty;
                string name = string.Empty;

                //<a href="/name/nm0000246/" onclick="set_args('nm0000246', 1)">Bruce Willis</a>
                if (!parser.skipToStartOf("href=\"/name/"))
                {
                    // No name link after this heading, move past the next closing anchor and retry
                    parser.skipToEndOfNoCase("</a>");
                    continue;
                }

                parser.skipToEndOf("href=\"");
                parser.extractTo("\"", ref url);
                parser.skipToEndOf("<br><a");
                parser.skipToEndOf(">");
                parser.extractTo("</a>", ref name);
                name = new HTMLUtil().ConvertHTMLToAnsi(name);
                name = Util.Utils.RemoveParenthesis(name).Trim();
                // Extracted links are site-relative, prefix them with the host
                IMDBUrl newUrl = new IMDBUrl("http://akas.imdb.com" + url, name, "IMDB");
                _elements.Add(newUrl);
            }
        }