/// <summary>
/// Filmography and bio: downloads the person page behind <paramref name="url"/> and fills
/// <paramref name="actor"/> with the id, name, photo, birth/death data, biography and
/// filmography that can be parsed from it.
/// </summary>
/// <remarks>
/// Every HTML marker is read from the array returned by <c>VdbParserStringActorDetails()</c>,
/// which must contain exactly 46 entries. The trailing comment next to each lookup repeats
/// the marker the original author expected at that index; the actual values are supplied
/// at run time and are not visible in this method.
/// </remarks>
/// <param name="url">Person page to fetch; <c>url.Title</c> is used as the name when none can be parsed.</param>
/// <param name="actor">Always assigned a new instance; populated as far as parsing succeeds.</param>
/// <returns>
/// <c>true</c> when the page was fetched and processed; <c>false</c> when the parser strings
/// are missing or have the wrong count, the page is null or empty, or an exception was logged.
/// </returns>
public bool GetActorDetails(IMDBUrl url, out IMDBActor actor)
{
    actor = new IMDBActor();

    string[] vdbParserStr = VdbParserStringActorDetails();
    if (vdbParserStr == null || vdbParserStr.Length != 46)
    {
        return false;
    }

    try
    {
        string absoluteUri;
        string strBody = GetPage(url.URL, "utf-8", out absoluteUri);
        if (string.IsNullOrEmpty(strBody))
        {
            return false;
        }

        #region Actor imdb id

        // IMDBActorID: take the last "nm<digits>" token of the URL. Matching the digits
        // explicitly (instead of cutting a fixed 9 characters) keeps ids of any length
        // intact and never picks up trailing URL characters. When no id is present the
        // property is simply left at its default.
        if (!string.IsNullOrEmpty(url.URL))
        {
            Match idMatch = Regex.Match(url.URL, "nm[0-9]+", RegexOptions.RightToLeft);
            if (idMatch.Success)
            {
                actor.IMDBActorID = idMatch.Value;
            }
        }

        #endregion

        HTMLParser parser = new HTMLParser(strBody);
        string strThumb = string.Empty;
        string value = string.Empty;
        string value2 = string.Empty;

        #region Actor name

        // Actor name
        if (parser.skipToEndOf(vdbParserStr[0]) &&           // <title>
            parser.extractTo(vdbParserStr[1], ref value))    // - IMDb</title>
        {
            value = new HTMLUtil().ConvertHTMLToAnsi(value);
            value = Util.Utils.RemoveParenthesis(value).Trim();
            actor.Name = HttpUtility.HtmlDecode(value.Trim());
        }

        // Fall back to the title carried by the search result when nothing was parsed.
        if (string.IsNullOrEmpty(actor.Name))
        {
            actor.Name = url.Title;
        }

        #endregion

        // Photo: the parser content is temporarily narrowed to the photo block and then
        // restored from parserTxt.
        string parserTxt = parser.Content;
        string photoBlock = string.Empty;

        #region Actor photo

        if (parser.skipToStartOf(vdbParserStr[2]) &&             // <td id="img_primary"
            parser.extractTo(vdbParserStr[3], ref photoBlock))   // </td>
        {
            parser.Content = photoBlock;

            if (parser.skipToEndOf(vdbParserStr[4]) &&           // <img src="
                parser.extractTo(vdbParserStr[5], ref strThumb)) // "
            {
                actor.ThumbnailUrl = strThumb;
            }

            parser.Content = parserTxt;
        }

        #endregion

        #region Actor birth date

        // Birth date
        if (parser.skipToEndOf(vdbParserStr[6]) &&               // >Born:</h4>
            parser.skipToEndOf(vdbParserStr[7]) &&               // birth_monthday=
            parser.skipToEndOf(vdbParserStr[8]) &&               // >
            parser.extractTo(vdbParserStr[9], ref value) &&      // <
            parser.skipToEndOf(vdbParserStr[10]) &&              // year=
            parser.extractTo(vdbParserStr[11], ref value2))      // "
        {
            actor.DateOfBirth = value + " " + value2;
        }

        #endregion

        #region Actor death date

        // Death date
        if (parser.skipToEndOf(vdbParserStr[12]) &&              // >Died:</h4>
            parser.skipToEndOf(vdbParserStr[13]) &&              // death_monthday="
            parser.skipToEndOf(vdbParserStr[14]) &&              // >
            parser.extractTo(vdbParserStr[15], ref value) &&     // <
            parser.skipToEndOf(vdbParserStr[16]) &&              // death_date="
            parser.extractTo(vdbParserStr[17], ref value2))      // "
        {
            actor.DateOfDeath = value + " " + value2;
        }

        #endregion

        parser.resetPosition();

        #region Actor birth place

        // Birth place
        if (parser.skipToEndOf(vdbParserStr[18]) &&              // birth_place=
            parser.skipToEndOf(vdbParserStr[19]) &&              // >
            parser.extractTo(vdbParserStr[20], ref value))       // <
        {
            actor.PlaceOfBirth = HttpUtility.HtmlDecode(value);
        }

        #endregion

        #region Actor death place

        // Death place
        if (parser.skipToEndOf(vdbParserStr[21]) &&              // death_place=
            parser.skipToEndOf(vdbParserStr[22]) &&              // >
            parser.extractTo(vdbParserStr[23], ref value))       // <
        {
            actor.PlaceOfDeath = HttpUtility.HtmlDecode(value);
        }

        #endregion

        // Mini Biography
        parser.resetPosition();

        #region Actor biography

        if (parser.skipToEndOf(vdbParserStr[24]) &&              // <td id="overview-top">
            parser.skipToEndOf(vdbParserStr[25]) &&              // <p>
            parser.extractTo(vdbParserStr[26], ref value))       // See full bio</a>
        {
            value = new HTMLUtil().ConvertHTMLToAnsi(value);
            actor.MiniBiography = Util.Utils.stripHTMLtags(value);
            actor.MiniBiography = actor.MiniBiography.Replace(vdbParserStr[45], string.Empty).Trim(); // See full bio »
            actor.MiniBiography = HttpUtility.HtmlDecode(actor.MiniBiography); // Remove HTML entities like ½

            if (actor.MiniBiography != string.Empty)
            {
                // Get complete biography: the bio page address is built from the URL the
                // first request ended up on.
                string bioURL = absoluteUri;
                if (!bioURL.EndsWith(vdbParserStr[27]))          // /
                {
                    bioURL += vdbParserStr[28];                  // /bio
                }
                else
                {
                    bioURL += vdbParserStr[29];                  // bio
                }

                string strBioBody = GetPage(bioURL, "utf-8", out absoluteUri);
                if (!string.IsNullOrEmpty(strBioBody))
                {
                    HTMLParser parser1 = new HTMLParser(strBioBody);

                    if (parser1.skipToEndOf(vdbParserStr[30]) &&         // <h5>Mini Biography</h5>
                        parser1.skipToEndOf(vdbParserStr[31]) &&         // <div class="wikipedia_bio">
                        parser1.extractTo(vdbParserStr[32], ref value))  // </div>
                    {
                        value = new HTMLUtil().ConvertHTMLToAnsi(value);
                        // Turn the <h5> section headings into plain-text separators before
                        // the remaining tags are stripped.
                        value = Regex.Replace(value, @"</h5>\s<h5>", "\n\r");
                        value = Regex.Replace(value, @"<h5>", "\n\r\n\r");
                        value = Regex.Replace(value, @"</h5>", ":\n\r");
                        actor.Biography = Util.Utils.stripHTMLtags(value).Trim();
                        actor.Biography = HttpUtility.HtmlDecode(actor.Biography);
                    }
                    else
                    {
                        // Second attempt with the alternative marker pair.
                        parser1.resetPosition();

                        if (parser1.skipToEndOf(vdbParserStr[33]) &&         // <h5>Mini Biography</h5>
                            parser1.extractTo(vdbParserStr[34], ref value))  // </p>
                        {
                            value = new HTMLUtil().ConvertHTMLToAnsi(value);
                            actor.Biography = Util.Utils.stripHTMLtags(value).Trim();
                            actor.Biography = HttpUtility.HtmlDecode(actor.Biography);
                        }
                    }
                }
            }
        }

        #endregion

        // Person is movie director or an actor/actress
        bool isActorPass = false;
        bool isDirectorPass = false;
        bool isWriterPass = false;
        parser.resetPosition();

        HTMLParser dirParser = new HTMLParser(); // HTML body for Director
        HTMLParser wriParser = new HTMLParser(); // HTML body for Writers

        #region Check person role in movie (actor, director or writer)

        if (parser.skipToEndOf(vdbParserStr[35]) &&              // name="Director">Director</a>
            parser.skipToEndOf(vdbParserStr[36]))                // </div>
        {
            isDirectorPass = true;
            dirParser.Content = parser.Content;
        }

        parser.resetPosition();

        if (parser.skipToEndOf(vdbParserStr[37]) &&              // name="Writer">Writer</a>
            parser.skipToEndOf(vdbParserStr[38]))                // </div>
        {
            isWriterPass = true;
            wriParser.Content = parser.Content;
        }

        parser.resetPosition();

        // The parser is deliberately left wherever this lookup stops: the actor
        // filmography below is read from that position.
        if (parser.skipToEndOf(vdbParserStr[39]) ||              // name="Actress">Actress</a>
            parser.skipToEndOf(vdbParserStr[40]))                // name="Actor">Actor</a>
        {
            isActorPass = true;
        }

        #endregion

        #region Get movies for every role

        // Get filmography Actor
        if (isActorPass)
        {
            GetActorMovies(actor, parser, false, false);
        }

        // Get filmography for writers
        if (isWriterPass)
        {
            parser = wriParser;
            parser.resetPosition();

            if (parser.skipToEndOf(vdbParserStr[41]) &&          // name="Writer">Writer</a>
                parser.skipToEndOf(vdbParserStr[42]))            // </div>
            {
                GetActorMovies(actor, parser, false, true);
            }
        }

        // Get filmography Director
        if (isDirectorPass)
        {
            parser = dirParser;
            parser.resetPosition();

            if (parser.skipToEndOf(vdbParserStr[43]) &&          // name="Director">Director</a>
                parser.skipToEndOf(vdbParserStr[44]))            // </div>
            {
                GetActorMovies(actor, parser, true, false);
            }
        }

        #endregion

        // Add filmography
        if (actor.Count > 0)
        {
            actor.SortActorMoviesByYear();
        }

        return true;
    }
    catch (Exception ex)
    {
        Log.Error("IMDB.GetActorDetails({0} exception:{1} {2} {3}", url.URL, ex.Message, ex.Source, ex.StackTrace);
    }

    return false;
}
/// <summary>
/// Changed - parsing all actor DB fields through HTML (IMDB changed HTML code).
/// Downloads the person page behind <paramref name="url"/> and fills
/// <paramref name="actor"/> with id, name, photo, dates, birth place, biography and either
/// the director or the actor/actress filmography, using hard-coded HTML markers.
/// </summary>
/// <param name="url">Person page to fetch; <c>url.Title</c> is used as the name when none can be parsed.</param>
/// <param name="director">
/// <c>true</c> to read the "Director" filmography section (every entry gets the role
/// "Director"); <c>false</c> to read the "Actress"/"Actor" section and parse a role per film.
/// </param>
/// <param name="actor">Always assigned a new instance; populated as far as parsing succeeds.</param>
/// <returns>
/// <c>true</c> when the page was fetched and processed (even if no films were found);
/// <c>false</c> when the page is null or empty, or an exception was logged.
/// </returns>
public bool GetActorDetails(IMDBUrl url, bool director, out IMDBActor actor)
{
    actor = new IMDBActor();

    try
    {
        string absoluteUri;
        string strBody = GetPage(url.URL, "utf-8", out absoluteUri);
        if (string.IsNullOrEmpty(strBody))
        {
            return false;
        }

        // IMDBActorID: take the last "nm<digits>" token of the URL. Matching the digits
        // explicitly (instead of cutting a fixed 9 characters) keeps ids of any length
        // intact and never picks up trailing URL characters. When no id is present the
        // property is simply left at its default.
        if (!string.IsNullOrEmpty(url.URL))
        {
            Match idMatch = Regex.Match(url.URL, "nm[0-9]+", RegexOptions.RightToLeft);
            if (idMatch.Success)
            {
                actor.IMDBActorID = idMatch.Value;
            }
        }

        HTMLParser parser = new HTMLParser(strBody);
        string strThumb = string.Empty;
        string value = string.Empty;
        string value2 = string.Empty;

        // Actor name
        if (parser.skipToEndOf("<title>") &&
            parser.extractTo("- IMDb</title>", ref value))
        {
            value = new HTMLUtil().ConvertHTMLToAnsi(value);
            value = Util.Utils.RemoveParenthesis(value).Trim();
            actor.Name = HttpUtility.HtmlDecode(value.Trim());
        }

        // Fall back to the title carried by the search result when nothing was parsed.
        if (string.IsNullOrEmpty(actor.Name))
        {
            actor.Name = url.Title;
        }

        // Photo: the parser content is temporarily narrowed to the photo cell and then
        // restored from parserTxt.
        string parserTxt = parser.Content;
        string photoBlock = string.Empty;

        if (parser.skipToStartOf("<td id=\"img_primary\"") &&
            parser.extractTo("</td>", ref photoBlock))
        {
            parser.Content = photoBlock;

            if (parser.skipToEndOf("<img src=\"") &&
                parser.extractTo("\"", ref strThumb))
            {
                actor.ThumbnailUrl = strThumb;
            }

            parser.Content = parserTxt;
        }

        // Birth date
        if (parser.skipToEndOf("Born:") &&
            parser.skipToEndOf("birth_monthday=") &&
            parser.skipToEndOf(">") &&
            parser.extractTo("<", ref value) &&
            parser.skipToEndOf("year=") &&
            parser.extractTo("\"", ref value2))
        {
            actor.DateOfBirth = value + " " + value2;
        }

        // Death date: this overload appends it to DateOfBirth as "<birth> ~ <death>",
        // using "?" when no birth date was found.
        if (parser.skipToEndOf(">Died:</h4>") &&
            parser.skipToEndOf("deaths\">") &&
            parser.extractTo("<", ref value) &&
            parser.skipToEndOf("death_date=") &&
            parser.extractTo("\"", ref value2))
        {
            if (actor.DateOfBirth == string.Empty)
            {
                actor.DateOfBirth = "?";
            }

            actor.DateOfBirth += " ~ " + value + " " + value2;
        }

        parser.resetPosition();

        // Birth place
        if (parser.skipToEndOf("birth_place=") &&
            parser.skipToEndOf(">") &&
            parser.extractTo("<", ref value))
        {
            actor.PlaceOfBirth = HttpUtility.HtmlDecode(value);
        }

        // Mini Biography
        parser.resetPosition();

        if (parser.skipToEndOf("<td id=\"overview-top\">") &&
            parser.skipToEndOf("<p>") &&
            parser.extractTo("See full bio</a>", ref value))
        {
            value = new HTMLUtil().ConvertHTMLToAnsi(value);
            actor.MiniBiography = Util.Utils.stripHTMLtags(value);
            actor.MiniBiography = actor.MiniBiography.Replace("See full bio »", string.Empty).Trim();
            actor.MiniBiography = HttpUtility.HtmlDecode(actor.MiniBiography); // Remove HTML entities like ½

            if (actor.MiniBiography != string.Empty)
            {
                // Get complete biography: the bio page address is built from the URL the
                // first request ended up on.
                string bioURL = absoluteUri;
                if (!bioURL.EndsWith("/"))
                {
                    bioURL += "/bio";
                }
                else
                {
                    bioURL += "bio";
                }

                string strBioBody = GetPage(bioURL, "utf-8", out absoluteUri);
                if (!string.IsNullOrEmpty(strBioBody))
                {
                    HTMLParser parser1 = new HTMLParser(strBioBody);

                    if (parser1.skipToEndOf("<h5>Mini Biography</h5>") &&
                        parser1.extractTo("</p>", ref value))
                    {
                        value = new HTMLUtil().ConvertHTMLToAnsi(value);
                        actor.Biography = Util.Utils.stripHTMLtags(value).Trim();
                        actor.Biography = HttpUtility.HtmlDecode(actor.Biography); // Remove HTML entities like ½
                    }
                }
            }
        }

        // Person is movie director or an actor/actress
        bool isActorPass = false;
        bool isDirectorPass = false;
        parser.resetPosition();

        // The parser is deliberately left wherever this lookup stops: the filmography
        // block below is extracted from that position.
        if (director)
        {
            if (parser.skipToEndOf("name=\"Director\">Director</a>") &&
                parser.skipToEndOf("</div>"))
            {
                isDirectorPass = true;
            }
        }
        else
        {
            if (parser.skipToEndOf("name=\"Actress\">Actress</a>") ||
                parser.skipToEndOf("name=\"Actor\">Actor</a>"))
            {
                isActorPass = true;
            }
        }

        // Get filmography
        if (isDirectorPass || isActorPass)
        {
            string movies = string.Empty;

            // Get films and roles block
            if (parser.extractTo("<div id", ref movies))
            {
                parser.Content = movies;
            }

            // Parse block for every film and get year, title and its imdbID and role
            while (parser.skipToStartOf("<span class=\"year_column\""))
            {
                string movie = string.Empty;
                if (!parser.extractTo("<div class", ref movie))
                {
                    // No terminator follows this entry, so no later entry can be
                    // extracted either. Stop here rather than re-testing the same
                    // marker, which could loop forever if the parser did not advance.
                    // NOTE(review): assumes extractTo searches forward from the current
                    // position - confirm against HTMLParser.
                    break;
                }

                movie += "</li>";
                HTMLParser movieParser = new HTMLParser(movie);
                string title = string.Empty;
                string strYear = string.Empty;
                string role = string.Empty;
                string imdbID = string.Empty;

                // IMDBid
                movieParser.skipToEndOf("title/");
                movieParser.extractTo("/", ref imdbID);

                // Title
                movieParser.resetPosition();
                movieParser.skipToEndOf("<a");
                movieParser.skipToEndOf(">");
                movieParser.extractTo("<br/>", ref title);
                title = Util.Utils.stripHTMLtags(title);
                title = title.Replace("\n", " ").Replace("\r", string.Empty);
                title = HttpUtility.HtmlDecode(title.Trim()); // Remove HTML entities like ½

                // Year: look for text starting with "20" first, then "19".
                movieParser.resetPosition();
                if (movieParser.skipToStartOf(">20") &&
                    movieParser.skipToEndOf(">"))
                {
                    movieParser.extractTo("<", ref strYear);
                }
                else if (movieParser.skipToStartOf(">19") &&
                         movieParser.skipToEndOf(">"))
                {
                    movieParser.extractTo("<", ref strYear);
                }

                // Roles
                if (!director && movieParser.skipToEndOf("<br/>"))
                {
                    // Role case 1, no character link
                    movieParser.extractTo("<", ref role);
                    role = Util.Utils.stripHTMLtags(role).Trim();
                    role = HttpUtility.HtmlDecode(role.Replace("\n", " ")
                                                      .Replace("\r", string.Empty).Trim());

                    if (role == string.Empty)
                    {
                        // Role case 2, with character link
                        movieParser.resetPosition();
                        movieParser.skipToEndOf("<br/>");
                        movieParser.extractTo("</a>", ref role);
                        role = Util.Utils.stripHTMLtags(role).Trim();
                        role = HttpUtility.HtmlDecode(role.Replace("\n", " ")
                                                          .Replace("\r", string.Empty).Trim());
                    }
                }
                else if (director)
                {
                    // Just director
                    role = "Director";
                }

                // The first four characters of the extracted text are the year; anything
                // missing, too short or non-numeric falls back to 1900 without relying
                // on exceptions for every unparsable entry.
                int year = 1900;
                int parsedYear;
                if (strYear != null &&
                    strYear.Length >= 4 &&
                    Int32.TryParse(strYear.Substring(0, 4), out parsedYear))
                {
                    year = parsedYear;
                }

                IMDBActor.IMDBActorMovie actorMovie = new IMDBActor.IMDBActorMovie();
                actorMovie.MovieTitle = title;
                actorMovie.Role = role;
                actorMovie.Year = year;
                actorMovie.imdbID = imdbID;
                actor.Add(actorMovie);
            }
        }

        return true;
    }
    catch (Exception ex)
    {
        Log.Error("IMDB.GetActorDetails({0} exception:{1} {2} {3}", url.URL, ex.Message, ex.Source, ex.StackTrace);
    }

    return false;
}