示例#1
0
    // Filmography and bio
    /// <summary>
    /// Downloads the person page referenced by <paramref name="url"/> and fills an
    /// <see cref="IMDBActor"/> with the id, name, photo, birth/death data, biography and
    /// filmography scraped from the returned HTML.
    /// </summary>
    /// <remarks>
    /// Every HTML marker used for scraping is read from the array returned by
    /// <c>VdbParserStringActorDetails()</c>, which must contain exactly 46 entries. The trailing
    /// comment next to each index shows the marker that entry is expected to hold.
    /// The parser is stateful (it keeps a current position), so the order of the
    /// <c>skipTo*</c>/<c>extractTo</c>/<c>resetPosition</c> calls below is significant.
    /// </remarks>
    /// <param name="url">Supplies the page address (<c>URL</c>) and the fallback name (<c>Title</c>).</param>
    /// <param name="actor">Always assigned a new instance; populated as far as parsing succeeds.</param>
    /// <returns>
    /// <c>true</c> when the page was fetched and processed; <c>false</c> when the marker table is
    /// missing or has the wrong size, the page body is null or empty, or an exception was caught
    /// (it is logged through <c>Log.Error</c>).
    /// </returns>
    public bool GetActorDetails(IMDBUrl url, out IMDBActor actor)
    {
      actor = new IMDBActor();

      string[] vdbParserStr = VdbParserStringActorDetails();

      // The indices used below run from 0 to 45, so any other size means an incompatible marker table.
      if (vdbParserStr == null || vdbParserStr.Length != 46)
      {
        return false;
      }

      try
      {
        string absoluteUri;
        string strBody = GetPage(url.URL, "utf-8", out absoluteUri);

        if (string.IsNullOrEmpty(strBody))
        {
          return false;
        }

        #region Actor imdb id

        // IMDBActorID: the 9 characters starting at the last "nm" in the URL, with any '/' removed.
        // An explicit bounds check replaces the former empty catch block: when "nm" is absent or
        // fewer than 9 characters follow it, the id is simply left untouched.
        // NOTE(review): a fixed width of 9 truncates ids with more than 7 digits - confirm whether that matters.
        int pos = url.URL.LastIndexOf("nm", StringComparison.Ordinal);

        if (pos >= 0 && pos + 9 <= url.URL.Length)
        {
          actor.IMDBActorID = url.URL.Substring(pos, 9).Replace("/", string.Empty);
        }

        #endregion

        HTMLParser parser = new HTMLParser(strBody);
        string strThumb = string.Empty;
        string value = string.Empty;
        string value2 = string.Empty;

        #region Actor name

        // Actor name
        if ((parser.skipToEndOf(vdbParserStr[0])) &&        // <title>
            (parser.extractTo(vdbParserStr[1], ref value))) // - IMDb</title>
        {
          value = new HTMLUtil().ConvertHTMLToAnsi(value);
          value = Util.Utils.RemoveParenthesis(value).Trim();
          actor.Name = HttpUtility.HtmlDecode(value.Trim());
        }

        // Fall back to the title carried by the search result when the page gave no name.
        if (actor.Name == string.Empty)
        {
          actor.Name = url.Title;
        }

        #endregion

        // Photo
        // Keep the full page text so it can be restored after the parser is narrowed to the photo block.
        string parserTxt = parser.Content;
        string photoBlock = string.Empty;

        #region Actor photo

        if (parser.skipToStartOf(vdbParserStr[2]) &&              // <td id="img_primary"
            (parser.extractTo(vdbParserStr[3], ref photoBlock)))  // </td>
        {
          parser.Content = photoBlock;

          if ((parser.skipToEndOf(vdbParserStr[4])) &&            // <img src="
              (parser.extractTo(vdbParserStr[5], ref strThumb)))  // "
          {
            actor.ThumbnailUrl = strThumb;
          }

          parser.Content = parserTxt;
        }

        #endregion

        #region Actor birth date

        // Birth date: "<month day> <year>"
        if ((parser.skipToEndOf(vdbParserStr[6])) &&          // >Born:</h4>
            (parser.skipToEndOf(vdbParserStr[7])) &&          // birth_monthday=
            (parser.skipToEndOf(vdbParserStr[8])) &&          // >
            (parser.extractTo(vdbParserStr[9], ref value)) && // <
            (parser.skipToEndOf(vdbParserStr[10])) &&         // year=
            (parser.extractTo(vdbParserStr[11], ref value2))) // "
        {
          actor.DateOfBirth = value + " " + value2;
        }

        #endregion

        #region Actor death date

        // Death date: "<month day> <year>"
        if ((parser.skipToEndOf(vdbParserStr[12])) &&           // >Died:</h4>
            (parser.skipToEndOf(vdbParserStr[13])) &&           // death_monthday="
            (parser.skipToEndOf(vdbParserStr[14])) &&           // >
            (parser.extractTo(vdbParserStr[15], ref value)) &&  // <
            (parser.skipToEndOf(vdbParserStr[16])) &&           // death_date="
            (parser.extractTo(vdbParserStr[17], ref value2)))   // "
        {
          actor.DateOfDeath = value + " " + value2;
        }

        #endregion

        parser.resetPosition();

        #region Actor birth place

        // Birth place
        if ((parser.skipToEndOf(vdbParserStr[18])) &&         // birth_place=
            (parser.skipToEndOf(vdbParserStr[19])) &&         // >
            (parser.extractTo(vdbParserStr[20], ref value)))  // <
        {
          actor.PlaceOfBirth = HttpUtility.HtmlDecode(value);
        }

        #endregion

        #region Actor death place

        // Death place (searched from wherever the birth-place lookup left the parser)
        if ((parser.skipToEndOf(vdbParserStr[21])) &&         // death_place=
            (parser.skipToEndOf(vdbParserStr[22])) &&         // >
            (parser.extractTo(vdbParserStr[23], ref value)))  // <
        {
          actor.PlaceOfDeath = HttpUtility.HtmlDecode(value);
        }

        #endregion

        // Mini Biography
        parser.resetPosition();

        #region Actor biography

        if ((parser.skipToEndOf(vdbParserStr[24])) &&         // <td id="overview-top">
            (parser.skipToEndOf(vdbParserStr[25])) &&         // <p>
            (parser.extractTo(vdbParserStr[26], ref value)))  // See full bio</a>
        {
          value = new HTMLUtil().ConvertHTMLToAnsi(value);
          actor.MiniBiography = Util.Utils.stripHTMLtags(value);
          actor.MiniBiography = actor.MiniBiography.Replace(vdbParserStr[45], string.Empty).Trim(); // See full bio »
          actor.MiniBiography = HttpUtility.HtmlDecode(actor.MiniBiography); // Remove HTML entities like &#189;

          // The full biography page is only requested when the overview actually had a mini biography.
          if (actor.MiniBiography != string.Empty)
          {
            // Build the biography address from the URI the first request ended up at.
            string bioURL = absoluteUri;

            if (!bioURL.EndsWith(vdbParserStr[27])) // /
            {
              bioURL += vdbParserStr[28];           // /bio
            }
            else
            {
              bioURL += vdbParserStr[29];           // bio
            }

            string strBioBody = GetPage(bioURL, "utf-8", out absoluteUri);

            if (!string.IsNullOrEmpty(strBioBody))
            {
              HTMLParser parser1 = new HTMLParser(strBioBody);

              // First attempt: biography wrapped in the dedicated bio <div>.
              if (parser1.skipToEndOf(vdbParserStr[30]) &&        // <h5>Mini Biography</h5>
                  parser1.skipToEndOf(vdbParserStr[31]) &&        // <div class="wikipedia_bio">
                  parser1.extractTo(vdbParserStr[32], ref value)) // </div>
              {
                value = new HTMLUtil().ConvertHTMLToAnsi(value);
                // Turn the <h5> sub-headings into line breaks before the tags are stripped.
                // NOTE(review): "\n\r" is LF+CR rather than the usual CR+LF; kept unchanged - confirm it is intended.
                value = Regex.Replace(value, @"</h5>\s<h5>", "\n\r");
                value = Regex.Replace(value, @"<h5>", "\n\r\n\r");
                value = Regex.Replace(value, @"</h5>", ":\n\r");
                actor.Biography = Util.Utils.stripHTMLtags(value).Trim();
                actor.Biography = HttpUtility.HtmlDecode(actor.Biography);
              }
              else
              {
                // Second attempt: plain paragraph following the heading.
                parser1.resetPosition();

                if (parser1.skipToEndOf(vdbParserStr[33]) &&      // <h5>Mini Biography</h5>
                  parser1.extractTo(vdbParserStr[34], ref value)) // </p>
                {
                  value = new HTMLUtil().ConvertHTMLToAnsi(value);
                  actor.Biography = Util.Utils.stripHTMLtags(value).Trim();
                  actor.Biography = HttpUtility.HtmlDecode(actor.Biography);
                }
              }
            }
          }
        }

        #endregion

        // Person is movie director or an actor/actress
        bool isActorPass = false;
        bool isDirectorPass = false;
        bool isWriterPass = false;

        parser.resetPosition();

        HTMLParser dirParser = new HTMLParser(); // HTML body for Director
        HTMLParser wriParser = new HTMLParser(); // HTML body for Writers

        #region Check person role in movie (actor, director or writer)

        if ((parser.skipToEndOf(vdbParserStr[35])) && // name="Director">Director</a>
            (parser.skipToEndOf(vdbParserStr[36])))   // </div>
        {
          isDirectorPass = true;
          dirParser.Content = parser.Content;
        }

        parser.resetPosition();

        if ((parser.skipToEndOf(vdbParserStr[37])) && // name="Writer">Writer</a>
            (parser.skipToEndOf(vdbParserStr[38])))   // </div>
        {
          isWriterPass = true;
          wriParser.Content = parser.Content;
        }

        parser.resetPosition();

        // The parser is left positioned by this lookup; the actor filmography below continues from there.
        if (parser.skipToEndOf(vdbParserStr[39]) || // name="Actress">Actress</a>
          parser.skipToEndOf(vdbParserStr[40]))     // name="Actor">Actor</a>
        {
          isActorPass = true;
        }

        #endregion

        #region Get movies for every role

        // Get filmography Actor (must run first: it relies on the position left by the lookup above)
        if (isActorPass)
        {
          GetActorMovies(actor, parser, false, false);
        }

        // Get filmography for writers
        if (isWriterPass)
        {
          parser = wriParser;
          parser.resetPosition();

          if ((parser.skipToEndOf(vdbParserStr[41])) && // name="Writer">Writer</a>
            (parser.skipToEndOf(vdbParserStr[42])))     // </div>
          {
            GetActorMovies(actor, parser, false, true);
          }
        }

        // Get filmography Director
        if (isDirectorPass)
        {
          parser = dirParser;
          parser.resetPosition();

          if (parser.skipToEndOf(vdbParserStr[43]) && // name="Director">Director</a>
              parser.skipToEndOf(vdbParserStr[44]))   // </div>
          {
            GetActorMovies(actor, parser, true, false);
          }
        }

        #endregion

        // Add filmography
        if (actor.Count > 0)
        {
          actor.SortActorMoviesByYear();
        }

        return true;
      }
      catch (Exception ex)
      {
        Log.Error("IMDB.GetActorDetails({0} exception:{1} {2} {3}", url.URL, ex.Message, ex.Source, ex.StackTrace);
      }
      return false;
    }
示例#2
0
    // Changed - parsing all actor DB fields through HTML (IMDB changed HTML code)
    /// <summary>
    /// Downloads the person page referenced by <paramref name="url"/> and fills an
    /// <see cref="IMDBActor"/> with the id, name, photo, birth/death data, biography and the
    /// filmography of one role, all scraped from the returned HTML with hard-coded markers.
    /// </summary>
    /// <remarks>
    /// The parser is stateful (it keeps a current position), so the order of the
    /// <c>skipTo*</c>/<c>extractTo</c>/<c>resetPosition</c> calls below is significant.
    /// A death date, when found, is appended to <c>DateOfBirth</c> as " ~ &lt;date&gt;".
    /// </remarks>
    /// <param name="url">Supplies the page address (<c>URL</c>) and the fallback name (<c>Title</c>).</param>
    /// <param name="director">
    /// <c>true</c> to read the "Director" filmography section (every entry gets the role "Director");
    /// <c>false</c> to read the "Actress"/"Actor" section and extract the role per film.
    /// </param>
    /// <param name="actor">Always assigned a new instance; populated as far as parsing succeeds.</param>
    /// <returns>
    /// <c>true</c> when the page was fetched and processed; <c>false</c> when the page body is null
    /// or empty, or an exception was caught (it is logged through <c>Log.Error</c>).
    /// </returns>
    public bool GetActorDetails(IMDBUrl url, bool director, out IMDBActor actor)
    {
      actor = new IMDBActor();
      try
      {
        string absoluteUri;
        string strBody = GetPage(url.URL, "utf-8", out absoluteUri);
        if (string.IsNullOrEmpty(strBody))
        {
          return false;
        }

        // IMDBActorID: the 9 characters starting at the last "nm" in the URL, with any '/' removed.
        // An explicit bounds check replaces the former empty catch block: when "nm" is absent or
        // fewer than 9 characters follow it, the id is simply left untouched.
        // NOTE(review): a fixed width of 9 truncates ids with more than 7 digits - confirm whether that matters.
        int pos = url.URL.LastIndexOf("nm", StringComparison.Ordinal);
        if (pos >= 0 && pos + 9 <= url.URL.Length)
        {
          actor.IMDBActorID = url.URL.Substring(pos, 9).Replace("/", string.Empty);
        }

        HTMLParser parser = new HTMLParser(strBody);
        string strThumb = string.Empty;
        string value = string.Empty;
        string value2 = string.Empty;
        // Actor name
        if ((parser.skipToEndOf("<title>")) &&
            (parser.extractTo("- IMDb</title>", ref value)))
        {
          value = new HTMLUtil().ConvertHTMLToAnsi(value);
          value = Util.Utils.RemoveParenthesis(value).Trim();
          actor.Name = HttpUtility.HtmlDecode(value.Trim());
        }
        // Fall back to the title carried by the search result when the page gave no name.
        if (actor.Name == string.Empty)
        {
          actor.Name = url.Title;
        }
        // Photo
        // Keep the full page text so it can be restored after the parser is narrowed to the photo block.
        string parserTxt = parser.Content;
        string photoBlock = string.Empty;
        if (parser.skipToStartOf("<td id=\"img_primary\"") &&
            (parser.extractTo("</td>", ref photoBlock)))
        {
          parser.Content = photoBlock;
          if ((parser.skipToEndOf("<img src=\"")) &&
              (parser.extractTo("\"", ref strThumb)))
          {
            actor.ThumbnailUrl = strThumb;
          }
          parser.Content = parserTxt;
        }
        // Birth date: "<month day> <year>"
        if ((parser.skipToEndOf("Born:")) &&
            (parser.skipToEndOf("birth_monthday=")) &&
            (parser.skipToEndOf(">")) &&
            (parser.extractTo("<", ref value)) &&
            (parser.skipToEndOf("year=")) &&
            (parser.extractTo("\"", ref value2)))
        {
          actor.DateOfBirth = value + " " + value2;
        }
        // Death date: appended to the birth date; "?" stands in for an unknown birth date.
        if ((parser.skipToEndOf(">Died:</h4>")) &&
            (parser.skipToEndOf("deaths\">")) &&
            (parser.extractTo("<", ref value)) &&
            (parser.skipToEndOf("death_date=")) &&
            (parser.extractTo("\"", ref value2)))
        {
          if (actor.DateOfBirth == string.Empty)
          {
            actor.DateOfBirth = "?";
          }
          actor.DateOfBirth += " ~ " + value + " " + value2;
        }

        parser.resetPosition();
        // Birth place
        if ((parser.skipToEndOf("birth_place=")) &&
            (parser.skipToEndOf(">")) &&
            (parser.extractTo("<", ref value)))
        {
          actor.PlaceOfBirth = HttpUtility.HtmlDecode(value);
        }
        // Mini Biography
        parser.resetPosition();
        if ((parser.skipToEndOf("<td id=\"overview-top\">")) &&
            (parser.skipToEndOf("<p>")) &&
            (parser.extractTo("See full bio</a>", ref value)))
        {
          value = new HTMLUtil().ConvertHTMLToAnsi(value);
          actor.MiniBiography = Util.Utils.stripHTMLtags(value);
          actor.MiniBiography = actor.MiniBiography.Replace("See full bio »", string.Empty).Trim();
          actor.MiniBiography = HttpUtility.HtmlDecode(actor.MiniBiography); // Remove HTML entities like &#189;
          // The full biography page is only requested when the overview actually had a mini biography.
          if (actor.MiniBiography != string.Empty)
          {
            // get complete biography, addressed relative to the URI the first request ended up at
            string bioURL = absoluteUri;
            if (!bioURL.EndsWith("/"))
            {
              bioURL += "/bio";
            }
            else
            {
              bioURL += "bio";
            }
            string strBioBody = GetPage(bioURL, "utf-8", out absoluteUri);
            if (!string.IsNullOrEmpty(strBioBody))
            {
              HTMLParser parser1 = new HTMLParser(strBioBody);
              if (parser1.skipToEndOf("<h5>Mini Biography</h5>") &&
                  parser1.extractTo("</p>", ref value))
              {
                value = new HTMLUtil().ConvertHTMLToAnsi(value);
                actor.Biography = Util.Utils.stripHTMLtags(value).Trim();
                actor.Biography = HttpUtility.HtmlDecode(actor.Biography); // Remove HTML entities like &#189;
              }
            }
          }
        }
        // Person is movie director or an actor/actress
        bool isActorPass = false;
        bool isDirectorPass = false;
        parser.resetPosition();

        // Position the parser at the start of the requested filmography section.
        if (director)
        {
          if ((parser.skipToEndOf("name=\"Director\">Director</a>")) &&
              (parser.skipToEndOf("</div>")))
          {
            isDirectorPass = true;
          }
        }
        else
        {
          if (parser.skipToEndOf("name=\"Actress\">Actress</a>") || parser.skipToEndOf("name=\"Actor\">Actor</a>"))
          {
            isActorPass = true;
          }
        }
        // Get filmography
        if (isDirectorPass || isActorPass)
        {
          string movies = string.Empty;
          // Get films and roles block
          if (parser.extractTo("<div id", ref movies))
          {
            parser.Content = movies;
          }
          // Parse block for every film and get year, title and its imdbID and role
          while (parser.skipToStartOf("<span class=\"year_column\""))
          {
            string movie = string.Empty;
            if (parser.extractTo("<div class", ref movie))
            {
              movie += "</li>";
              HTMLParser movieParser = new HTMLParser(movie);
              string title = string.Empty;
              string strYear = string.Empty;
              string role = string.Empty;
              string imdbID = string.Empty;
              // IMDBid
              movieParser.skipToEndOf("title/");
              movieParser.extractTo("/", ref imdbID);
              // Title
              movieParser.resetPosition();
              movieParser.skipToEndOf("<a");
              movieParser.skipToEndOf(">");
              movieParser.extractTo("<br/>", ref title);
              title = Util.Utils.stripHTMLtags(title);
              title = title.Replace("\n", " ").Replace("\r", string.Empty);
              title = HttpUtility.HtmlDecode(title.Trim()); // Remove HTML entities like &#189;
              // Year: look for text starting with "20" first, then "19"
              movieParser.resetPosition();
              if (movieParser.skipToStartOf(">20") &&
                  movieParser.skipToEndOf(">"))
              {
                movieParser.extractTo("<", ref strYear);
              }
              else if (movieParser.skipToStartOf(">19") &&
                       movieParser.skipToEndOf(">"))
              {
                movieParser.extractTo("<", ref strYear);
              }
              // Roles (searched from the position the year lookup left behind)
              if ((director == false) && (movieParser.skipToEndOf("<br/>"))) // Role case 1, no character link
              {
                movieParser.extractTo("<", ref role);
                role = Util.Utils.stripHTMLtags(role).Trim();
                role = HttpUtility.HtmlDecode(role.Replace("\n", " ")
                                                .Replace("\r", string.Empty).Trim());
                if (role == string.Empty) // Role case 2, with character link
                {
                  movieParser.resetPosition();
                  movieParser.skipToEndOf("<br/>");
                  movieParser.extractTo("</a>", ref role);
                  role = Util.Utils.stripHTMLtags(role).Trim();
                  role = HttpUtility.HtmlDecode(role.Replace("\n", " ")
                                                  .Replace("\r", string.Empty).Trim());
                }
              }
              else
              {
                // Just director
                if (director)
                {
                  role = "Director";
                }
              }

              // Use the first four characters of the year text; anything missing, too short or
              // non-numeric falls back to 1900 (same result as the former try/catch, without
              // relying on exceptions for control flow).
              int year;
              if (strYear == null ||
                  strYear.Length < 4 ||
                  !Int32.TryParse(strYear.Substring(0, 4), out year))
              {
                year = 1900;
              }

              IMDBActor.IMDBActorMovie actorMovie = new IMDBActor.IMDBActorMovie();
              actorMovie.MovieTitle = title;
              actorMovie.Role = role;
              actorMovie.Year = year;
              actorMovie.imdbID = imdbID;
              actor.Add(actorMovie);
            }
          }
        }
        return true;
      }
      catch (Exception ex)
      {
        Log.Error("IMDB.GetActorDetails({0} exception:{1} {2} {3}", url.URL, ex.Message, ex.Source, ex.StackTrace);
      }
      return false;
    }