示例#1
0
        /// <summary>
        /// Selects the movie database whose ID equals the <c>Database</c> value of
        /// <paramref name="url"/> and lets that database's grabber fetch the movie
        /// details into <paramref name="movieDetails"/>.
        /// </summary>
        /// <param name="url">Search result to resolve; its <c>Database</c> value picks the grabber.</param>
        /// <param name="movieDetails">Movie object passed by reference to the grabber.</param>
        /// <returns>
        /// <c>true</c> when a matching database with a grabber exists and the grabber call
        /// finished without throwing; <c>false</c> when <paramref name="url"/> is null, no
        /// usable database was found, or an exception was caught and logged.
        /// </returns>
        public bool GetDetails(IMDBUrl url, ref IMDBMovie movieDetails)
        {
            // A null url cannot be resolved. Rejecting it up front also protects the
            // catch block below, which reads url.Database while logging and would
            // otherwise throw a second, unhandled exception.
            if (url == null)
            {
                return false;
            }

            try
            {
                MovieInfoDatabase currentDB = null;

                // Deliberately no early exit: if several entries share an ID the last one wins.
                foreach (MovieInfoDatabase db in _databaseList)
                {
                    if (db.ID == url.Database)
                    {
                        currentDB = db;
                    }
                }

                if (currentDB == null || currentDB.Grabber == null)
                {
                    return false;
                }

                currentDB.Grabber.GetDetails(url, ref movieDetails);
                return true;
            }
            catch (Exception ex)
            {
                Log.Error("Movie database lookup GetDetails() - grabber: {0}, message : {1}", url.Database, ex.Message);
                return false;
            }
        }
示例#2
0
        /// <summary>
        /// Finds the movie database registered under the ID stored in
        /// <paramref name="url"/>'s <c>Database</c> value and delegates the detail
        /// lookup to that database's grabber.
        /// </summary>
        /// <param name="url">Search result whose <c>Database</c> value identifies the source database.</param>
        /// <param name="movieDetails">Movie object the grabber receives by reference.</param>
        /// <returns>
        /// <c>true</c> if a database with a grabber was found and the grabber call did not
        /// throw; <c>false</c> for a null <paramref name="url"/>, a missing database or
        /// grabber, or a caught (and logged) exception.
        /// </returns>
        public bool GetDetails(IMDBUrl url, ref IMDBMovie movieDetails)
        {
            // The error handler below logs url.Database, so a null url must be
            // rejected here or the handler itself would throw.
            if (url == null)
            {
                return false;
            }

            try
            {
                // Keep scanning the whole list: the last entry with a matching ID is used.
                MovieInfoDatabase currentDB = null;
                foreach (MovieInfoDatabase db in _databaseList)
                {
                    if (db.ID == url.Database)
                    {
                        currentDB = db;
                    }
                }

                if (currentDB == null)
                {
                    return false;
                }

                if (currentDB.Grabber == null)
                {
                    return false;
                }

                currentDB.Grabber.GetDetails(url, ref movieDetails);
                return true;
            }
            catch (Exception ex)
            {
                Log.Error("Movie database lookup GetDetails() - grabber: {0}, message : {1}", url.Database, ex.Message);
                return false;
            }
        }
示例#3
0
        /// <summary>
        /// Fetches filmography and biography data for an actor through the internal
        /// actors script grabber.
        /// </summary>
        /// <param name="url">Actor search result handed to the grabber.</param>
        /// <param name="actor">
        /// Receives the grabber's result; starts out as an empty <c>IMDBActor</c>.
        /// </param>
        /// <returns>
        /// <c>true</c> when the grabber reports success; <c>false</c> when it reports
        /// failure or an exception was caught and logged.
        /// </returns>
        public bool GetActorDetails(IMDBUrl url, out IMDBActor actor)
        {
            actor = new IMDBActor();

            try
            {
                bool grabbed = InternalActorsScriptGrabber.InternalActorsGrabber.GetActorDetails(url, out actor);
                if (!grabbed)
                {
                    return false;
                }

                // Only sort when the grabber actually delivered movies.
                if (actor.Count > 0)
                {
                    actor.SortActorMoviesByYear();
                }

                return true;
            }
            catch (Exception ex)
            {
                Log.Error("IMDB GetActorDetails Error: {0}", ex.Message);
                return false;
            }
        }
示例#4
0
 /// <summary>
 /// Wraps the given URL and name in an <c>IMDBUrl</c> tagged with the "IMDB"
 /// database and appends it to <c>_elements</c>.
 /// </summary>
 /// <param name="strURL">URL passed as the first constructor argument.</param>
 /// <param name="strName">Name passed as the second constructor argument.</param>
 public void SetIMDBActor(string strURL, string strName)
 {
   _elements.Add(new IMDBUrl(strURL, strName, "IMDB"));
 }
示例#5
0
    /// <summary>
    /// Picks the movie database whose ID matches the <c>Database</c> value of
    /// <paramref name="url"/> and asks its grabber to load the movie details into
    /// <paramref name="movieDetails"/>.
    /// </summary>
    /// <param name="url">Search result to resolve; its <c>Database</c> value selects the grabber.</param>
    /// <param name="movieDetails">Movie object passed by reference to the grabber.</param>
    /// <returns>
    /// <c>true</c> when a matching database with a grabber was found and the grabber
    /// call completed without throwing; otherwise <c>false</c> (null
    /// <paramref name="url"/>, no database, no grabber, or a logged exception).
    /// </returns>
    public bool GetDetails(IMDBUrl url, ref IMDBMovie movieDetails)
    {
      // Reject a null url before entering the try block: the catch handler reads
      // url.Database for its log message and would throw again on a null url.
      if (url == null)
      {
        return false;
      }

      try
      {
        MovieInfoDatabase currentDB = null;

        // The loop intentionally runs to the end, so the last matching entry is used.
        foreach (MovieInfoDatabase db in _databaseList)
        {
          if (db.ID == url.Database)
          {
            currentDB = db;
          }
        }

        if (currentDB == null || currentDB.Grabber == null)
        {
          return false;
        }

        currentDB.Grabber.GetDetails(url, ref movieDetails);
        return true;
      }
      catch (Exception ex)
      {
        Log.Error("Movie database lookup GetDetails() - grabber: {0}, message : {1}", url.Database, ex.Message);
        return false;
      }
    }
示例#6
0
    /// <summary>
    /// Retrieves an actor's filmography and biography via the internal actors
    /// script grabber.
    /// </summary>
    /// <param name="url">Actor search result forwarded to the grabber.</param>
    /// <param name="actor">
    /// Set to an empty <c>IMDBActor</c> first, then overwritten by the grabber call.
    /// </param>
    /// <returns>
    /// <c>true</c> if the grabber reported success; <c>false</c> if it reported
    /// failure or an exception was caught and logged.
    /// </returns>
    public bool GetActorDetails(IMDBUrl url, out IMDBActor actor)
    {
      actor = new IMDBActor();

      try
      {
        bool succeeded = InternalActorsScriptGrabber.InternalActorsGrabber.GetActorDetails(url, out actor);
        if (!succeeded)
        {
          return false;
        }

        // Sorting is skipped when no movies were returned.
        if (actor.Count > 0)
        {
          actor.SortActorMoviesByYear();
        }

        return true;
      }
      catch (Exception ex)
      {
        Log.Error("IMDB GetActorDetails Error: {0}", ex.Message);
        return false;
      }
    }
示例#7
0
    // Changed - parsing all actor DB fields through HTML (IMDB changed HTML code)
    /// <summary>
    /// Downloads the person page at <c>url.URL</c> and scrapes name, photo, birth/death
    /// data, biography and filmography out of the HTML into <paramref name="actor"/>.
    /// </summary>
    /// <param name="url">Source of the page URL (<c>URL</c>) and the fallback name (<c>Title</c>).</param>
    /// <param name="director">
    /// <c>true</c> to read the "Director" filmography section; <c>false</c> to read the
    /// "Actress"/"Actor" section.
    /// </param>
    /// <param name="actor">Always assigned a new <c>IMDBActor</c>, then filled field by field.</param>
    /// <returns>
    /// <c>false</c> when the page body is null or empty, or when an exception was caught
    /// and logged; <c>true</c> otherwise, even if individual fields could not be found.
    /// </returns>
    public bool GetActorDetails(IMDBUrl url, bool director, out IMDBActor actor)
    {
      actor = new IMDBActor();
      try
      {
        string absoluteUri;
        string strBody = GetPage(url.URL, "utf-8", out absoluteUri);
        if (strBody == null)
        {
          return false;
        }
        if (strBody.Length == 0)
        {
          return false;
        }
        // IMDBActorID: the 9 characters starting at the last "nm" in the URL, with any "/" removed.
        try
        {
          int pos = url.URL.LastIndexOf("nm");
          string id = url.URL.Substring(pos, 9).Replace("/", string.Empty);
          actor.IMDBActorID = id;
        }
        // Best effort: if the URL has no usable "nm" segment the ID is simply left unset.
        catch (Exception) {}

        HTMLParser parser = new HTMLParser(strBody);
        string strThumb = string.Empty;
        string value = string.Empty;
        string value2 = string.Empty;
        // Actor name: the <title> text up to "- IMDb</title>", cleaned and HTML-decoded.
        if ((parser.skipToEndOf("<title>")) &&
            (parser.extractTo("- IMDb</title>", ref value)))
        {
          value = new HTMLUtil().ConvertHTMLToAnsi(value);
          value = Util.Utils.RemoveParenthesis(value).Trim();
          actor.Name = HttpUtility.HtmlDecode(value.Trim());
        }
        // Fall back to the title of the search result when no name was extracted.
        if (actor.Name == string.Empty)
        {
          actor.Name = url.Title;
        }
        // Photo: the parser content is temporarily narrowed to the "img_primary" cell
        // and then set back to the text saved in parserTxt.
        string parserTxt = parser.Content;
        string photoBlock = string.Empty;
        if (parser.skipToStartOf("<td id=\"img_primary\"") &&
            (parser.extractTo("</td>", ref photoBlock)))
        {
          parser.Content = photoBlock;
          if ((parser.skipToEndOf("<img src=\"")) &&
              (parser.extractTo("\"", ref strThumb)))
          {
            actor.ThumbnailUrl = strThumb;
          }
          parser.Content = parserTxt;
        }
        // Birth date: month/day text (value) followed by the year (value2).
        if ((parser.skipToEndOf("Born:")) &&
            (parser.skipToEndOf("birth_monthday=")) &&
            (parser.skipToEndOf(">")) &&
            (parser.extractTo("<", ref value)) &&
            (parser.skipToEndOf("year=")) &&
            (parser.extractTo("\"", ref value2)))
          
        {
          actor.DateOfBirth = value + " " + value2;
        }
        // Death date: appended to DateOfBirth as "<birth> ~ <death>"; "?" stands in for
        // an unknown birth date.
        if ((parser.skipToEndOf(">Died:</h4>")) &&
            (parser.skipToEndOf("deaths\">")) &&
            (parser.extractTo("<", ref value)) &&
            (parser.skipToEndOf("death_date=")) &&
            (parser.extractTo("\"", ref value2)))
        {
          if (actor.DateOfBirth == string.Empty)
            actor.DateOfBirth = "?";
          actor.DateOfBirth += " ~ " + value + " " + value2;
        }

        parser.resetPosition();
        // Birth place
        if ((parser.skipToEndOf("birth_place=")) &&
            (parser.skipToEndOf(">")) &&
            (parser.extractTo("<", ref value)))
        {
          actor.PlaceOfBirth = HttpUtility.HtmlDecode(value);
        }
        // Mini biography: first paragraph of the "overview-top" cell up to the "See full bio" link.
        parser.resetPosition();
        if ((parser.skipToEndOf("<td id=\"overview-top\">")) &&
            (parser.skipToEndOf("<p>")) &&
            (parser.extractTo("See full bio</a>", ref value)))
        {
          value = new HTMLUtil().ConvertHTMLToAnsi(value);
          actor.MiniBiography = Util.Utils.stripHTMLtags(value);
          actor.MiniBiography = actor.MiniBiography.Replace("See full bio »", string.Empty).Trim();
          actor.MiniBiography = HttpUtility.HtmlDecode(actor.MiniBiography); // Remove HTML entities like &#189;
          if (actor.MiniBiography != string.Empty)
          {
            // Get the complete biography from the "bio" sub-page of the URL reported by GetPage.
            string bioURL = absoluteUri;
            if (!bioURL.EndsWith("/"))
            {
              bioURL += "/bio";
            }
            else
              bioURL += "bio";
            string strBioBody = GetPage(bioURL, "utf-8", out absoluteUri);
            if (!string.IsNullOrEmpty(strBioBody))
            {
              HTMLParser parser1 = new HTMLParser(strBioBody);
              if (parser1.skipToEndOf("<h5>Mini Biography</h5>") &&
                  parser1.extractTo("</p>", ref value))
              {
                value = new HTMLUtil().ConvertHTMLToAnsi(value);
                actor.Biography = Util.Utils.stripHTMLtags(value).Trim();
                actor.Biography = HttpUtility.HtmlDecode(actor.Biography); // Remove HTML entities like &#189;
              }
            }
          }
        }
        // Person is movie director or an actor/actress: position the parser at the
        // start of the requested filmography section.
        bool isActorPass = false;
        bool isDirectorPass = false;
        parser.resetPosition();

        if (director)
        {
          if ((parser.skipToEndOf("name=\"Director\">Director</a>")) &&
              (parser.skipToEndOf("</div>")))
          {
            isDirectorPass = true;
          }
        }
        else
        {
          if (parser.skipToEndOf("name=\"Actress\">Actress</a>") || parser.skipToEndOf("name=\"Actor\">Actor</a>"))
          {
            isActorPass = true;
          }
        }
        // Get filmography (non-short-circuit "|" on two plain bools; same result as "||").
        if (isDirectorPass | isActorPass)
        {
          string movies = string.Empty;
          // Get films and roles block: everything up to the next "<div id".
          if (parser.extractTo("<div id", ref movies))
          {
            parser.Content = movies;
          }
          // Parse the block for every film and get year, title, its imdbID and role.
          // Each entry runs from a "year_column" span to the next "<div class".
          while (parser.skipToStartOf("<span class=\"year_column\""))
          {
            string movie = string.Empty;
            if (parser.extractTo("<div class", ref movie))
            {
              movie += "</li>";
              HTMLParser movieParser = new HTMLParser(movie);
              string title = string.Empty;
              string strYear = string.Empty;
              string role = string.Empty;
              string imdbID = string.Empty;
              // IMDBid: the path segment following "title/".
              movieParser.skipToEndOf("title/");
              movieParser.extractTo("/", ref imdbID);
              // Title: text of the first anchor up to "<br/>", tags and line breaks removed.
              movieParser.resetPosition();
              movieParser.skipToEndOf("<a");
              movieParser.skipToEndOf(">");
              movieParser.extractTo("<br/>", ref title);
              title = Util.Utils.stripHTMLtags(title);
              title = title.Replace("\n", " ").Replace("\r", string.Empty);
              title = HttpUtility.HtmlDecode(title.Trim()); // Remove HTML entities like &#189;
              // Year: element text that starts with "20", otherwise one that starts with "19".
              movieParser.resetPosition();
              if (movieParser.skipToStartOf(">20") &&
                  movieParser.skipToEndOf(">"))
              {
                movieParser.extractTo("<", ref strYear);
              }
              else if (movieParser.skipToStartOf(">19") &&
                       movieParser.skipToEndOf(">"))
              {
                movieParser.extractTo("<", ref strYear);
              }
              // Roles: only read for actors; continues from wherever the year lookup left the parser.
              if ((director == false) && (movieParser.skipToEndOf("<br/>"))) // Role case 1, no character link
              {
                movieParser.extractTo("<", ref role);
                role = Util.Utils.stripHTMLtags(role).Trim();
                role = HttpUtility.HtmlDecode(role.Replace("\n", " ")
                                                .Replace("\r", string.Empty).Trim());
                if (role == string.Empty) // Role case 2, with character link
                {
                  movieParser.resetPosition();
                  movieParser.skipToEndOf("<br/>");
                  movieParser.extractTo("</a>", ref role);
                  role = Util.Utils.stripHTMLtags(role).Trim();
                  role = HttpUtility.HtmlDecode(role.Replace("\n", " ")
                                                  .Replace("\r", string.Empty).Trim());
                }
              }
              else
              {
                // Just director
                if (director)
                  role = "Director";
              }

              // Use the first four characters of the year text; anything unparsable
              // (including an empty string) falls back to 1900.
              int year = 0;
              try
              {
                year = Int32.Parse(strYear.Substring(0, 4));
              }
              catch (Exception)
              {
                year = 1900;
              }
              IMDBActor.IMDBActorMovie actorMovie = new IMDBActor.IMDBActorMovie();
              actorMovie.MovieTitle = title;
              actorMovie.Role = role;
              actorMovie.Year = year;
              actorMovie.imdbID = imdbID;
              actor.Add(actorMovie);
            }
          }
        }
        return true;
      }
      catch (Exception ex)
      {
        // Any failure while downloading or parsing is logged and reported as false.
        Log.Error("IMDB.GetActorDetails({0} exception:{1} {2} {3}", url.URL, ex.Message, ex.Source, ex.StackTrace);
      }
      return false;
    }
示例#8
0
 // Changed - IMDB changed HTML code
 /// <summary>
 /// Downloads the page at <paramref name="strURL"/> and adds one <c>IMDBUrl</c> per
 /// actor found to <c>_elements</c>.
 /// </summary>
 /// <remarks>
 /// If the page title is anything other than "imdb name search" (compared without
 /// regard to case), the page itself is recorded as the single result, using the URL
 /// reported by <c>GetPage</c>. Otherwise the "Exact Matches" and "Popular Names"
 /// sections are scanned for "/name/" links. A null or empty page yields no results.
 /// Exceptions are logged and not rethrown.
 /// </remarks>
 /// <param name="strURL">URL of the search page to download.</param>
 private void FindIMDBActor(string strURL)
 {
   try
   {
     string absoluteUri;
     string strBody = GetPage(strURL, "utf-8", out absoluteUri);
     if (string.IsNullOrEmpty(strBody))
     {
       // Nothing was downloaded, so there is nothing to parse.
       return;
     }

     string value = string.Empty;
     HTMLParser parser = new HTMLParser(strBody);

     // Ordinal, case-insensitive comparison: lower-casing with the current culture
     // breaks under Turkish casing rules, where "I" does not lower-case to "i".
     if (parser.skipToEndOf("<title>") &&
         parser.extractTo("</title>", ref value) &&
         !string.Equals(value, "imdb name search", StringComparison.OrdinalIgnoreCase))
     {
       value = new HTMLUtil().ConvertHTMLToAnsi(value);
       value = Util.Utils.RemoveParenthesis(value).Trim();
       IMDBUrl oneUrl = new IMDBUrl(absoluteUri, value, "IMDB");
       _elements.Add(oneUrl);
       return;
     }

     AddIMDBActorMatches(parser, "Exact Matches");
     // Maybe more actors with the similar name
     AddIMDBActorMatches(parser, "Popular Names");
   }
   catch (Exception ex)
   {
     Log.Error("exception for imdb lookup of {0} err:{1} stack:{2}", strURL, ex.Message, ex.StackTrace);
   }
 }

 /// <summary>
 /// Rewinds <paramref name="parser"/> and, for every occurrence of
 /// <paramref name="sectionMarker"/>, adds the next "/name/" link as an
 /// <c>IMDBUrl</c> to <c>_elements</c>.
 /// </summary>
 /// <param name="parser">Parser holding the search result page.</param>
 /// <param name="sectionMarker">Section heading text, matched without regard to case.</param>
 private void AddIMDBActorMatches(HTMLParser parser, string sectionMarker)
 {
   parser.resetPosition();

   while (parser.skipToEndOfNoCase(sectionMarker))
   {
     string url = string.Empty;
     string name = string.Empty;
     //<a href="/name/nm0000246/" onclick="set_args('nm0000246', 1)">Bruce Willis</a>
     if (parser.skipToStartOf("href=\"/name/"))
     {
       parser.skipToEndOf("href=\"");
       parser.extractTo("\"", ref url);
       parser.skipToEndOf("<br><a");
       parser.skipToEndOf(">");
       parser.extractTo("</a>", ref name);
       name = new HTMLUtil().ConvertHTMLToAnsi(name);
       name = Util.Utils.RemoveParenthesis(name).Trim();
       IMDBUrl newUrl = new IMDBUrl("http://akas.imdb.com" + url, name, "IMDB");
       _elements.Add(newUrl);
     }
     else
     {
       // No name link follows this marker; move past the next anchor and keep looking.
       parser.skipToEndOfNoCase("</a>");
     }
   }
 }
示例#9
0
    /// <summary>
    /// Resolves the movie database named by <paramref name="url"/>'s <c>Database</c>
    /// value and hands the detail lookup to that database's grabber.
    /// </summary>
    /// <param name="url">Search result whose <c>Database</c> value identifies the source database.</param>
    /// <param name="movieDetails">Movie object the grabber receives by reference.</param>
    /// <returns>
    /// <c>true</c> when a database with a grabber was found and the grabber call did
    /// not throw; <c>false</c> for a null <paramref name="url"/>, a missing database or
    /// grabber, or an exception that was caught and logged.
    /// </returns>
    public bool GetDetails(IMDBUrl url, ref IMDBMovie movieDetails)
    {
      // Guard first: the catch handler logs url.Database and would itself throw
      // if url were null.
      if (url == null)
      {
        return false;
      }

      try
      {
        // The whole list is scanned; with duplicate IDs the last match is used.
        MovieInfoDatabase currentDB = null;
        foreach (MovieInfoDatabase db in _databaseList)
        {
          if (db.ID == url.Database)
          {
            currentDB = db;
          }
        }

        if (currentDB == null)
        {
          return false;
        }

        if (currentDB.Grabber == null)
        {
          return false;
        }

        currentDB.Grabber.GetDetails(url, ref movieDetails);
        return true;
      }
      catch (Exception ex)
      {
        Log.Error("Movie database lookup GetDetails() - grabber: {0}, message : {1}", url.Database, ex.Message);
        return false;
      }
    }
示例#10
0
文件: Grab.cs 项目: GuzziMP/my-films
        private void FindMovies_New(string strSearchInit, string strConfigFile, int strPage, bool alwaysAsk, out ArrayList ListUrl, out short WIndex)
        {
            #region variables
              WIndex = -1;
              string strSearch = strSearchInit;
              string strTemp = string.Empty;
              string strBody = string.Empty;
              string strItem = string.Empty;
              string strURL;
              string strSearchCleanup = string.Empty;
              string strVersion = string.Empty;
              string strStart = string.Empty;
              string strEnd = string.Empty;
              string strNext = string.Empty;
              string absoluteUri;
              string strStartItem = string.Empty; // selected item for grabbing
              string strStartTitle = string.Empty;
              string strEndTitle = string.Empty;
              string strStartYear = string.Empty;
              string strEndYear = string.Empty;
              string strStartDirector = string.Empty;
              string strEndDirector = string.Empty;
              string strStartLink = string.Empty;
              string strEndLink = string.Empty;
              string strStartID = string.Empty;
              string strEndID = string.Empty;
              string strStartOptions = string.Empty;
              string strEndOptions = string.Empty;
              string strStartAkas = string.Empty;
              string strEndAkas = string.Empty;
              string strKeyAkasRegExp = string.Empty;
              string strStartThumb = string.Empty;
              string strEndThumb = string.Empty;

              string strTitle = string.Empty;
              string strYear = string.Empty;
              string strDirector = string.Empty;
              string strID = string.Empty;
              string strOptions = string.Empty;
              string strAkas = string.Empty;
              string strThumb = string.Empty;

              string strIMDB_Id = string.Empty;
              string strTMDB_Id = string.Empty;
              string strLink = string.Empty;
              string strDBName;
              string strStartPage = string.Empty;
              int wStepPage = 0;
              int iFinItem = 0;
              int iStartItemLength = 0;
              int iStartTitle = 0;
              int iStartYear = 0;
              int iStartDirector = 0;
              int iStartID = 0;
              int iStartOptions = 0;
              int iStartAkas = 0;
              int iStartThumb = 0;
              int iStartUrl = 0;
              int iStart = 0;
              int iEnd = 0;
              int iLength = 0;
              string strRedir = string.Empty;
              string strParam1 = string.Empty;
              string strParam2 = string.Empty;
              #endregion

              elements.Clear();

              GrabUtil.RemoveStackEndings(ref strSearchInit);

              #region Regex creation with name of movie file
              byte[] bytes = Encoding.GetEncoding(1251).GetBytes(strSearchInit.ToLower());
              string file = Encoding.ASCII.GetString(bytes);
              file = GrabUtil.FilterFileName(file);
              file = file.Replace("-", " ");
              file = file.Replace("+", " ");
              file = file.Replace("!", " ");
              file = file.Replace("#", " ");
              file = file.Replace(";", " ");
              file = file.Replace(".", " ");
              file = file.Replace(",", " ");
              file = file.Replace("=", " ");
              file = file.Replace("&", " ");
              file = file.Replace("(", " ");
              file = file.Replace(")", " ");
              file = file.Replace("@", " ");
              file = file.Replace("%", " ");
              file = file.Replace("$", " ");
              file = file.Replace(":", " ");
              file = file.Replace("_", " ");
              file = file.Trim();
              var oRegex = new Regex(" +");
              file = oRegex.Replace(file, ":");
              file = file.Replace(":", ".*");
              oRegex = new Regex(file);
              #endregion

              #region Loading the configuration file
              //var doc = new XmlDocument();
              //doc.Load(strConfigFile);
              //script = doc.ChildNodes[1].FirstChild;
              // new method
              using (var fs = new FileStream(strConfigFile, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
              {
            grabberscript = new Grabber.Data.GrabberScript();
            //// synchronize dataset with hierarchical XMLdoc
            //xmlDoc = new XmlDataDocument(data);
            //xmlDoc.Load(fs);
            grabberscript.ReadXml(fs);
            fs.Close();
              }
              #endregion

              #region old loading method - DISABLED

              //strDBName = script.SelectSingleNodeFast("DBName").InnerText;

              //if (strDBName.ToLower().StartsWith("ofdb") && strSearchInit.Length > 3) // Optimization for searches with ofdb
              //{
              //  string strLeft = "";
              //  strLeft = strSearchInit.Substring(0, 3);
              //  if (strLeft.ToLower().Contains("der") || strLeft.ToLower().Contains("die") || strLeft.ToLower().Contains("das") || strLeft.ToLower().Contains("the"))
              //  {
              //    strSearchInit = strSearchInit.Substring(3).Trim() + ", " + strLeft.Trim();
              //    strSearch = strSearchInit;
              //  }
              //}

              //// retrieve manual encoding override, if any
              //try { strEncoding = script.SelectSingleNodeFast("Encoding").InnerText; }
              //catch (Exception) { strEncoding = ""; }

              //try // retrieve language, if any
              //{ strLanguage = script.SelectSingleNodeFast("Language").InnerText; }
              //catch (Exception) { strLanguage = ""; }

              //try // retrieve type, if any
              //{ strType = script.SelectSingleNodeFast("Type").InnerText; }
              //catch (Exception) { strType = ""; }

              //try // retrieve version, if any
              //{ strVersion = script.SelectSingleNodeFast("Version").InnerText; }
              //catch (Exception) { strVersion = ""; }

              //try // retrieve SearchCleanupDefinition, if any
              //{ strSearchCleanup = script.SelectSingleNodeFast("SearchCleanup").InnerText; }
              //catch (Exception) { strSearchCleanup = ""; }

              //try // retrieve SearchCleanupDefinition, if any
              //{ strAccept = script.SelectSingleNodeFast("Accept").InnerText; }
              //catch (Exception) { strAccept = ""; }

              //try // retrieve SearchCleanupDefinition, if any
              //{ strUserAgent = script.SelectSingleNodeFast("UserAgent").InnerText; }
              //catch (Exception) { strUserAgent = ""; }

              //try // retrieve SearchCleanupDefinition, if any
              //{ strHeaders = script.SelectSingleNodeFast("Headers").InnerText; }
              //catch (Exception) { strHeaders = ""; }

              //#region Retrieves the URL
              //strUrl = XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/URL").InnerText);
              //strRedir = XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/URL").Attributes["Param1"].InnerText);

              //strSearch = GrabUtil.CleanupSearch(strSearch, strSearchCleanup); // process SearchCleanup
              //strSearch = GrabUtil.EncodeSearch(strSearch, strEncoding); // Process Encoding of Search Expression

              //strUrl = strUrl.Replace("#Search#", strSearch);

              ////Retrieves the identifier of the next page
              //strNext = XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyNextPage").InnerText);
              //strNext = strNext.Replace("#Search#", strSearch);
              //#endregion

              //#region Load number of first and following pages
              ////Récupère Le n° de la première page
              //strStartPage = XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyStartPage").InnerText);
              ////Récupère Le step de page
              //try { wStepPage = Convert.ToInt16(XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyStepPage").InnerText)); }
              //catch { wStepPage = 1; }

              //int wpage = strPage;
              //int wpagedeb;
              //int wpageprev = 0;
              ////Fetch The No. of the first page
              //try { wpagedeb = Convert.ToInt16(XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyStartPage").InnerText)); }
              //catch { wpagedeb = 1; }
              //if (wpage - wStepPage < wpagedeb)
              //  wpageprev = -1;
              //else
              //  wpageprev = wpage - wStepPage;
              //#endregion

              //******************************/
              //* Search titles and links
              //******************************/

              ////Gets Key to the first page if it exists (not required)...
              //strStart = XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyStartList").InnerText);
              //strEnd = XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyEndList").InnerText);

              //strStartTitle = XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyStartTitle").InnerText);
              //strEndTitle = XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyEndTitle").InnerText);
              //strStartYear = XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyStartYear").InnerText);
              //strEndYear = XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyEndYear").InnerText);
              //strStartDirector = XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyStartDirector").InnerText);
              //strEndDirector = XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyEndDirector").InnerText);
              //strStartLink = XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyStartLink").InnerText);
              //strEndLink = XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyEndLink").InnerText);
              //strStartID = XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyStartID").InnerText);
              //strEndID = XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyEndID").InnerText);
              //strStartOptions = XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyStartOptions").InnerText);
              //strEndOptions = XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyEndOptions").InnerText);
              //strStartAkas = XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyStartAkas").InnerText);
              //strEndAkas = XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyEndAkas").InnerText);
              //strKeyAkasRegExp = XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyAkasRegExp").InnerText);
              //try { strStartThumb = XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyStartThumb").InnerText); }
              //catch (Exception) { strStartThumb = ""; }
              //try { strEndThumb = XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyEndThumb").InnerText); }
              //catch (Exception) { strEndThumb = ""; }

              #endregion

              #region new loading method

              Data.GrabberScript.SettingsRow settings = grabberscript.Settings[0];
              strDBName = settings.DBName;

              if (strDBName.ToLower().StartsWith("ofdb") && strSearchInit.Length > 3) // Optimization for searches with ofdb
              {
            string strLeft = "";
            strLeft = strSearchInit.Substring(0, 3);
            if (strLeft.ToLower().Contains("der") || strLeft.ToLower().Contains("die") || strLeft.ToLower().Contains("das") || strLeft.ToLower().Contains("the"))
            {
              strSearchInit = strSearchInit.Substring(3).Trim() + ", " + strLeft.Trim();
              strSearch = strSearchInit;
            }
              }

              // retrieve manual encoding override, if any
              try { strEncoding = settings.Encoding; }
              catch (Exception) { strEncoding = ""; }

              try // retrieve language, if any
              { strLanguage = settings.Language; }
              catch (Exception) { strLanguage = ""; }

              try // retrieve SearchCleanupDefinition, if any
              { strSearchCleanup = settings.SearchCleanup; }
              catch (Exception) { strSearchCleanup = ""; }

              try // retrieve SearchCleanupDefinition, if any
              { strAccept = settings.Accept; }
              catch (Exception) { strAccept = ""; }

              try // retrieve SearchCleanupDefinition, if any
              { strUserAgent = settings.Useragent; }
              catch (Exception) { strUserAgent = ""; }

              try // retrieve SearchCleanupDefinition, if any
              { strHeaders = settings.Headers; }
              catch (Exception) { strHeaders = ""; }

              #region Retrieves the URL

              Data.GrabberScript.URLSearchRow search = grabberscript.URLSearch[0];
              strURL = search.URL;
              strRedir = search.Redirection;

              strSearch = GrabUtil.ReplaceNormalOrRegex(strSearch, strSearchCleanup); // process SearchCleanup
              strSearch = GrabUtil.EncodeSearch(strSearch, strEncoding); // Process Encoding of Search Expression

              strURL = strURL.Replace("#Search#", strSearch);

              //Retrieves the identifier of the next page
              strNext = search.NextPage;
              strNext = strNext.Replace("#Search#", strSearch);
              #endregion

              #region Load number of first and following pages
              try { wStepPage = Convert.ToInt16(search.StepPage); }
              catch { wStepPage = 1; }

              int wpage = strPage;
              int wpagedeb;
              int wpageprev = 0;
              //Fetch The No. of the first page
              try { wpagedeb = Convert.ToInt16(search.StartPage); }
              catch { wpagedeb = 1; }
              if (wpage - wStepPage < wpagedeb)
            wpageprev = -1;
              else
            wpageprev = wpage - wStepPage;
              #endregion

              /******************************/
              /* Search titles and links
              /******************************/

              //Gets Key to the first page if it exists (not required)...
              Data.GrabberScript.SearchDetailsRow sd;
              foreach (var row in search.GetSearchDetailsRows().Where(row => row.Name == "Title"))
              {
            sd = row;
            break;
              }
              foreach (var searchdetails in search.GetSearchDetailsRows())
              {
            switch (searchdetails.Name)
            {
              case "List":
            strStart = searchdetails.Start;
            strEnd = searchdetails.End;
            break;
              case "Title":
            strStartTitle = searchdetails.Start;
            strEndTitle = searchdetails.End;
            break;
              case "Year":
            strStartYear = searchdetails.Start;
            strEndYear = searchdetails.End;
            break;
              case "Director":
            strStartDirector = searchdetails.Start;
            strEndDirector = searchdetails.End;
            break;
              case "DetailsURL":
            strStartLink = searchdetails.Start;
            strEndLink = searchdetails.End;
            break;
              case "ID":
            strStartID = searchdetails.Start;
            strEndID = searchdetails.End;
            break;
              case "Options":
            strStartOptions = searchdetails.Start;
            strEndOptions = searchdetails.End;
            break;
              case "Akas":
            strStartAkas = searchdetails.Start;
            strEndAkas = searchdetails.Start;
            strKeyAkasRegExp = searchdetails.RegExp;
            break;
              case "Thumb":
            try
            {
              strStartThumb = searchdetails.Start;
            }
            catch (Exception)
            {
              strStartThumb = "";
            }
            try
            {
              strEndThumb = searchdetails.End;
            }
            catch (Exception)
            {
              strEndThumb = "";
            }
            break;
            }
              }

              #endregion

              var cook = new CookieContainer();

              #region now load the searchresults

              //Récupère la page wpage
              strBody = GrabUtil.GetPage(strURL.Replace("#Page#", wpage.ToString()), strEncoding, out absoluteUri, cook, strHeaders, strAccept, strUserAgent);
              //redirection auto : 1 résult
              if (!absoluteUri.Equals(strURL.Replace("#Page#", wpage.ToString())))
              {
            IMDBUrl url = new IMDBUrl(absoluteUri, strSearchInit + " (AutoRedirect)", strDBName, null, wpage.ToString());
            elements.Add(url);
            ListUrl = elements;
            WIndex = 0;
            return;
              }

              if (strRedir.Length > 0)
            strBody = GrabUtil.GetPage(strRedir, strEncoding, out absoluteUri, cook, strHeaders, strAccept, strUserAgent);

              wpage += wStepPage;

              /******************************/
              /* Cutting the list
              /******************************/
              // If you have at least the key to start, we cut strBody
              iStart = 0;
              iEnd = 0;
              iLength = 0;

              if (strStart.Length > 0)
              {
            iStart = GrabUtil.FindPosition(strBody, strStart, iStart, ref iLength, true, true);
            if (iStart < 0) iStart = 0;
              }
              if (strEnd.Length > 0)
              {
            iEnd = GrabUtil.FindPosition(strBody, strEnd, iStart, ref iLength, true, false);
            if (iEnd <= 0) iEnd = strBody.Length;
              }

              // Cutting the body
              try { strBody = strBody.Substring(iStart, iEnd - iStart); }
              catch { }

              // Now grab the search data from stripped search page !
              iStart = 0;
              iFinItem = 0;
              iStartTitle = 0;
              iStartUrl = 0;
              iLength = 0;
              IMDBUrl urlprev = new IMDBUrl(strURL.Replace("#Page#", wpageprev.ToString()), "---", strDBName, script, wpageprev.ToString());

              if (strBody != "")
              {
            // Comparison between the position of URL and title to boundary elements //if (strBody.IndexOf(strStartTitle, 0) > strBody.IndexOf(strStartLink, 0))
            int iPosStartTitle = 0; iPosStartTitle = GrabUtil.FindPosition(strBody, strStartTitle, iPosStartTitle, ref iLength, true, false);
            int iPosStartLink = 0; iPosStartLink = GrabUtil.FindPosition(strBody, strStartLink, iPosStartLink, ref iLength, true, false);
            strStartItem = iPosStartTitle > iPosStartLink ? strStartLink : strStartTitle;

            // set start position for all elements (lowest possible position found)
            iFinItem = GrabUtil.FindPosition(strBody, strStartItem, iFinItem, ref iLength, true, false);
            iStartItemLength = iLength;

            // iFinItem += strStartItem.Length;
            iStartYear = iStartDirector = iStartUrl = iStartTitle = iStartID = iStartOptions = iStartAkas = iStartThumb = iFinItem;

            while (true)
            {
              // determining the end of nth Item (if the index fields are higher then found => no info for this item
              if (iFinItem <= 0) break;
              //iFinItem = GrabUtil.FindPosition(strBody, strStartItem, iFinItem + strStartItem.Length, ref iLength, true, false);
              iFinItem = GrabUtil.FindPosition(strBody, strStartItem, iFinItem + iStartItemLength, ref iLength, true, false);
              iStartItemLength = iLength;
              // Initialisation

              #region Read Movie Title

              strParam1 = SearchDetail("Title").Param1; // XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyStartTitle").Attributes["Param1"].InnerText);
              strParam2 = SearchDetail("Title").Param2; // //XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyStartTitle").Attributes["Param2"].InnerText);

              strTitle = strParam1.Length > 0 ? GrabUtil.FindWithAction(strBody, strStartTitle, ref iStartTitle, strEndTitle, strParam1, strParam2).Trim() : GrabUtil.Find(strBody, strStartTitle, ref iStartTitle, strEndTitle).Trim();

              if (strTitle.Length == 0)
            break;

              // Reorder article for ofdb to beginning
              if (strDBName.ToLower().StartsWith("ofdb") && strTitle.Length > 3) // Optimization for searches with ofdb
              {
            string strRight = "";
            strRight = strTitle.Substring(strTitle.Length - 3);
            if (strRight.ToLower().Contains("der") || strRight.ToLower().Contains("die") || strRight.ToLower().Contains("das") || strRight.ToLower().Contains("the"))
            {

              strTitle = strRight.Trim() + " " + strTitle.Substring(0, strTitle.Length - 3).Trim().Trim(',');
            }
              }
              #endregion

              #region Title outbound range Item re-delimit range item
              if ((iStartTitle > iFinItem) && (iFinItem != -1))
              {
            iStartYear = iStartDirector = iStartUrl = iStartTitle = iStartID = iStartOptions = iStartAkas = iStartThumb = iFinItem;
            //iFinItem = strBody.IndexOf(strStartItem, iFinItem + strStartItem.Length);
            //iFinItem = GrabUtil.FindPosition(strBody, strStartItem, iFinItem + strStartItem.Length, ref iLength, true, false);
            iFinItem = GrabUtil.FindPosition(strBody, strStartItem, iFinItem + iStartItemLength, ref iLength, true, false);
            iStartItemLength = iLength;
            if (iFinItem <= 0)
              break;
              }
              #endregion

              #region  read movie year
              strParam1 = SearchDetail("Year").Param1; // XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyStartYear").Attributes["Param1"].InnerText);
              strParam2 = SearchDetail("Year").Param2; // XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyStartYear").Attributes["Param2"].InnerText);

              strYear = strParam1.Length > 0 ? GrabUtil.FindWithAction(strBody, strStartYear, ref iStartYear, strEndYear, strParam1, strParam2).Trim() : GrabUtil.Find(strBody, strStartYear, ref iStartYear, strEndYear).Trim();

              if ((strYear.Length == 0) || ((iStartYear > iFinItem) && iFinItem != -1))
            strYear = string.Empty;
              #endregion

              #region read movie director
              strParam1 = SearchDetail("Director").Param1; // XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyStartDirector").Attributes["Param1"].InnerText);
              strParam2 = SearchDetail("Director").Param2; // XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyStartDirector").Attributes["Param2"].InnerText);

              strDirector = strParam1.Length > 0 ? GrabUtil.FindWithAction(strBody, strStartDirector, ref iStartDirector, strEndDirector, strParam1, strParam2).Trim() : GrabUtil.Find(strBody, strStartDirector, ref iStartDirector, strEndDirector).Trim();

              if ((strDirector.Length == 0) || ((iStartDirector > iFinItem) && iFinItem != -1))
            strDirector = string.Empty;
              #endregion

              #region read movie ID
              strParam1 = SearchDetail("ID").Param1; // XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyStartID").Attributes["Param1"].InnerText);
              strParam2 = SearchDetail("ID").Param2; // XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyStartID").Attributes["Param2"].InnerText);

              strID = strParam1.Length > 0 ? GrabUtil.FindWithAction(strBody, strStartID, ref iStartID, strEndID, strParam1, strParam2).Trim() : GrabUtil.Find(strBody, strStartID, ref iStartID, strEndID).Trim();

              if (strID.Length == 0 || (iStartID > iFinItem && iFinItem != -1))
            strID = string.Empty;
              #endregion

              #region read movie Options
              strParam1 = SearchDetail("Options").Param1; // XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyStartOptions").Attributes["Param1"].InnerText);
              strParam2 = SearchDetail("Options").Param2; // XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyStartOptions").Attributes["Param2"].InnerText);

              strOptions = strParam1.Length > 0 ? GrabUtil.FindWithAction(strBody, strStartOptions, ref iStartOptions, strEndOptions, strParam1, strParam2).Trim() : GrabUtil.Find(strBody, strStartOptions, ref iStartOptions, strEndOptions).Trim();

              if ((strOptions.Length == 0) || ((iStartOptions > iFinItem) && iFinItem != -1))
            strOptions = string.Empty;
              #endregion

              #region read movie Akas
              strParam1 = SearchDetail("Akas").Param1; // XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyStartAkas").Attributes["Param1"].InnerText);
              strParam2 = SearchDetail("Akas").Param2; // XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyStartAkas").Attributes["Param2"].InnerText);

              strAkas = strParam1.Length > 0 ? GrabUtil.FindWithAction(strBody, strStartAkas, ref iStartAkas, strEndAkas, strParam1, strParam2).Trim() : GrabUtil.Find(strBody, strStartAkas, ref iStartAkas, strEndAkas).Trim();

              if (strAkas.Length == 0 || (iStartAkas > iFinItem && iFinItem != -1))
            strAkas = string.Empty;
              #endregion

              #region read movie Thumb
              strParam1 = SearchDetail("Thumb").Param1; // XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyStartThumb").Attributes["Param1"].InnerText);
              strParam2 = SearchDetail("Thumb").Param2; // XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyStartThumb").Attributes["Param2"].InnerText);

              strThumb = strParam1.Length > 0 ? GrabUtil.FindWithAction(strBody, strStartThumb, ref iStartThumb, strEndThumb, strParam1, strParam2).Trim() : GrabUtil.Find(strBody, strStartThumb, ref iStartThumb, strEndThumb).Trim();

              if ((strThumb.Length == 0) || ((iStartThumb > iFinItem) && iFinItem != -1))
            strThumb = string.Empty;
              #endregion

              #region create akas string with titles
              if (!String.IsNullOrEmpty(strKeyAkasRegExp)) // strKeyAkasRegExp = @"aka." + "\"" + ".*?" + "\"" + ".-";
              {
            strTemp = strAkas;
            strTemp = HttpUtility.HtmlDecode(strTemp);
            strTemp = HttpUtility.HtmlDecode(strTemp).Replace("\n", "");
            Regex p = new Regex(strKeyAkasRegExp, RegexOptions.Singleline);
            iLength = 0;

            MatchCollection MatchList = p.Matches(strTemp);
            if (MatchList.Count > 0)
            {
              string matchstring = "";
              foreach (Match match in MatchList)
              {
                if (matchstring.Length > 0) matchstring += " | " + match.Groups["aka"].Value;
                else matchstring = match.Groups["aka"].Value;
              }
              if (matchstring.Length > 0)
                strAkas = matchstring;
            }
            // else strAkas = "";
              }
              #endregion

              #region read movie url
              strParam1 = SearchDetail("DetailsURL").Param1; // XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyStartLink").Attributes["Param1"].InnerText);
              strParam2 = SearchDetail("DetailsURL").Param2; // XmlConvert.DecodeName(script.SelectSingleNodeFast("URLSearch/KeyStartLink").Attributes["Param2"].InnerText);

              strLink = strParam1.Length > 0 ? GrabUtil.FindWithAction(strBody, strStartLink, ref iStartUrl, strEndLink, strParam1, strParam2).Trim() : GrabUtil.Find(strBody, strStartLink, ref iStartUrl, strEndLink).Trim();

              if (strLink.Length != 0)
              {
            // check, if IMDB id is existing
            if (strLink.Contains(@"/tt"))
            {
              strIMDB_Id = strLink.Substring(strLink.IndexOf(@"/tt") + 1);
              strIMDB_Id = strIMDB_Id.Substring(0, strIMDB_Id.IndexOf(@"/"));
              // Fix for redirection on AKAS site on IMDB
              if (strLink.Contains("onclick"))
                strLink = strLink.Substring(0, strLink.IndexOf("onclick"));
            }
            // check, if TMDB id is existing
            if (strLink.Contains(@"themoviedb.org/movie/"))
            {
              strIMDB_Id = strLink.Substring(strLink.IndexOf(@"themoviedb.org/movie/") + 20); // assign TMDB ID
            }
            if (!strLink.StartsWith("http://") && !strLink.StartsWith("www."))
            {
              //si les liens sont relatifs on rajoute le préfix (domaine)
              strLink = grabberscript.Settings[0].URLPrefix; // XmlConvert.DecodeName(script.SelectSingleNodeFast("URLPrefix").InnerText + strLink);
            }

            //Ajout http:// s'il n'y est pas (Pour GetPage)
            if (strLink.StartsWith("www."))
              strLink = "http://" + strLink;

            //Added new element, we pass the node of xml file to find the details

            //IMDBUrl url = new IMDBUrl(strLink, strTitle + " (" + strYear + ") " + strDirector, strDBName, n, wpage.ToString(), strYear, strDirector, strImdbId, strTmdbId) ;
            IMDBUrl url = new IMDBUrl(strLink, strTitle, strDBName, script, wpage.ToString(), strYear, strDirector, strIMDB_Id, strTMDB_Id, strID, strOptions, strAkas, strThumb);
            bytes = Encoding.GetEncoding(1251).GetBytes(strTitle.ToLower());
            MatchCollection oMatches = oRegex.Matches(Encoding.ASCII.GetString(bytes));
            if (oMatches.Count > 0)
              if (alwaysAsk)
                WIndex = -2;
              else
                if (WIndex == -1)
                  WIndex = (short)elements.Count;
                else
                  WIndex = -2;

            if ((elements.Count == 0) && (strNext.Length > 0) && (strBody.Contains(strNext.Replace("#Page#", wpageprev.ToString()))) && !(wpageprev < 0))
              elements.Add(urlprev);
            elements.Add(url);
              }
              #endregion

              // init new search indexes
              iStartYear = iStartDirector = iStartUrl = iStartTitle = iStartID = iStartOptions = iStartAkas = iStartThumb = iFinItem;
            }
              }
              IMDBUrl urlsuite = new IMDBUrl(strURL.Replace("#Page#", wpage.ToString()), "+++", strDBName, script, wpage.ToString());

              if ((strBody.Contains(strNext.Replace("#Page#", wpage.ToString()))) && (strNext.Length > 0))
            elements.Add(urlsuite);
              #endregion

              ListUrl = elements;
        }
示例#11
0
        /// <summary>
        /// Appends a new <see cref="IMDBUrl"/> entry to the element list.
        /// </summary>
        /// <param name="strURL">First argument handed to the <see cref="IMDBUrl"/> constructor (the entry's URL).</param>
        /// <param name="strName">Second argument handed to the <see cref="IMDBUrl"/> constructor (the entry's name).</param>
        public void SetIMDBActor(string strURL, string strName)
        {
            // The entry is always created with the literal "IMDB" as its third constructor argument.
            _elements.Add(new IMDBUrl(strURL, strName, "IMDB"));
        }
示例#12
0
        /// <summary>
        /// Downloads the page at <c>url.URL</c> and fills a new <see cref="IMDBActor"/> with the
        /// values found on it: id, name, thumbnail, birth/death date and place, mini biography,
        /// full biography (fetched from a second page) and the filmography per role.
        /// </summary>
        /// <remarks>
        /// All HTML markers come from <c>VdbParserStringActorDetails()</c>; exactly 46 entries are
        /// expected. The trailing comments next to each index show the marker that entry is
        /// presumed to hold (the actual values are defined outside this method).
        /// </remarks>
        /// <param name="url">Source of the page address (<c>URL</c>) and of the fallback name (<c>Title</c>).</param>
        /// <param name="actor">Always assigned a new instance; populated as far as parsing succeeds.</param>
        /// <returns>
        /// <c>true</c> when the page was fetched and processed without an exception; <c>false</c> when
        /// the marker array is missing or has the wrong length, the page body is null/empty, or an
        /// exception was thrown (it is logged via <c>Log.Error</c>).
        /// </returns>
        public bool GetActorDetails(IMDBUrl url, out IMDBActor actor)
        {
            actor = new IMDBActor();

            string[] vdbParserStr = VdbParserStringActorDetails();

            if (vdbParserStr == null || vdbParserStr.Length != 46)
            {
                return(false);
            }

            try
            {
                string absoluteUri;
                string strBody = GetPage(url.URL, "utf-8", out absoluteUri);

                if (string.IsNullOrEmpty(strBody))
                {
                    return(false);
                }

                #region Actor imdb id

                // IMDBActorID: the 9 characters starting at the last "nm" in the URL, with any '/'
                // removed. The id is left untouched when the URL holds no such 9-character window
                // (checked explicitly instead of swallowing the resulting exception).
                // NOTE(review): the fixed width of 9 presumably means "nm" + 7 digits - confirm
                // that longer ids do not need to be supported.
                string actorUrl = url.URL;
                int    idPos    = actorUrl == null ? -1 : actorUrl.LastIndexOf("nm", StringComparison.Ordinal);

                if (idPos >= 0 && idPos + 9 <= actorUrl.Length)
                {
                    actor.IMDBActorID = actorUrl.Substring(idPos, 9).Replace("/", string.Empty);
                }

                #endregion

                HTMLParser parser   = new HTMLParser(strBody);
                string     strThumb = string.Empty;
                string     value    = string.Empty;
                string     value2   = string.Empty;

                #region Actor name

                // Actor name
                if ((parser.skipToEndOf(vdbParserStr[0])) &&        // <title>
                    (parser.extractTo(vdbParserStr[1], ref value))) // - IMDb</title>
                {
                    value      = new HTMLUtil().ConvertHTMLToAnsi(value);
                    value      = Util.Utils.RemoveParenthesis(value).Trim();
                    actor.Name = HttpUtility.HtmlDecode(value.Trim());
                }

                // Fall back to the title carried by the url when the page yielded no name.
                if (string.IsNullOrEmpty(actor.Name))
                {
                    actor.Name = url.Title;
                }

                #endregion

                // Photo
                // Remember the full page content so it can be restored after the parser has been
                // narrowed down to the photo block.
                string parserTxt  = parser.Content;
                string photoBlock = string.Empty;

                #region Actor photo

                if (parser.skipToStartOf(vdbParserStr[2]) &&             // <td id="img_primary"
                    (parser.extractTo(vdbParserStr[3], ref photoBlock))) // </td>
                {
                    parser.Content = photoBlock;

                    if ((parser.skipToEndOf(vdbParserStr[4])) &&           // <img src="
                        (parser.extractTo(vdbParserStr[5], ref strThumb))) // "
                    {
                        actor.ThumbnailUrl = strThumb;
                    }
                    parser.Content = parserTxt;
                }

                #endregion

                #region Actor birth date

                // Birth date
                if ((parser.skipToEndOf(vdbParserStr[6])) &&          // >Born:</h4>
                    (parser.skipToEndOf(vdbParserStr[7])) &&          // birth_monthday=
                    (parser.skipToEndOf(vdbParserStr[8])) &&          // >
                    (parser.extractTo(vdbParserStr[9], ref value)) && // <
                    (parser.skipToEndOf(vdbParserStr[10])) &&         // year=
                    (parser.extractTo(vdbParserStr[11], ref value2))) // "

                {
                    actor.DateOfBirth = value + " " + value2;
                }

                #endregion

                #region Actor death date

                // Death date
                if ((parser.skipToEndOf(vdbParserStr[12])) &&          // >Died:</h4>
                    (parser.skipToEndOf(vdbParserStr[13])) &&          // death_monthday="
                    (parser.skipToEndOf(vdbParserStr[14])) &&          // >
                    (parser.extractTo(vdbParserStr[15], ref value)) && // <
                    (parser.skipToEndOf(vdbParserStr[16])) &&          // death_date="
                    (parser.extractTo(vdbParserStr[17], ref value2)))  // "
                {
                    actor.DateOfDeath = value + " " + value2;
                }

                #endregion

                parser.resetPosition();

                #region Actor birth place

                // Birth place
                if ((parser.skipToEndOf(vdbParserStr[18])) &&        // birth_place=
                    (parser.skipToEndOf(vdbParserStr[19])) &&        // >
                    (parser.extractTo(vdbParserStr[20], ref value))) // <
                {
                    actor.PlaceOfBirth = HttpUtility.HtmlDecode(value);
                }

                #endregion

                #region Actor death place

                // Death place
                if ((parser.skipToEndOf(vdbParserStr[21])) &&        // death_place=
                    (parser.skipToEndOf(vdbParserStr[22])) &&        // >
                    (parser.extractTo(vdbParserStr[23], ref value))) // <
                {
                    actor.PlaceOfDeath = HttpUtility.HtmlDecode(value);
                }

                #endregion

                //Mini Biography
                parser.resetPosition();

                #region Actor biography

                if ((parser.skipToEndOf(vdbParserStr[24])) &&        // <td id="overview-top">
                    (parser.skipToEndOf(vdbParserStr[25])) &&        // <p>
                    (parser.extractTo(vdbParserStr[26], ref value))) // See full bio</a>
                {
                    value = new HTMLUtil().ConvertHTMLToAnsi(value);
                    actor.MiniBiography = Util.Utils.stripHTMLtags(value);
                    actor.MiniBiography = actor.MiniBiography.Replace(vdbParserStr[45], string.Empty).Trim(); // See full bio »
                    actor.MiniBiography = HttpUtility.HtmlDecode(actor.MiniBiography);                        // Remove HTML entities like &#189;

                    // The full biography page is only requested when a mini biography was found.
                    if (!string.IsNullOrEmpty(actor.MiniBiography))
                    {
                        // get complete biography
                        // Build the bio address from the URI reported by the first GetPage call,
                        // choosing the suffix by whether that URI already ends with the separator.
                        string bioURL = absoluteUri;

                        if (!bioURL.EndsWith(vdbParserStr[27], StringComparison.Ordinal)) // /
                        {
                            bioURL += vdbParserStr[28];                                   // /bio
                        }
                        else
                        {
                            bioURL += vdbParserStr[29]; // bio
                        }

                        string strBioBody = GetPage(bioURL, "utf-8", out absoluteUri);

                        if (!string.IsNullOrEmpty(strBioBody))
                        {
                            HTMLParser parser1 = new HTMLParser(strBioBody);

                            if (parser1.skipToEndOf(vdbParserStr[30]) &&        // <h5>Mini Biography</h5>
                                parser1.skipToEndOf(vdbParserStr[31]) &&        // <div class="wikipedia_bio">
                                parser1.extractTo(vdbParserStr[32], ref value)) // </div>
                            {
                                // Turn the <h5> headings into line breaks before the tags are stripped.
                                value           = new HTMLUtil().ConvertHTMLToAnsi(value);
                                value           = Regex.Replace(value, @"</h5>\s<h5>", "\n\r");
                                value           = Regex.Replace(value, @"<h5>", "\n\r\n\r");
                                value           = Regex.Replace(value, @"</h5>", ":\n\r");
                                actor.Biography = Util.Utils.stripHTMLtags(value).Trim();
                                actor.Biography = HttpUtility.HtmlDecode(actor.Biography);
                            }
                            else
                            {
                                // Second attempt with a different pair of markers, from the top of the page.
                                parser1.resetPosition();

                                if (parser1.skipToEndOf(vdbParserStr[33]) &&        // <h5>Mini Biography</h5>
                                    parser1.extractTo(vdbParserStr[34], ref value)) // </p>
                                {
                                    value           = new HTMLUtil().ConvertHTMLToAnsi(value);
                                    actor.Biography = Util.Utils.stripHTMLtags(value).Trim();
                                    actor.Biography = HttpUtility.HtmlDecode(actor.Biography);
                                }
                            }
                        }
                    }
                }

                #endregion

                // Person is movie director or an actor/actress
                bool isActorPass    = false;
                bool isDirectorPass = false;
                bool isWriterPass   = false;

                parser.resetPosition();

                HTMLParser dirParser = new HTMLParser(); // HTML body for Director
                HTMLParser wriParser = new HTMLParser(); // HTML body for Writers

                #region Check person role in movie (actor, director or writer)

                if ((parser.skipToEndOf(vdbParserStr[35])) && // name="Director">Director</a>
                    (parser.skipToEndOf(vdbParserStr[36])))   // </div>
                {
                    isDirectorPass    = true;
                    dirParser.Content = parser.Content;
                }

                parser.resetPosition();

                if ((parser.skipToEndOf(vdbParserStr[37])) && // name="Writer">Writer</a>
                    (parser.skipToEndOf(vdbParserStr[38])))   // </div>
                {
                    isWriterPass      = true;
                    wriParser.Content = parser.Content;
                }

                parser.resetPosition();

                if (parser.skipToEndOf(vdbParserStr[39]) || // name="Actress">Actress</a>
                    parser.skipToEndOf(vdbParserStr[40]))   // name="Actor">Actor</a>
                {
                    isActorPass = true;
                }

                #endregion

                #region Get movies for every role

                // NOTE(review): the two boolean arguments of GetActorMovies look like
                // "is director" and "is writer" flags, judging by the calls below - confirm.

                // Get filmography Actor
                // Uses the main parser at the position reached by the actress/actor check above.
                if (isActorPass)
                {
                    GetActorMovies(actor, parser, false, false);
                }

                // Get filmography for writers
                if (isWriterPass)
                {
                    parser = wriParser;
                    parser.resetPosition();

                    if ((parser.skipToEndOf(vdbParserStr[41])) && // name="Writer">Writer</a>
                        (parser.skipToEndOf(vdbParserStr[42])))   // </div>
                    {
                        GetActorMovies(actor, parser, false, true);
                    }
                }

                // Get filmography Director
                if (isDirectorPass)
                {
                    parser = dirParser;
                    parser.resetPosition();

                    if (parser.skipToEndOf(vdbParserStr[43]) && // name="Director">Director</a>
                        parser.skipToEndOf(vdbParserStr[44]))   // </div>
                    {
                        GetActorMovies(actor, parser, true, false);
                    }
                }

                #endregion

                // Add filmography
                if (actor.Count > 0)
                {
                    actor.SortActorMoviesByYear();
                }

                return(true);
            }
            catch (Exception ex)
            {
                Log.Error("IMDB.GetActorDetails({0} exception:{1} {2} {3}", url.URL, ex.Message, ex.Source, ex.StackTrace);
            }
            return(false);
        }
示例#13
0
        private void FindIMDBActor(string strURL)
        {
            string[] vdbParserStr = VdbParserStringActor();

            if (vdbParserStr == null || vdbParserStr.Length != 29)
            {
                return;
            }

            try
            {
                string absoluteUri;
                // UTF-8 have problem with special country chars, default IMDB enc is used
                string     strBody = GetPage(strURL, "utf-8", out absoluteUri);
                string     value   = string.Empty;
                HTMLParser parser  = new HTMLParser(strBody);

                if ((parser.skipToEndOf(vdbParserStr[0])) &&           // <title>
                    (parser.extractTo(vdbParserStr[1], ref value)) &&  // </title>
                    !value.ToLowerInvariant().Equals(vdbParserStr[2])) // imdb name search
                {
                    value = new HTMLUtil().ConvertHTMLToAnsi(value);
                    value = Util.Utils.RemoveParenthesis(value).Trim();
                    IMDBUrl oneUrl = new IMDBUrl(absoluteUri, value, "IMDB");
                    _elements.Add(oneUrl);
                    return;
                }

                parser.resetPosition();

                string popularBody = string.Empty;
                string exactBody   = string.Empty;
                string url         = string.Empty;
                string name        = string.Empty;
                string role        = string.Empty;

                if (parser.skipToStartOfNoCase(vdbParserStr[3]))        // Popular names
                {
                    parser.skipToEndOf(vdbParserStr[4]);                // <table>
                    parser.extractTo(vdbParserStr[5], ref popularBody); // </table>

                    parser = new HTMLParser(popularBody);

                    while (parser.skipToStartOf(vdbParserStr[6]))     // href="/name/
                    {
                        parser.skipToEndOf(vdbParserStr[7]);          // href="
                        parser.extractTo(vdbParserStr[8], ref url);   // "
                        parser.skipToEndOf(vdbParserStr[9]);          // Image()).src='/rg/find-name-
                        parser.skipToEndOf(vdbParserStr[10]);         // ';">
                        parser.extractTo(vdbParserStr[11], ref name); // </a>
                        parser.skipToEndOf(vdbParserStr[12]);         // <small>(
                        parser.extractTo(vdbParserStr[13], ref role); // ,

                        if (role != string.Empty)
                        {
                            name += " - " + role;
                        }

                        name = new HTMLUtil().ConvertHTMLToAnsi(name);
                        name = Util.Utils.RemoveParenthesis(name).Trim();
                        IMDBUrl newUrl = new IMDBUrl("http://www.imdb.com" + url, name, "IMDB");
                        _elements.Add(newUrl);
                        parser.skipToEndOf(vdbParserStr[14]); // </tr>
                    }
                }
                parser = new HTMLParser(strBody);

                if (parser.skipToStartOfNoCase(vdbParserStr[15]))      // Exact Matches
                {
                    parser.skipToEndOf(vdbParserStr[16]);              // <table>
                    parser.extractTo(vdbParserStr[17], ref exactBody); // </table>
                }
                else if (parser.skipToStartOfNoCase(vdbParserStr[18])) // Approx Matches
                {
                    parser.skipToEndOf(vdbParserStr[19]);              // <table>
                    parser.extractTo(vdbParserStr[20], ref exactBody); // </table>
                }
                else
                {
                    return;
                }

                parser = new HTMLParser(exactBody);
                url    = string.Empty;
                name   = string.Empty;
                role   = string.Empty;

                while (parser.skipToStartOf(vdbParserStr[21]))    // href="/name/
                {
                    parser.skipToEndOf(vdbParserStr[22]);         // href="
                    parser.extractTo(vdbParserStr[23], ref url);  // "
                    parser.skipToEndOf(vdbParserStr[24]);         // Image()).src='/rg/find-name-
                    parser.skipToEndOf(vdbParserStr[25]);         // ';">
                    parser.extractTo(vdbParserStr[26], ref name); // </a>
                    parser.skipToEndOf(vdbParserStr[27]);         // <small>(
                    parser.extractTo(vdbParserStr[28], ref role); // ,

                    if (role != string.Empty)
                    {
                        name += " - " + role;
                    }

                    name = new HTMLUtil().ConvertHTMLToAnsi(name);
                    name = Util.Utils.RemoveParenthesis(name).Trim();
                    IMDBUrl newUrl = new IMDBUrl("http://www.imdb.com" + url, name, "IMDB");
                    _elements.Add(newUrl);
                    parser.skipToEndOf(vdbParserStr[29]); // </tr>
                }
            }
            catch (Exception ex)
            {
                Log.Error("exception for imdb lookup of {0} err:{1} stack:{2}", strURL, ex.Message, ex.StackTrace);
            }
        }
示例#14
0
        // Changed - parsing all actor DB fields through HTML (IMDB changed HTML code)
        /// <summary>
        /// Downloads the IMDB person page behind <paramref name="url"/> and copies what can be parsed
        /// from its HTML (name, photo, birth/death dates, birth place, biography, filmography)
        /// into a new <see cref="IMDBActor"/>.
        /// </summary>
        /// <param name="url">Search hit to load; <c>url.URL</c> is fetched and <c>url.Title</c> is the fallback name.</param>
        /// <param name="director">
        /// <c>true</c> to read the "Director" filmography block (every role is reported as "Director"),
        /// <c>false</c> to read the "Actress"/"Actor" block and parse the role of each film.
        /// </param>
        /// <param name="actor">Always receives a new instance, filled as far as parsing succeeded.</param>
        /// <returns>
        /// <c>false</c> when the page body is null or empty, or when an exception was caught and logged;
        /// otherwise <c>true</c>.
        /// </returns>
        public bool GetActorDetails(IMDBUrl url, bool director, out IMDBActor actor)
        {
            actor = new IMDBActor();
            try
            {
                string absoluteUri;
                string strBody = GetPage(url.URL, "utf-8", out absoluteUri);
                if (string.IsNullOrEmpty(strBody))
                {
                    return false;
                }

                // IMDBActorID: the 9 characters starting at the last "nm" in the URL, slashes removed.
                // The bounds are checked explicitly so that a URL without such an id simply leaves the
                // property untouched instead of raising (and swallowing) an exception.
                string personUrl = url.URL;
                if (personUrl != null)
                {
                    int pos = personUrl.LastIndexOf("nm", StringComparison.Ordinal);
                    if (pos >= 0 && pos + 9 <= personUrl.Length)
                    {
                        actor.IMDBActorID = personUrl.Substring(pos, 9).Replace("/", string.Empty);
                    }
                }

                HTMLParser parser   = new HTMLParser(strBody);
                string     strThumb = string.Empty;
                string     value    = string.Empty;
                string     value2   = string.Empty;

                // Actor name: text of the <title> element up to the "- IMDb" suffix
                if (parser.skipToEndOf("<title>") &&
                    parser.extractTo("- IMDb</title>", ref value))
                {
                    value      = new HTMLUtil().ConvertHTMLToAnsi(value);
                    value      = Util.Utils.RemoveParenthesis(value).Trim();
                    actor.Name = HttpUtility.HtmlDecode(value.Trim());
                }
                if (actor.Name == string.Empty)
                {
                    // Nothing usable in the title: fall back to the title of the search hit
                    actor.Name = url.Title;
                }

                // Photo: the parser is temporarily pointed at the primary image cell only and
                // afterwards given back the full page text it had before.
                string parserTxt  = parser.Content;
                string photoBlock = string.Empty;
                if (parser.skipToStartOf("<td id=\"img_primary\"") &&
                    parser.extractTo("</td>", ref photoBlock))
                {
                    parser.Content = photoBlock;
                    if (parser.skipToEndOf("<img src=\"") &&
                        parser.extractTo("\"", ref strThumb))
                    {
                        actor.ThumbnailUrl = strThumb;
                    }
                    parser.Content = parserTxt;
                }

                // Birth date: "<month day> <year>"
                if (parser.skipToEndOf("Born:") &&
                    parser.skipToEndOf("birth_monthday=") &&
                    parser.skipToEndOf(">") &&
                    parser.extractTo("<", ref value) &&
                    parser.skipToEndOf("year=") &&
                    parser.extractTo("\"", ref value2))
                {
                    actor.DateOfBirth = value + " " + value2;
                }

                // Death date: appended to DateOfBirth as "<birth> ~ <death>"; "?" stands in for an unknown birth date
                if (parser.skipToEndOf(">Died:</h4>") &&
                    parser.skipToEndOf("deaths\">") &&
                    parser.extractTo("<", ref value) &&
                    parser.skipToEndOf("death_date=") &&
                    parser.extractTo("\"", ref value2))
                {
                    if (actor.DateOfBirth == string.Empty)
                    {
                        actor.DateOfBirth = "?";
                    }
                    actor.DateOfBirth += " ~ " + value + " " + value2;
                }

                // Birth place
                parser.resetPosition();
                if (parser.skipToEndOf("birth_place=") &&
                    parser.skipToEndOf(">") &&
                    parser.extractTo("<", ref value))
                {
                    actor.PlaceOfBirth = HttpUtility.HtmlDecode(value);
                }

                // Mini biography (from the overview) and, when one was found, the full biography page
                parser.resetPosition();
                if (parser.skipToEndOf("<td id=\"overview-top\">") &&
                    parser.skipToEndOf("<p>") &&
                    parser.extractTo("See full bio</a>", ref value))
                {
                    value = new HTMLUtil().ConvertHTMLToAnsi(value);
                    actor.MiniBiography = Util.Utils.stripHTMLtags(value);
                    actor.MiniBiography = actor.MiniBiography.Replace("See full bio »", string.Empty).Trim();
                    actor.MiniBiography = HttpUtility.HtmlDecode(actor.MiniBiography); // Remove HTML entities like &#189;
                    if (actor.MiniBiography != string.Empty)
                    {
                        // The complete biography lives at "<person url>/bio"
                        string bioURL = absoluteUri;
                        if (!bioURL.EndsWith("/"))
                        {
                            bioURL += "/bio";
                        }
                        else
                        {
                            bioURL += "bio";
                        }
                        string strBioBody = GetPage(bioURL, "utf-8", out absoluteUri);
                        if (!string.IsNullOrEmpty(strBioBody))
                        {
                            HTMLParser parser1 = new HTMLParser(strBioBody);
                            if (parser1.skipToEndOf("<h5>Mini Biography</h5>") &&
                                parser1.extractTo("</p>", ref value))
                            {
                                value           = new HTMLUtil().ConvertHTMLToAnsi(value);
                                actor.Biography = Util.Utils.stripHTMLtags(value).Trim();
                                actor.Biography = HttpUtility.HtmlDecode(actor.Biography); // Remove HTML entities like &#189;
                            }
                        }
                    }
                }

                // Position the parser behind the heading of the requested filmography section
                bool isActorPass    = false;
                bool isDirectorPass = false;
                parser.resetPosition();

                if (director)
                {
                    if (parser.skipToEndOf("name=\"Director\">Director</a>") &&
                        parser.skipToEndOf("</div>"))
                    {
                        isDirectorPass = true;
                    }
                }
                else
                {
                    if (parser.skipToEndOf("name=\"Actress\">Actress</a>") || parser.skipToEndOf("name=\"Actor\">Actor</a>"))
                    {
                        isActorPass = true;
                    }
                }

                // Get filmography
                if (isDirectorPass || isActorPass)
                {
                    string movies = string.Empty;
                    // Restrict the parser to the films-and-roles block, which ends at the next "<div id"
                    if (parser.extractTo("<div id", ref movies))
                    {
                        parser.Content = movies;
                    }
                    // Parse the block film by film: year, title, IMDB id and role
                    while (parser.skipToStartOf("<span class=\"year_column\""))
                    {
                        string movie = string.Empty;
                        if (parser.extractTo("<div class", ref movie))
                        {
                            movie += "</li>";
                            HTMLParser movieParser = new HTMLParser(movie);
                            string     title       = string.Empty;
                            string     strYear     = string.Empty;
                            string     role        = string.Empty;
                            string     imdbID      = string.Empty;
                            // IMDBid
                            movieParser.skipToEndOf("title/");
                            movieParser.extractTo("/", ref imdbID);
                            // Title
                            movieParser.resetPosition();
                            movieParser.skipToEndOf("<a");
                            movieParser.skipToEndOf(">");
                            movieParser.extractTo("<br/>", ref title);
                            title = Util.Utils.stripHTMLtags(title);
                            title = title.Replace("\n", " ").Replace("\r", string.Empty);
                            title = HttpUtility.HtmlDecode(title.Trim()); // Remove HTML entities like &#189;
                            // Year: look for an element text starting with "20" first, then one starting with "19"
                            movieParser.resetPosition();
                            if (movieParser.skipToStartOf(">20") &&
                                movieParser.skipToEndOf(">"))
                            {
                                movieParser.extractTo("<", ref strYear);
                            }
                            else if (movieParser.skipToStartOf(">19") &&
                                     movieParser.skipToEndOf(">"))
                            {
                                movieParser.extractTo("<", ref strYear);
                            }
                            // Roles
                            if ((director == false) && movieParser.skipToEndOf("<br/>")) // Role case 1, no character link
                            {
                                movieParser.extractTo("<", ref role);
                                role = Util.Utils.stripHTMLtags(role).Trim();
                                role = HttpUtility.HtmlDecode(role.Replace("\n", " ")
                                                              .Replace("\r", string.Empty).Trim());
                                if (role == string.Empty) // Role case 2, with character link
                                {
                                    movieParser.resetPosition();
                                    movieParser.skipToEndOf("<br/>");
                                    movieParser.extractTo("</a>", ref role);
                                    role = Util.Utils.stripHTMLtags(role).Trim();
                                    role = HttpUtility.HtmlDecode(role.Replace("\n", " ")
                                                                  .Replace("\r", string.Empty).Trim());
                                }
                            }
                            else if (director)
                            {
                                // Just director
                                role = "Director";
                            }

                            // The first four characters of the year text are the year; 1900 marks
                            // "missing or not numeric". TryParse keeps the common no-year case from
                            // throwing an exception for every such film.
                            int year = 0;
                            if (strYear == null || strYear.Length < 4 ||
                                !Int32.TryParse(strYear.Substring(0, 4), out year))
                            {
                                year = 1900;
                            }

                            IMDBActor.IMDBActorMovie actorMovie = new IMDBActor.IMDBActorMovie();
                            actorMovie.MovieTitle = title;
                            actorMovie.Role       = role;
                            actorMovie.Year       = year;
                            actorMovie.imdbID     = imdbID;
                            actor.Add(actorMovie);
                        }
                    }
                }
                return true;
            }
            catch (Exception ex)
            {
                Log.Error("IMDB.GetActorDetails({0} exception:{1} {2} {3}", url.URL, ex.Message, ex.Source, ex.StackTrace);
            }
            return false;
        }
示例#15
0
        // Changed - IMDB changed HTML code
        /// <summary>
        /// Downloads an IMDB name-search page and adds the people found on it to <c>_elements</c>.
        /// </summary>
        /// <remarks>
        /// If the page title is anything other than "imdb name search" (ignoring case), the response is
        /// treated as a direct hit and added as the only result, using the final URI reported by
        /// <c>GetPage</c>. Otherwise one person link is collected after every "Exact Matches" marker and
        /// then, from the top of the page again, after every "Popular Names" marker.
        /// Any exception is logged and not rethrown.
        /// </remarks>
        /// <param name="strURL">Search URL to download.</param>
        private void FindIMDBActor(string strURL)
        {
            try
            {
                string absoluteUri;
                // The page is requested as UTF-8
                string     strBody = GetPage(strURL, "utf-8", out absoluteUri);
                string     value   = string.Empty;
                HTMLParser parser  = new HTMLParser(strBody);

                // Ordinal, case-insensitive comparison on purpose: ToLower() follows the current culture
                // and, e.g. under a Turkish locale, turns 'I' into a dotless 'ı', so the title of the
                // search page would never be recognised.
                if (parser.skipToEndOf("<title>") &&
                    parser.extractTo("</title>", ref value) &&
                    !value.Equals("imdb name search", StringComparison.OrdinalIgnoreCase))
                {
                    value = new HTMLUtil().ConvertHTMLToAnsi(value);
                    value = Util.Utils.RemoveParenthesis(value).Trim();
                    IMDBUrl oneUrl = new IMDBUrl(absoluteUri, value, "IMDB");
                    _elements.Add(oneUrl);
                    return;
                }

                parser.resetPosition();
                AddActorLinksAfterMarker(parser, "Exact Matches");

                // Maybe more actors with the similar name
                parser.resetPosition();
                AddActorLinksAfterMarker(parser, "Popular Names");
            }
            catch (Exception ex)
            {
                Log.Error("exception for imdb lookup of {0} err:{1} stack:{2}", strURL, ex.Message, ex.StackTrace);
            }
        }

        /// <summary>
        /// For every occurrence of <paramref name="sectionMarker"/> (matched ignoring case) from the
        /// parser's current position onwards, reads the next person link and adds it to <c>_elements</c>.
        /// </summary>
        /// <param name="parser">Parser over the search page; its position is advanced.</param>
        /// <param name="sectionMarker">Heading text that precedes a person link, e.g. "Exact Matches".</param>
        private void AddActorLinksAfterMarker(HTMLParser parser, string sectionMarker)
        {
            while (parser.skipToEndOfNoCase(sectionMarker))
            {
                string url  = string.Empty;
                string name = string.Empty;
                //<a href="/name/nm0000246/" onclick="set_args('nm0000246', 1)">Bruce Willis</a>
                if (parser.skipToStartOf("href=\"/name/"))
                {
                    parser.skipToEndOf("href=\"");
                    parser.extractTo("\"", ref url);
                    parser.skipToEndOf("<br><a");
                    parser.skipToEndOf(">");
                    parser.extractTo("</a>", ref name);
                    name = new HTMLUtil().ConvertHTMLToAnsi(name);
                    name = Util.Utils.RemoveParenthesis(name).Trim();
                    IMDBUrl newUrl = new IMDBUrl("http://akas.imdb.com" + url, name, "IMDB");
                    _elements.Add(newUrl);
                }
                else
                {
                    // No person link behind this marker: move past the next anchor and keep scanning
                    parser.skipToEndOfNoCase("</a>");
                }
            }
        }
示例#16
0
    /// <summary>
    /// Downloads an IMDB name-search page and adds the people found on it to <c>_elements</c>,
    /// using the parser strings supplied by <c>VdbParserStringActor()</c>.
    /// </summary>
    /// <remarks>
    /// If the page title differs from parser string 2, the response is treated as a direct hit and
    /// added as the only result. Otherwise the rows of the "popular names" table are added first,
    /// followed by the rows of the "exact matches" table or, when that is absent, the "approx matches"
    /// table. A role found next to a name is appended as "name - role".
    /// Any exception is logged and not rethrown.
    /// </remarks>
    /// <param name="strURL">Search URL to download.</param>
    private void FindIMDBActor(string strURL)
    {
      string[] vdbParserStr = VdbParserStringActor();

      // The code below reads indexes 0..29. The previous guard only let arrays of exactly 29 entries
      // (indexes 0..28) through, so reading index 29 always ended in an IndexOutOfRangeException after
      // the first exact match. Accept the 30-entry layout the code actually uses, and keep accepting
      // the 29-entry layout by falling back to a substitute row terminator (below).
      if (vdbParserStr == null || (vdbParserStr.Length != 29 && vdbParserStr.Length != 30))
      {
        return;
      }

      // Row terminator of the exact/approx table. With a 29-entry array, index 14 (the row terminator
      // of the popular-names table, annotated below as the same "</tr>" token) is used instead.
      string matchRowEnd = vdbParserStr.Length > 29 ? vdbParserStr[29] : vdbParserStr[14];

      try
      {
        string absoluteUri;
        // The page is requested as UTF-8
        string strBody = GetPage(strURL, "utf-8", out absoluteUri);
        string value = string.Empty;
        HTMLParser parser = new HTMLParser(strBody);

        // ToLowerInvariant instead of ToLower: the culture-sensitive variant maps 'I' to a dotless 'ı'
        // under e.g. a Turkish locale, so the search-page title would never be recognised.
        if ((parser.skipToEndOf(vdbParserStr[0])) &&              // <title>
            (parser.extractTo(vdbParserStr[1], ref value)) &&     // </title>
            !value.ToLowerInvariant().Equals(vdbParserStr[2]))    // imdb name search
        {
          value = new HTMLUtil().ConvertHTMLToAnsi(value);
          value = Util.Utils.RemoveParenthesis(value).Trim();
          IMDBUrl oneUrl = new IMDBUrl(absoluteUri, value, "IMDB");
          _elements.Add(oneUrl);
          return;
        }

        parser.resetPosition();

        string popularBody = string.Empty;
        string exactBody = string.Empty;
        string url = string.Empty;
        string name = string.Empty;
        string role = string.Empty;

        if (parser.skipToStartOfNoCase(vdbParserStr[3]))      // Popular names
        {
          parser.skipToEndOf(vdbParserStr[4]);                // <table>
          parser.extractTo(vdbParserStr[5], ref popularBody); // </table>

          // From here on only the popular-names table is parsed
          parser = new HTMLParser(popularBody);

          while (parser.skipToStartOf(vdbParserStr[6]))       // href="/name/
          {
            parser.skipToEndOf(vdbParserStr[7]);              // href="
            parser.extractTo(vdbParserStr[8], ref url);       // "
            parser.skipToEndOf(vdbParserStr[9]);              // Image()).src='/rg/find-name-
            parser.skipToEndOf(vdbParserStr[10]);             // ';">
            parser.extractTo(vdbParserStr[11], ref name);     // </a>
            parser.skipToEndOf(vdbParserStr[12]);             // <small>(
            parser.extractTo(vdbParserStr[13], ref role);     // ,

            if (role != string.Empty)
            {
              name += " - " + role;
            }

            name = new HTMLUtil().ConvertHTMLToAnsi(name);
            name = Util.Utils.RemoveParenthesis(name).Trim();
            IMDBUrl newUrl = new IMDBUrl("http://www.imdb.com" + url, name, "IMDB");
            _elements.Add(newUrl);
            parser.skipToEndOf(vdbParserStr[14]); // </tr>
          }
        }

        // Start over on the complete page for the exact/approx matches
        parser = new HTMLParser(strBody);

        if (parser.skipToStartOfNoCase(vdbParserStr[15]))       // Exact Matches
        {
          parser.skipToEndOf(vdbParserStr[16]);                 // <table>
          parser.extractTo(vdbParserStr[17], ref exactBody);    // </table>
        }
        else if (parser.skipToStartOfNoCase(vdbParserStr[18]))  // Approx Matches
        {
          parser.skipToEndOf(vdbParserStr[19]);                 // <table>
          parser.extractTo(vdbParserStr[20], ref exactBody);    // </table>
        }
        else
        {
          return;
        }

        parser = new HTMLParser(exactBody);
        url = string.Empty;
        name = string.Empty;
        role = string.Empty;

        while (parser.skipToStartOf(vdbParserStr[21]))  // href="/name/
        {
          parser.skipToEndOf(vdbParserStr[22]);         // href="
          parser.extractTo(vdbParserStr[23], ref url);  // "
          parser.skipToEndOf(vdbParserStr[24]);         // Image()).src='/rg/find-name-
          parser.skipToEndOf(vdbParserStr[25]);         // ';">
          parser.extractTo(vdbParserStr[26], ref name); // </a>
          parser.skipToEndOf(vdbParserStr[27]);         // <small>(
          parser.extractTo(vdbParserStr[28], ref role); // ,

          if (role != string.Empty)
          {
            name += " - " + role;
          }

          name = new HTMLUtil().ConvertHTMLToAnsi(name);
          name = Util.Utils.RemoveParenthesis(name).Trim();
          IMDBUrl newUrl = new IMDBUrl("http://www.imdb.com" + url, name, "IMDB");
          _elements.Add(newUrl);
          parser.skipToEndOf(matchRowEnd); // </tr>
        }
      }
      catch (Exception ex)
      {
        Log.Error("exception for imdb lookup of {0} err:{1} stack:{2}", strURL, ex.Message, ex.StackTrace);
      }
    }
示例#17
0
    // Filmography and bio
    /// <summary>
    /// Downloads the IMDB person page behind <paramref name="url"/> and copies what can be parsed
    /// from its HTML (name, photo, birth/death date and place, biography, filmography) into a new
    /// <see cref="IMDBActor"/>, using the parser strings supplied by <c>VdbParserStringActorDetails()</c>.
    /// </summary>
    /// <param name="url">Search hit to load; <c>url.URL</c> is fetched and <c>url.Title</c> is the fallback name.</param>
    /// <param name="actor">Always receives a new instance, filled as far as parsing succeeded.</param>
    /// <returns>
    /// <c>false</c> when the parser strings are missing or not exactly 46 entries, when the page body
    /// is null or empty, or when an exception was caught and logged; otherwise <c>true</c>.
    /// </returns>
    public bool GetActorDetails(IMDBUrl url, out IMDBActor actor)
    {
      actor = new IMDBActor();

      string[] vdbParserStr = VdbParserStringActorDetails();

      // Indexes 0..45 are used below
      if (vdbParserStr == null || vdbParserStr.Length != 46)
      {
        return false;
      }

      try
      {
        string absoluteUri;
        string strBody = GetPage(url.URL, "utf-8", out absoluteUri);

        if (string.IsNullOrEmpty(strBody))
        {
          return false;
        }

        #region Actor imdb id

        // IMDBActorID: the 9 characters starting at the last "nm" in the URL, slashes removed.
        // The bounds are checked explicitly so that a URL without such an id simply leaves the
        // property untouched instead of raising (and swallowing) an exception.
        string personUrl = url.URL;

        if (personUrl != null)
        {
          int pos = personUrl.LastIndexOf("nm", StringComparison.Ordinal);

          if (pos >= 0 && pos + 9 <= personUrl.Length)
          {
            actor.IMDBActorID = personUrl.Substring(pos, 9).Replace("/", string.Empty);
          }
        }

        #endregion

        HTMLParser parser = new HTMLParser(strBody);
        string strThumb = string.Empty;
        string value = string.Empty;
        string value2 = string.Empty;

        #region Actor name

        // Actor name
        if ((parser.skipToEndOf(vdbParserStr[0])) &&        // <title>
            (parser.extractTo(vdbParserStr[1], ref value))) // - IMDb</title>
        {
          value = new HTMLUtil().ConvertHTMLToAnsi(value);
          value = Util.Utils.RemoveParenthesis(value).Trim();
          actor.Name = HttpUtility.HtmlDecode(value.Trim());
        }

        if (actor.Name == string.Empty)
        {
          // Nothing usable in the title: fall back to the title of the search hit
          actor.Name = url.Title;
        }

        #endregion

        // Photo: the parser is temporarily pointed at the primary image cell only and
        // afterwards given back the full page text it had before.
        string parserTxt = parser.Content;
        string photoBlock = string.Empty;

        #region Actor photo

        if (parser.skipToStartOf(vdbParserStr[2]) &&              // <td id="img_primary"
            (parser.extractTo(vdbParserStr[3], ref photoBlock)))  // </td>
        {
          parser.Content = photoBlock;

          if ((parser.skipToEndOf(vdbParserStr[4])) &&            // <img src="
              (parser.extractTo(vdbParserStr[5], ref strThumb)))  // "
          {
            actor.ThumbnailUrl = strThumb;
          }
          parser.Content = parserTxt;
        }

        #endregion

        #region Actor birth date

        // Birth date: "<month day> <year>"
        if ((parser.skipToEndOf(vdbParserStr[6])) &&          // >Born:</h4>
            (parser.skipToEndOf(vdbParserStr[7])) &&          // birth_monthday=
            (parser.skipToEndOf(vdbParserStr[8])) &&          // >
            (parser.extractTo(vdbParserStr[9], ref value)) && // <
            (parser.skipToEndOf(vdbParserStr[10])) &&         // year=
            (parser.extractTo(vdbParserStr[11], ref value2))) // "

        {
          actor.DateOfBirth = value + " " + value2;
        }

        #endregion

        #region Actor death date

        // Death date: "<month day> <year>"
        if ((parser.skipToEndOf(vdbParserStr[12])) &&           // >Died:</h4>
            (parser.skipToEndOf(vdbParserStr[13])) &&           // death_monthday="
            (parser.skipToEndOf(vdbParserStr[14])) &&           // >
            (parser.extractTo(vdbParserStr[15], ref value)) &&  // <
            (parser.skipToEndOf(vdbParserStr[16])) &&           // death_date="
            (parser.extractTo(vdbParserStr[17], ref value2)))   // "
        {
          actor.DateOfDeath = value + " " + value2;
        }

        #endregion

        parser.resetPosition();

        #region Actor birth place

        // Birth place
        if ((parser.skipToEndOf(vdbParserStr[18])) &&         // birth_place=
            (parser.skipToEndOf(vdbParserStr[19])) &&         // >
            (parser.extractTo(vdbParserStr[20], ref value)))  // <
        {
          actor.PlaceOfBirth = HttpUtility.HtmlDecode(value);
        }

        #endregion

        #region Actor death place

        // Death place (searched from wherever the birth place lookup left the parser)
        if ((parser.skipToEndOf(vdbParserStr[21])) &&         // death_place=
            (parser.skipToEndOf(vdbParserStr[22])) &&         // >
            (parser.extractTo(vdbParserStr[23], ref value)))  // <
        {
          actor.PlaceOfDeath = HttpUtility.HtmlDecode(value);
        }

        #endregion

        // Mini biography (from the overview) and, when one was found, the full biography page
        parser.resetPosition();

        #region Actor biography

        if ((parser.skipToEndOf(vdbParserStr[24])) &&         // <td id="overview-top">
            (parser.skipToEndOf(vdbParserStr[25])) &&         // <p>
            (parser.extractTo(vdbParserStr[26], ref value)))  // See full bio</a>
        {
          value = new HTMLUtil().ConvertHTMLToAnsi(value);
          actor.MiniBiography = Util.Utils.stripHTMLtags(value);
          actor.MiniBiography = actor.MiniBiography.Replace(vdbParserStr[45], string.Empty).Trim(); // See full bio »
          actor.MiniBiography = HttpUtility.HtmlDecode(actor.MiniBiography); // Remove HTML entities like &#189;

          if (actor.MiniBiography != string.Empty)
          {
            // get complete biography
            string bioURL = absoluteUri;

            if (!bioURL.EndsWith(vdbParserStr[27])) // /
            {
              bioURL += vdbParserStr[28];           // /bio
            }
            else
            {
              bioURL += vdbParserStr[29];           // bio
            }

            string strBioBody = GetPage(bioURL, "utf-8", out absoluteUri);

            if (!string.IsNullOrEmpty(strBioBody))
            {
              HTMLParser parser1 = new HTMLParser(strBioBody);

              // Preferred layout: biography wrapped in its own block; <h5> headings inside it are
              // turned into line breaks before the remaining tags are stripped.
              if (parser1.skipToEndOf(vdbParserStr[30]) &&        // <h5>Mini Biography</h5>
                  parser1.skipToEndOf(vdbParserStr[31]) &&        // <div class="wikipedia_bio">
                  parser1.extractTo(vdbParserStr[32], ref value)) // </div>
              {
                value = new HTMLUtil().ConvertHTMLToAnsi(value);
                value = Regex.Replace(value, @"</h5>\s<h5>", "\n\r");
                value = Regex.Replace(value, @"<h5>", "\n\r\n\r");
                value = Regex.Replace(value, @"</h5>", ":\n\r");
                actor.Biography = Util.Utils.stripHTMLtags(value).Trim();
                actor.Biography = HttpUtility.HtmlDecode(actor.Biography);
              }
              else
              {
                // Fallback layout: first paragraph after the heading
                parser1.resetPosition();

                if (parser1.skipToEndOf(vdbParserStr[33]) &&      // <h5>Mini Biography</h5>
                  parser1.extractTo(vdbParserStr[34], ref value)) // </p>
                {
                  value = new HTMLUtil().ConvertHTMLToAnsi(value);
                  actor.Biography = Util.Utils.stripHTMLtags(value).Trim();
                  actor.Biography = HttpUtility.HtmlDecode(actor.Biography);
                }
              }
            }
          }
        }

        #endregion

        // Person is movie director or an actor/actress
        bool isActorPass = false;
        bool isDirectorPass = false;
        bool isWriterPass = false;

        parser.resetPosition();

        HTMLParser dirParser = new HTMLParser(); // HTML body for Director
        HTMLParser wriParser = new HTMLParser(); // HTML body for Writers

        #region Check person role in movie (actor, director or writer)

        // Each section found gets its own parser over the same page text, so that the sections can
        // be walked independently further down.
        if ((parser.skipToEndOf(vdbParserStr[35])) && // name="Director">Director</a>
            (parser.skipToEndOf(vdbParserStr[36])))   // </div>
        {
          isDirectorPass = true;
          dirParser.Content = parser.Content;
        }

        parser.resetPosition();

        if ((parser.skipToEndOf(vdbParserStr[37])) && // name="Writer">Writer</a>
            (parser.skipToEndOf(vdbParserStr[38])))   // </div>
        {
          isWriterPass = true;
          wriParser.Content = parser.Content;
        }

        parser.resetPosition();

        // The main parser is left positioned behind the Actress/Actor heading for GetActorMovies
        if (parser.skipToEndOf(vdbParserStr[39]) || // name="Actress">Actress</a>
          parser.skipToEndOf(vdbParserStr[40]))     // name="Actor">Actor</a>
        {
          isActorPass = true;
        }

        #endregion

        #region Get movies for every role

        // NOTE(review): the two boolean arguments of GetActorMovies presumably mean
        // (director, writer) - confirm against its declaration.

        // Get filmography Actor
        if (isActorPass)
        {
          GetActorMovies(actor, parser, false, false);
        }

        // Get filmography for writers
        if (isWriterPass)
        {
          parser = wriParser;
          parser.resetPosition();

          if ((parser.skipToEndOf(vdbParserStr[41])) && // name="Writer">Writer</a>
            (parser.skipToEndOf(vdbParserStr[42])))     // </div>
          {
            GetActorMovies(actor, parser, false, true);
          }
        }

        // Get filmography Director
        if (isDirectorPass)
        {
          parser = dirParser;
          parser.resetPosition();

          if (parser.skipToEndOf(vdbParserStr[43]) && // name="Director">Director</a>
              parser.skipToEndOf(vdbParserStr[44]))   // </div>
          {
            GetActorMovies(actor, parser, true, false);
          }
        }

        #endregion

        // Add filmography
        if (actor.Count > 0)
        {
          actor.SortActorMoviesByYear();
        }

        return true;
      }
      catch (Exception ex)
      {
        Log.Error("IMDB.GetActorDetails({0} exception:{1} {2} {3}", url.URL, ex.Message, ex.Source, ex.StackTrace);
      }
      return false;
    }
示例#18
0
文件: Grab.cs 项目: GuzziMP/my-films
        /// <summary>
        /// Collects the candidate <c>IMDBUrl</c> entries for a search, either from local files
        /// located next to the media file or from a web search run through <c>GrabberUrlClass</c>.
        /// </summary>
        /// <param name="strSearch">
        /// Search expression. A value containing a backslash is treated as a media file path and
        /// selects the file-based branch. Trailing dots are removed before a web search.
        /// </param>
        /// <param name="strConfigFile">Path of the grabber XML configuration file to load.</param>
        /// <param name="strPage">
        /// Result page to request. -1 means "read URLSearch/KeyStartPage from the configuration",
        /// falling back to 1 when that value cannot be read.
        /// </param>
        /// <param name="alwaysAsk">Passed through unchanged to <c>GrabberUrlClass.FindMovies</c>.</param>
        /// <param name="strMediaPath">
        /// Path of the media file used to locate local files. When empty and
        /// <paramref name="strSearch"/> contains a backslash, the search expression is used instead.
        /// </param>
        /// <returns>
        /// The <c>elements</c> list. In the file-based branch it holds one entry per matching file
        /// (possibly none). In the web branch it holds whatever <c>FindMovies</c> produced, reduced
        /// to the single entry at the reported index when that index is not negative.
        /// </returns>
        public ArrayList ReturnURL(string strSearch, string strConfigFile, int strPage, bool alwaysAsk, string strMediaPath)
        {
            XmlDocument doc = new XmlDocument();
            doc.Load(strConfigFile);
            XmlNode n = doc.ChildNodes[1].FirstChild;

            // Every setting is optional: a missing or unreadable node yields its fallback value.
            string strDBName = ReadGrabberSetting(n, "DBName", "ERROR");
            string strURLFile = ReadGrabberSetting(n, "URLSearch/URL", "");
            string strFileBasedReader = ReadGrabberSetting(n, "FileBasedReader", "false");
            string cleanup = ReadGrabberSetting(n, "SearchCleanup", string.Empty);

            // If the script is a file based reader, or the search expression looks like a path,
            // treat this as an nfo/xml reader request and return the local files to read.
            if (strFileBasedReader == "true" || strSearch.Contains("\\"))
            {
                if (string.IsNullOrEmpty(strMediaPath) && strSearch.Contains("\\"))
                {
                    strMediaPath = strSearch;
                }

                elements.Clear();
                if (!string.IsNullOrEmpty(strURLFile))
                {
                    try
                    {
                        string directory = Path.GetDirectoryName(strMediaPath);
                        string filename = Path.GetFileNameWithoutExtension(strMediaPath);

                        // First try the exact file name ...
                        AddMatchingFiles(directory, strURLFile.Replace("#Filename#", filename), strDBName, n);

                        // ... and only if that found nothing, retry with the cleaned-up file name.
                        if (elements.Count == 0)
                        {
                            string cleanedName = GrabUtil.ReplaceNormalOrRegex(filename, cleanup);
                            AddMatchingFiles(directory, strURLFile.Replace("#Filename#", cleanedName), strDBName, n);
                        }
                    }
                    catch (Exception e)
                    {
                        // Best effort: a failing directory scan is logged and the (possibly empty) list is returned.
                        LogMyFilms.Debug(e, "ReturnURL() - Catched Exception: " + e.Message);
                    }
                }

                return elements;
            }

            // No local grabbing requested, so do web grabbing.
            if (strPage == -1)
            {
                // First run: use the configured start page key if it exists (it is not required).
                try
                {
                    strPage = Convert.ToInt16(XmlConvert.DecodeName(n.SelectSingleNodeFast("URLSearch/KeyStartPage").InnerText));
                }
                catch
                {
                    strPage = 1;
                }
            }

            var grab = new GrabberUrlClass();
            Int16 wIndex;

            // Strip all trailing dots. TrimEnd is used instead of a LastIndexOf/Substring loop because
            // that loop called Substring(0, -1) and threw once the string was (or became) empty.
            strSearch = strSearch.TrimEnd('.');

            grab.FindMovies(strSearch, strConfigFile, strPage, alwaysAsk, out elements, out wIndex);
            if (wIndex >= 0)
            {
                // A non-negative index narrows the result list down to that single entry.
                IMDBUrl wurl = (IMDBUrl)elements[wIndex];
                elements.Clear();
                elements.Add(wurl);
            }

            return elements;
        }

        /// <summary>
        /// Reads the inner text of the node selected by <paramref name="xpath"/> below
        /// <paramref name="node"/> and decodes it with <c>XmlConvert.DecodeName</c>.
        /// Returns <paramref name="fallback"/> when the value cannot be read for any reason.
        /// </summary>
        private static string ReadGrabberSetting(XmlNode node, string xpath, string fallback)
        {
            try
            {
                return XmlConvert.DecodeName(node.SelectSingleNodeFast(xpath).InnerText);
            }
            catch (Exception)
            {
                return fallback;
            }
        }

        /// <summary>
        /// Adds one <c>IMDBUrl</c> to <c>elements</c> for every file directly inside
        /// <paramref name="directory"/> whose name matches <paramref name="searchPattern"/>.
        /// </summary>
        private void AddMatchingFiles(string directory, string searchPattern, string dbName, XmlNode grabberNode)
        {
            foreach (string file in Directory.GetFiles(directory, searchPattern, SearchOption.TopDirectoryOnly))
            {
                string fileShortname = Path.GetFileName(file);
                var url = new IMDBUrl(file, fileShortname, dbName, grabberNode, "1");
                LogMyFilms.Debug("ReturnURL() - Found and adding nfo file '" + fileShortname + "', fullpath = '" + file + "'");
                elements.Add(url);
            }
        }