/// <summary>
        /// Checks a search-result match for markers of results that should be ignored.
        /// Reads the "Extra" group of <paramref name="match"/> and passes it, together
        /// with <paramref name="result"/>, to <c>IgnoreVideoGameResult</c>.
        /// </summary>
        /// <param name="match">Regex match of the search result being examined.</param>
        /// <param name="imDbRegex">Regex helper used to read the "Extra" group.</param>
        /// <param name="result">Search result built from <paramref name="match"/>.</param>
        internal static void IgnoreIrrelevantResults
            (Match match, IMDbRegEx imDbRegex, IMDbSearchResult result)
        {
            //TODO: Also ignore "Making Of" titles.
            //TODO: Also ignore documentaries (option).

            // Only the video-game check is wired up at the moment.
            IgnoreVideoGameResult(
                result,
                imDbRegex.GetMatchValue(match, "Extra", true));
        }
        /// <summary>
        /// Handles a page that describes a single film: reads the title and year from
        /// the page's <c>&lt;title&gt;</c> element, looks up the IMDb ID, and passes the
        /// result to <c>AddSingleFilmResultToSearchResults</c>.
        /// </summary>
        /// <param name="html">HTML of the downloaded page.</param>
        /// <param name="imDbRegex">Regex helper used for matching and group extraction.</param>
        /// <param name="results">List that receives the result.</param>
        /// <returns>
        /// The <paramref name="results"/> instance that was passed in. It is returned
        /// as-is when <c>LocateTitleIndex</c> reports false or the title pattern
        /// does not match.
        /// </returns>
        internal static List<IIMDbSearchResult> SingleMatchAddToSearchResults
            (string html, IMDbRegEx imDbRegex, List<IIMDbSearchResult> results)
        {
            if (!LocateTitleIndex(html))
            {
                return results;
            }

            Debugger.LogMessageToFile(
                "[IMDb Conventional Film Search Engine] A single result was found.");

            //TODO: Are these RegExes the same as the 'movie result' ones?
            const string titleYearRegex = @"<title>(?<Title>.*?)\((?<Year>.*?)\)</title>";
            const string imdbIdRegex = @"<a\shref=""/title/(?<IMDbID>[^/]*)/fullcredits""";

            Match titleMatch = imDbRegex.GetRegExMatch(html, titleYearRegex);

            // Nothing to add when the title/year pattern did not match.
            bool titleWasMatched = titleMatch != null && titleMatch.Length != 0;
            if (!titleWasMatched)
            {
                return results;
            }

            var singleResult = new IMDbSearchResult();
            singleResult.Title = imDbRegex.GetMatchValue(titleMatch, "Title", true);
            singleResult.Year = imDbRegex.GetMatchValue(titleMatch, "Year", true);

            MineImdbIdFromSingleFilmMatch(html, imDbRegex, imdbIdRegex, singleResult);
            AddSingleFilmResultToSearchResults(results, singleResult);

            return results;
        }
        /// <summary>
        /// Builds an <c>IMDbSearchResult</c> from one match of a multiple-results page.
        /// Reads the "IMDbID", "Title", "Year" and "URL" groups and logs each value
        /// as it is extracted.
        /// </summary>
        /// <param name="match">Regex match describing one search result.</param>
        /// <param name="imDbRegex">Regex helper used to read the named groups.</param>
        /// <returns>
        /// A new result whose URL is the "URL" group prefixed with
        /// <c>http://www.imdb.com</c>.
        /// </returns>
        internal static IMDbSearchResult MultipleMatchesMineDetailsOfSingleFilmResult
            (Match match, IMDbRegEx imDbRegex)
        {
            const string logPrefix = "[IMDb Conventional Film Search Engine] ";

            Debugger.LogMessageToFile(
                logPrefix +
                "New search result was found. " +
                "Proceeding to add result to the list of total search results...");

            var minedResult = new IMDbSearchResult();

            minedResult.IMDb_ID = imDbRegex.GetMatchValue(match, "IMDbID", true);
            Debugger.LogMessageToFile(logPrefix + "Result IMDb ID: " + minedResult.IMDb_ID);

            minedResult.Title = imDbRegex.GetMatchValue(match, "Title", true);
            Debugger.LogMessageToFile(logPrefix + "Result Title: " + minedResult.Title);

            minedResult.Year = imDbRegex.GetMatchValue(match, "Year", true);
            Debugger.LogMessageToFile(logPrefix + "Result Year: " + minedResult.Year);

            // The captured URL is site-relative; prefix it with the IMDb host.
            string relativeUrl = imDbRegex.GetMatchValue(match, "URL", true);
            minedResult.URL = "http://www.imdb.com" + relativeUrl;
            Debugger.LogMessageToFile(logPrefix + "Result URL: " + minedResult.URL);

            return minedResult;
        }
        /// <summary>
        /// Searches <paramref name="html"/> with <paramref name="imdbIdPattern"/> and,
        /// when a non-empty match is found, stores its "IMDbID" group in
        /// <paramref name="result"/>. The result is left untouched otherwise.
        /// </summary>
        /// <param name="html">HTML of a single-film page.</param>
        /// <param name="imDbRegex">Regex helper used for matching and group extraction.</param>
        /// <param name="imdbIdPattern">Pattern containing an "IMDbID" named group.</param>
        /// <param name="result">Search result whose <c>IMDb_ID</c> may be set.</param>
        internal static void MineImdbIdFromSingleFilmMatch
            (string html, IMDbRegEx imDbRegex, 
             string imdbIdPattern, IIMDbSearchResult result)
        {
            Match idMatch = imDbRegex.GetRegExMatch(html, imdbIdPattern);

            if (idMatch == null || idMatch.Length <= 0)
            {
                return;
            }

            result.IMDb_ID = imDbRegex.GetMatchValue(idMatch, "IMDbID", true);
        }
コード例 #5
0
        /// <summary>
        /// Extracts the "Runtime" group from <paramref name="trimmedHTML"/> using
        /// <c>imDbRegEx.RuntimePattern</c>, passes it through
        /// <c>IMDbMovieDetailsDownloaderHelpers.FixRuntime</c> and stores the
        /// outcome in <c>movie.Runtime</c>.
        /// </summary>
        /// <param name="movie">Movie that receives the runtime.</param>
        /// <param name="trimmedHTML">HTML of the film's page.</param>
        /// <param name="imDbRegEx">Regex helper holding the runtime pattern.</param>
        internal static void GetRuntime
            (IIMDbMovie movie,
            string trimmedHTML, 
            IMDbRegEx imDbRegEx)
        {
            Debugger.LogMessageToFile(
                "[IMDb film details downloader] Extracting Runtime...");

            Match runtimeMatch = imDbRegEx.GetRegExMatch(
                trimmedHTML, imDbRegEx.RuntimePattern);

            string rawRuntime = imDbRegEx.GetMatchValue(runtimeMatch, "Runtime", true);

            movie.Runtime = IMDbMovieDetailsDownloaderHelpers.FixRuntime(rawRuntime);

            Debugger.LogMessageToFile("IMDb returned Runtime: " + movie.Runtime);
        }
コード例 #6
0
        /// <summary>
        /// Stores the "Rating" group matched by <c>imDbRegEx.RatingPattern</c> in
        /// <c>movie.Rating</c>. Does nothing when
        /// <c>ImdbFilmDetailsIndividualChoices.GetIMDbMovieRatings</c> is false.
        /// </summary>
        /// <param name="movie">Movie that receives the rating.</param>
        /// <param name="trimmedHtml">HTML of the film's page.</param>
        /// <param name="imDbRegEx">Regex helper holding the rating pattern.</param>
        internal static void GetRating
            (IIMDbMovie movie, 
            string trimmedHtml, IMDbRegEx imDbRegEx)
        {
            bool ratingsWanted = ImdbFilmDetailsIndividualChoices.GetIMDbMovieRatings;
            if (!ratingsWanted)
            {
                return;
            }

            Debugger.LogMessageToFile(
                "[IMDb film details downloader] Extracting Rating...");

            Match ratingMatch = imDbRegEx.GetRegExMatch(
                trimmedHtml, imDbRegEx.RatingPattern);

            string rating = imDbRegEx.GetMatchValue(ratingMatch, "Rating", true);
            movie.Rating = rating;

            Debugger.LogMessageToFile("IMDb returned Rating: " + movie.Rating);
        }
コード例 #7
0
        /// <summary>
        /// Applies <c>TrimmedHTMLpattern</c> of a freshly initialised
        /// <c>IMDbRegEx</c> to <paramref name="html"/> and returns whatever
        /// <c>GetRegExString</c> yields.
        /// </summary>
        /// <param name="html">Raw HTML of an IMDb result page.</param>
        /// <returns>The string produced by <c>GetRegExString</c> for the trim pattern.</returns>
        private static string CleanHtmlPage(string html)
        {
            var regexHelper = new IMDbRegEx();
            regexHelper.SetRegExPatterns();

            Debugger.LogMessageToFile("Cleaning IMDb result html ");

            return regexHelper.GetRegExString(html, regexHelper.TrimmedHTMLpattern);
        }
        /// <summary>
        /// Walks the chain of matches starting at <paramref name="match"/> and appends
        /// one search result per match to <paramref name="results"/>. The walk stops
        /// at the first null or zero-length match.
        /// </summary>
        /// <param name="match">First match of a result pattern; may be null.</param>
        /// <param name="imDbRegex">Regex helper used to read the match groups.</param>
        /// <param name="results">List that receives the mined results.</param>
        /// <returns>The <paramref name="results"/> instance that was passed in.</returns>
        private static List<IIMDbSearchResult> MultipleMatchesAddToSearchResults
            (Match match, IMDbRegEx imDbRegex, List<IIMDbSearchResult> results)
        {
            const string logTag = "[IMDb Conventional Film Search Engine]";

            Match current = match;

            while (true)
            {
                if (current == null || current.Length <= 0)
                {
                    break;
                }

                var minedResult = IMDbConventionalFilmSearchEngineHelpers
                    .MultipleMatchesMineDetailsOfSingleFilmResult(current, imDbRegex);

                // The result is added unconditionally after this call.
                IMDbConventionalFilmSearchEngineHelpers
                    .IgnoreIrrelevantResults(current, imDbRegex, minedResult);

                results.Add(minedResult);
                Debugger.LogMessageToFile(logTag + " Result was added to list.");

                current = current.NextMatch();
                Debugger.LogMessageToFile(logTag + " Proceeding to next result...");
            }

            return results;
        }
コード例 #9
0
        /// <summary>
        /// Stores the "ReleaseDate" group matched by <c>imDbRegEx.ReleaseDatePattern</c>
        /// in <c>movie.Release_Date</c>. Does nothing when
        /// <c>ImdbFilmDetailsIndividualChoices.GetIMDbMovieReleaseDate</c> is false.
        /// </summary>
        /// <param name="movie">Movie that receives the release date.</param>
        /// <param name="trimmedHTML">HTML of the film's page.</param>
        /// <param name="imDbRegEx">Regex helper holding the release-date pattern.</param>
        internal static void GetReleaseDate
            (IIMDbMovie movie,
            string trimmedHTML, IMDbRegEx imDbRegEx)
        {
            const string logPrefix = "[IMDb film details downloader]";

            if (!ImdbFilmDetailsIndividualChoices.GetIMDbMovieReleaseDate)
            {
                return;
            }

            Debugger.LogMessageToFile(logPrefix + " Extracting Release Date...");

            Match releaseDateMatch = imDbRegEx.GetRegExMatch(
                trimmedHTML, imDbRegEx.ReleaseDatePattern);

            string releaseDate = imDbRegEx.GetMatchValue(
                releaseDateMatch, "ReleaseDate", true);
            movie.Release_Date = releaseDate;

            Debugger.LogMessageToFile(
                logPrefix + " IMDb returned Release Date: " + movie.Release_Date);
        }
コード例 #10
0
        /// <summary>
        /// Stores the "Studio" group matched by <c>imDbRegEx.StudioPattern</c> in
        /// <c>movie.Studio</c>. Does nothing when
        /// <c>ImdbFilmDetailsIndividualChoices.GetIMDbMovieProductionStudio</c> is false.
        /// </summary>
        /// <param name="movie">Movie that receives the studio name.</param>
        /// <param name="trimmedHtml">HTML of the film's page.</param>
        /// <param name="imDbRegEx">Regex helper holding the studio pattern.</param>
        internal static void GetStudio
            (IMDbMovie movie,
            string trimmedHtml,
            IMDbRegEx imDbRegEx)
        {
            const string logPrefix = "[IMDb film details downloader] ";

            if (!ImdbFilmDetailsIndividualChoices.GetIMDbMovieProductionStudio)
            {
                return;
            }

            Debugger.LogMessageToFile(logPrefix + "Extracting Studio...");

            Match studioMatch = imDbRegEx.GetRegExMatch(
                trimmedHtml, imDbRegEx.StudioPattern);

            string studio = imDbRegEx.GetMatchValue(studioMatch, "Studio", true);
            movie.Studio = studio;

            Debugger.LogMessageToFile(
                logPrefix + "IMDb returned Studio: " + movie.Studio);
        }
コード例 #11
0
        /// <summary>
        /// Stores the "Review" group matched by <c>imDbRegEx.ReviewPattern</c> in
        /// <c>movie.Review</c>, keeping only the text before the first "/" when one
        /// is present. Does nothing when
        /// <c>ImdbFilmDetailsIndividualChoices.GetIMDbMovieReviews</c> is false.
        /// </summary>
        /// <param name="movie">Movie that receives the review score.</param>
        /// <param name="trimmedHTML">HTML of the film's page.</param>
        /// <param name="imDbRegEx">Regex helper holding the review pattern.</param>
        internal static void GetReview
            (IMDbMovie movie,
            string trimmedHTML,
            IMDbRegEx imDbRegEx)
        {
            if (!ImdbFilmDetailsIndividualChoices.GetIMDbMovieReviews)
            {
                return;
            }

            Debugger.LogMessageToFile(
                "[IMDb film details downloader] Extracting Review score...");

            Match reviewMatch = imDbRegEx.GetRegExMatch(
                trimmedHTML, imDbRegEx.ReviewPattern);

            movie.Review = imDbRegEx.GetMatchValue(reviewMatch, "Review", true);

            // NOTE(review): assumes GetMatchValue never returns null — confirm.
            int slashIndex = movie.Review.IndexOf("/", System.StringComparison.Ordinal);
            if (slashIndex >= 0)
            {
                // Keep only the part in front of the first slash.
                movie.Review = movie.Review.Substring(0, slashIndex);
            }

            Debugger.LogMessageToFile("IMDb returned Review: " + movie.Review);
        }
コード例 #12
0
        /// <summary>
        /// Stores the "RatingDescription" group matched by
        /// <c>imDbRegEx.RatingDescriptionPattern</c> in <c>movie.RatingDescription</c>.
        /// Does nothing when
        /// <c>ImdbFilmDetailsIndividualChoices.GetIMDbMovieRatingDescription</c> is false.
        /// </summary>
        /// <param name="movie">Movie that receives the rating description.</param>
        /// <param name="trimmedHTML">HTML of the film's page.</param>
        /// <param name="imDbRegEx">Regex helper holding the rating-description pattern.</param>
        internal static void ExtractRatingDescription
            (IMDbMovie movie,
            string trimmedHTML, IMDbRegEx imDbRegEx)
        {
            bool descriptionWanted =
                ImdbFilmDetailsIndividualChoices.GetIMDbMovieRatingDescription;
            if (!descriptionWanted)
            {
                return;
            }

            Debugger.LogMessageToFile(
                "[IMDb film details downloader] Extracting Rating Description...");

            Match descriptionMatch = imDbRegEx.GetRegExMatch(
                trimmedHTML, imDbRegEx.RatingDescriptionPattern);

            string description = imDbRegEx.GetMatchValue(
                descriptionMatch, "RatingDescription", true);
            movie.RatingDescription = description;

            Debugger.LogMessageToFile(
                "[IMDb film details downloader]  IMDb returned Rating Description: "
                + movie.RatingDescription);
        }
コード例 #13
0
        /// <summary>
        /// Stores the "ShortOverview" group matched by
        /// <c>imDbRegEx.FilmDescriptionPattern</c> in <c>movie.OverviewShort</c>.
        /// A trailing "more" (any casing) is removed, the text is trimmed and "..."
        /// is appended. Does nothing when
        /// <c>ImdbFilmDetailsIndividualChoices.GetIMDbMovieShortOverview</c> is false.
        /// </summary>
        /// <param name="movie">Movie that receives the short overview.</param>
        /// <param name="trimmedHTML">HTML of the film's page.</param>
        /// <param name="imDbRegEx">Regex helper holding the description pattern.</param>
        internal static void MineOverviewUsingRegex
            (IMDbMovie movie,
            string trimmedHTML,
            IMDbRegEx imDbRegEx )
        {
            // Suffix stripped from the end of the overview when present.
            const string moreSuffix = "more";

            if (!ImdbFilmDetailsIndividualChoices.GetIMDbMovieShortOverview)
                return;

            Debugger.LogMessageToFile
                ("[IMDb film details downloader]" +
                 " Extracting Short Overview...");

            Match match = imDbRegEx.GetRegExMatch
                (trimmedHTML, imDbRegEx.FilmDescriptionPattern);

            movie.OverviewShort =
                imDbRegEx.GetMatchValue
                (match, "ShortOverview", true);

            // NOTE(review): assumes GetMatchValue never returns null — confirm.
            // Ordinal, case-insensitive comparison: the suffix is fixed ASCII text, so
            // the check must not depend on the current culture, and no lowered copy of
            // the overview needs to be allocated.
            if (movie.OverviewShort.EndsWith
                (moreSuffix, System.StringComparison.OrdinalIgnoreCase))
            {
                movie.OverviewShort =
                    movie.OverviewShort.Substring
                    (0, movie.OverviewShort.Length - moreSuffix.Length).Trim();
            }

            movie.OverviewShort = movie.OverviewShort.Trim() + "...";

            Debugger.LogMessageToFile("IMDb returned Overview: " + movie.OverviewShort);
        }
		/// <summary>
		/// Looks for the film title in <paramref name="trimmedHtml"/>, trying
		/// <c>IMDbRegEx.TitlePatternOriginal</c> first and falling back to
		/// <c>IMDbRegEx.TitlePatternPrimary</c>.
		/// </summary>
		/// <param name="imdbID">IMDb ID of the film; used only in the failure log message.</param>
		/// <param name="trimmedHtml">HTML of the film's page.</param>
		/// <returns>
		/// The first match whose group 1 holds a capture. When neither pattern
		/// yields one, the (possibly null or unsuccessful) match of the primary
		/// pattern is returned after a log entry is written.
		/// </returns>
		internal static Match GetMovieTitleRegexMatch
			(string imdbID, string trimmedHtml)
		{
			Debugger.LogMessageToFile
				("[IMDb Movie Details Downloader]" +
				 " Getting Title regex match...");

			var imDbRegEx = new IMDbRegEx();
			imDbRegEx.SetRegExPatterns();

			Match match = imDbRegEx.GetRegExMatch
				(trimmedHtml,
				 IMDbRegEx.TitlePatternOriginal);

			if (HasFirstGroupCapture(match))
				return match;

			// The original-title pattern produced nothing usable; try the primary one.
			match = imDbRegEx.GetRegExMatch
				(trimmedHtml,
				 IMDbRegEx.TitlePatternPrimary);

			if (HasFirstGroupCapture(match))
				return match;

			Debugger.LogMessageToFile
				("[IMDb Movie Details Downloader] " +
				 "The IMDb Movie Details Downloader was unable" +
				 " to extract the movie title " +
				 "for the movie with IMDb ID: "
				 + imdbID + ".");

			// Callers still receive the last attempted match, as before.
			return match;
		}

		/// <summary>
		/// Tells whether <paramref name="match"/> is non-null and its group 1 holds at
		/// least one capture, i.e. whether reading <c>Groups[1].Captures[0]</c>
		/// would succeed without throwing.
		/// </summary>
		private static bool HasFirstGroupCapture(Match match)
		{
			return match != null && match.Groups[1].Captures.Count > 0;
		}
コード例 #15
0
        /// <summary>
        /// Stores the "Year" group matched by <c>imDbRegEx.YearPattern</c> in
        /// <c>movie.Year</c>, with any trailing '/' characters removed.
        /// </summary>
        /// <param name="movie">Movie that receives the production year.</param>
        /// <param name="trimmedHtml">HTML of the film's page.</param>
        /// <param name="imDbRegEx">Regex helper holding the year pattern.</param>
        internal static void MineProductionYearUsingRegex
            (IIMDbMovie movie, string trimmedHtml, IMDbRegEx imDbRegEx)
        {
            Debugger.LogMessageToFile(
                "[IMDb film details downloader] Extracting Year...");

            Match yearMatch = imDbRegEx.GetRegExMatch(
                trimmedHtml, imDbRegEx.YearPattern);

            movie.Year = imDbRegEx.GetMatchValue(yearMatch, "Year", true);

            // NOTE(review): assumes GetMatchValue never returns null — confirm.
            string yearWithoutSlash = movie.Year.TrimEnd('/');
            movie.Year = yearWithoutSlash;

            Debugger.LogMessageToFile(
                "[IMDb film details downloader] IMDb returned Year: " + movie.Year);
        }
            AddSingleOrMultipleMatchesToSearchResults
            (IMDbRegEx imDbRegex, string html,
             bool multipleResultsFound, 
             List<IIMDbSearchResult> results)
        {
            // Fills `results` from the downloaded search page `html` and returns it.
            // When `multipleResultsFound` is false the page is handed to
            // SingleMatchAddToSearchResults; otherwise both movie-result patterns are
            // run over the page, one after the other.
            // NOTE(review): the modifiers and return type of this declaration are not
            // visible in this excerpt — confirm against the full file.


            if (!multipleResultsFound)
            {
                Debugger.LogMessageToFile
                    ("[IMDb Conventional Film Search Engine] " +
                     "Multiple results were not found.");

                results = IMDbConventionalFilmSearchEngineHelpers.SingleMatchAddToSearchResults
                    (html, imDbRegex, results);
            }
            else
            {
                Debugger.LogMessageToFile
                    ("[IMDb Conventional Film Search Engine] " +
                     "Multiple results were found.");

                #region Result Pattern 1

                // Matches of the first pattern are appended first.
                Match match = imDbRegex.GetRegExMatch(html, imDbRegex.MovieResultPattern1);

                results = MultipleMatchesAddToSearchResults(match, imDbRegex, results);

                #endregion

                #region Result Pattern 2

                // Matches of the second pattern are appended to the same list; nothing
                // here removes entries that both patterns may have produced.
                match = imDbRegex.GetRegExMatch(html, imDbRegex.MovieResultPattern2);

                results = MultipleMatchesAddToSearchResults(match, imDbRegex, results);

                #endregion
            }

            return results;
        }
コード例 #17
0
        /// <summary>
        /// Stores the "Tagline" group matched by <c>imDbRegEx.TaglinePattern</c> in
        /// <c>movie.Tagline</c>. Does nothing when
        /// <c>ImdbFilmDetailsIndividualChoices.GetIMDbMovieTaglines</c> is false.
        /// </summary>
        /// <param name="movie">Movie that receives the tagline.</param>
        /// <param name="trimmedHTML">HTML of the film's page.</param>
        /// <param name="imDbRegEx">Regex helper holding the tagline pattern.</param>
        internal static void GetTagline
            (IIMDbMovie movie,
            string trimmedHTML, IMDbRegEx imDbRegEx)
        {
            bool taglinesWanted = ImdbFilmDetailsIndividualChoices.GetIMDbMovieTaglines;
            if (!taglinesWanted)
            {
                return;
            }

            Debugger.LogMessageToFile("Extracting Tagline...");

            Match taglineMatch = imDbRegEx.GetRegExMatch(
                trimmedHTML, imDbRegEx.TaglinePattern);

            string tagline = imDbRegEx.GetMatchValue(taglineMatch, "Tagline", true);
            movie.Tagline = tagline;

            Debugger.LogMessageToFile("IMDb returned Tagline: " + movie.Tagline);
        }
        /// <summary>
        /// Searches IMDb for <paramref name="title"/> and returns the results mined
        /// from the downloaded search page.
        /// </summary>
        /// <param name="title">Film title to search for; it is URL-encoded before use.</param>
        /// <returns>
        /// The list of search results. The list is empty when the search page could
        /// not be downloaded (null or empty HTML).
        /// </returns>
        public static IList<IIMDbSearchResult> SearchForTitleReturnResults(string title)
        {
            #region Vars

            Debugger.LogMessageToFile
                ("[IMDb Conventional Film Search Engine] " +
                 "Initializing variables...");

            var searchResults = new List<IIMDbSearchResult>();

            var imDbRegex = new IMDbRegEx();
            //TODO: Test if SetPatterns() is correctly called and regexes are succesfully loaded!
            // NOTE(review): other call sites invoke SetRegExPatterns() right after
            // constructing IMDbRegEx; this one does not — confirm whether that is intended.

            string titleEncoded = ToolBox.Utils.WebUtils.EncodeURL(title);

            Debugger.LogMessageToFile
                ("[IMDb Conventional Film Search Engine] " +
                 "Title to search for: " + title);

            Debugger.LogMessageToFile
                ("[IMDb Conventional Film Search Engine] " +
                 "Encoded title: " + titleEncoded);

            // Alternative (mobile site): "http://m.imdb.com/find?q=" + titleEncoded + "&button=Search"
            string searchUrl = "http://www.imdb.com/find?s=title&q=" + titleEncoded;

            Debugger.LogMessageToFile
                ("[IMDb Conventional Film Search Engine] " +
                 "IMDb search URL: " + searchUrl);

            #endregion

            // The running stopwatch is handed to the download helper.
            var watch = Stopwatch.StartNew();

            string searchResultsHtml
                = DownloadSearchResultsHtml
                (watch, searchUrl);

            if (String.IsNullOrEmpty(searchResultsHtml))
                return searchResults;

            var multipleResultsFound
                = IMDbConventionalFilmSearchEngineHelpers.DretectMultipleResults
                (searchResultsHtml);

            searchResults = AddSingleOrMultipleMatchesToSearchResults
                (imDbRegex, searchResultsHtml,
                 multipleResultsFound, searchResults);

            return searchResults;
        }
コード例 #19
0
        /// <summary>
        /// Fills <paramref name="movie"/> with the details found in
        /// <paramref name="trimmedHtml"/> by running the individual extraction
        /// routines one after another, then returns it.
        /// </summary>
        /// <param name="filmDetails">Instance whose director, writer and genre extractors are used.</param>
        /// <param name="trimmedHtml">HTML of the film's main page.</param>
        /// <param name="movie">Movie object to populate; passed by <c>ref</c> to some extractors.</param>
        /// <returns>The <paramref name="movie"/> variable as it stands after all extractors have run.</returns>
        private static IMDbMovie MineFilmDetailsFromMainPage
            (IMDbFilmDetails filmDetails,
             string trimmedHtml, IMDbMovie movie )
        {

            // One regex helper, initialised once, is shared by all regex-based extractors below.
            IMDbRegEx imDbRegEx = new IMDbRegEx();
            imDbRegEx.SetRegExPatterns();


            IMDbFilmDetails.MineProductionYearUsingRegex(movie, trimmedHtml, imDbRegEx);
            //IMDbFilmDetails.MineProductionYearUsingXpath(movie, trimmedHtml);


            IMDbFilmDetails.GetActorsUsingXpath(movie, trimmedHtml);


            IMDbFilmDetails.GetReleaseDate(movie, trimmedHtml, imDbRegEx);


            IMDbFilmDetails.GetTagline(movie, trimmedHtml, imDbRegEx);


            IMDbFilmDetails.GetRuntime(movie, trimmedHtml, imDbRegEx);


            IMDbFilmDetails.GetRating(movie, trimmedHtml, imDbRegEx);


            IMDbFilmDetails.ExtractRatingDescription(movie, trimmedHtml, imDbRegEx);


            IMDbFilmDetails.GetReview(movie, trimmedHtml, imDbRegEx);


            IMDbFilmDetails.GetStudio(movie, trimmedHtml, imDbRegEx);


            // The XPath variant is the active overview extractor; the regex variant is disabled.
            //IMDbFilmDetails.MineOverviewUsingRegex(movie, trimmedHtml, imDbRegEx);
            IMDbFilmDetails.MineOverviewUsingXpath(movie, trimmedHtml);

            // The remaining extractors are instance methods and take the movie by ref,
            // so they may replace the object that is returned.
            //filmDetails.MineDirectorUsingRegex(ref movie, trimmedHtml);
            filmDetails.MineDirectorUsingXpath(ref movie, trimmedHtml);


            //filmDetails.MineWriterUsingRegex(ref movie, trimmedHtml);
            filmDetails.MineWriterUsingXpath(ref movie, trimmedHtml);

            filmDetails.GetGenres(ref movie, trimmedHtml);




            return movie;


        }