/// <summary>
/// Reads the "Extra" group of a search-result match and passes it, together with
/// the result, to <c>IgnoreVideoGameResult</c>.
/// </summary>
/// <param name="match">Regex match describing one search result.</param>
/// <param name="imDbRegex">Regex helper used to read the named group.</param>
/// <param name="result">Search result handed on to the video-game check.</param>
internal static void IgnoreIrrelevantResults
    (Match match, IMDbRegEx imDbRegex, IMDbSearchResult result)
{
    //TODO: Also ignore "Making Of" titles.
    //TODO: Also ignore documentaries (option).
    IgnoreVideoGameResult(
        result,
        imDbRegex.GetMatchValue(match, "Extra", true));
}
/// <summary>
/// Handles the case where the downloaded page describes a single film: extracts
/// title, year and IMDb ID from <paramref name="html"/> and passes the new result to
/// <c>AddSingleFilmResultToSearchResults</c>.
/// </summary>
/// <param name="html">Html of the downloaded page.</param>
/// <param name="imDbRegex">Regex helper used for matching and group extraction.</param>
/// <param name="results">List the single result is added to.</param>
/// <returns>
/// The same <paramref name="results"/> list; it is returned unchanged when
/// <c>LocateTitleIndex</c> reports false or the title/year pattern yields no match.
/// </returns>
internal static List<IIMDbSearchResult> SingleMatchAddToSearchResults
    (string html, IMDbRegEx imDbRegex, List<IIMDbSearchResult> results)
{
    //TODO: Are these RegExes the same as the 'movie result' ones?
    const string titleAndYearPattern = @"<title>(?<Title>.*?)\((?<Year>.*?)\)</title>";
    const string imdbIdPattern = @"<a\shref=""/title/(?<IMDbID>[^/]*)/fullcredits""";

    if (!LocateTitleIndex(html))
        return results;

    Debugger.LogMessageToFile
        ("[IMDb Conventional Film Search Engine] " +
         "A single result was found.");

    Match titleMatch = imDbRegex.GetRegExMatch(html, titleAndYearPattern);

    // Nothing to add when the title/year pattern did not match.
    bool nothingMatched = titleMatch == null || titleMatch.Length == 0;
    if (nothingMatched)
        return results;

    var singleResult = new IMDbSearchResult();
    singleResult.Title = imDbRegex.GetMatchValue(titleMatch, "Title", true);
    singleResult.Year = imDbRegex.GetMatchValue(titleMatch, "Year", true);

    MineImdbIdFromSingleFilmMatch(html, imDbRegex, imdbIdPattern, singleResult);
    AddSingleFilmResultToSearchResults(results, singleResult);

    return results;
}
/// <summary>
/// Builds an <c>IMDbSearchResult</c> from one search-result match by reading its
/// "IMDbID", "Title", "Year" and "URL" groups, logging each value as it is read.
/// </summary>
/// <param name="match">Regex match describing one search result.</param>
/// <param name="imDbRegex">Regex helper used to read the named groups.</param>
/// <returns>
/// The populated result; its URL is the "URL" group prefixed with
/// "http://www.imdb.com".
/// </returns>
internal static IMDbSearchResult MultipleMatchesMineDetailsOfSingleFilmResult
    (Match match, IMDbRegEx imDbRegex)
{
    Debugger.LogMessageToFile(
        "[IMDb Conventional Film Search Engine] " +
        "New search result was found. " +
        "Proceeding to add result to the list of total search results...");

    var searchResult = new IMDbSearchResult();

    // Each field is read and logged in turn, so the log shows how far extraction got.
    searchResult.IMDb_ID = imDbRegex.GetMatchValue(match, "IMDbID", true);
    Debugger.LogMessageToFile(
        "[IMDb Conventional Film Search Engine] " +
        "Result IMDb ID: " + searchResult.IMDb_ID);

    searchResult.Title = imDbRegex.GetMatchValue(match, "Title", true);
    Debugger.LogMessageToFile(
        "[IMDb Conventional Film Search Engine] " +
        "Result Title: " + searchResult.Title);

    searchResult.Year = imDbRegex.GetMatchValue(match, "Year", true);
    Debugger.LogMessageToFile(
        "[IMDb Conventional Film Search Engine] " +
        "Result Year: " + searchResult.Year);

    // The matched URL is prefixed with the site root before it is stored.
    string relativeUrl = imDbRegex.GetMatchValue(match, "URL", true);
    searchResult.URL = "http://www.imdb.com" + relativeUrl;
    Debugger.LogMessageToFile(
        "[IMDb Conventional Film Search Engine] " +
        "Result URL: " + searchResult.URL);

    return searchResult;
}
/// <summary>
/// Runs <paramref name="imdbIdPattern"/> against <paramref name="html"/> and, when a
/// non-empty match comes back, stores its "IMDbID" group in <c>result.IMDb_ID</c>.
/// The result is left untouched otherwise.
/// </summary>
/// <param name="html">Html to search.</param>
/// <param name="imDbRegex">Regex helper used for matching and group extraction.</param>
/// <param name="imdbIdPattern">Pattern expected to expose an "IMDbID" group.</param>
/// <param name="result">Search result whose IMDb ID is filled in.</param>
internal static void MineImdbIdFromSingleFilmMatch
    (string html, IMDbRegEx imDbRegex, string imdbIdPattern, IIMDbSearchResult result)
{
    Match idMatch = imDbRegex.GetRegExMatch(html, imdbIdPattern);

    bool noIdFound = idMatch == null || idMatch.Length == 0;
    if (noIdFound)
        return;

    result.IMDb_ID = imDbRegex.GetMatchValue(idMatch, "IMDbID", true);
}
/// <summary>
/// Extracts the "Runtime" group from the film page, passes it through
/// <c>IMDbMovieDetailsDownloaderHelpers.FixRuntime</c> and stores the outcome in
/// <c>movie.Runtime</c>.
/// </summary>
/// <param name="movie">Movie whose runtime is set.</param>
/// <param name="trimmedHTML">Html of the film page to search.</param>
/// <param name="imDbRegEx">Regex helper that supplies the runtime pattern.</param>
internal static void GetRuntime
    (IIMDbMovie movie, string trimmedHTML, IMDbRegEx imDbRegEx)
{
    Debugger.LogMessageToFile
        ("[IMDb film details downloader]" + " Extracting Runtime...");

    Match runtimeMatch = imDbRegEx.GetRegExMatch(trimmedHTML, imDbRegEx.RuntimePattern);
    string rawRuntime = imDbRegEx.GetMatchValue(runtimeMatch, "Runtime", true);

    movie.Runtime = IMDbMovieDetailsDownloaderHelpers.FixRuntime(rawRuntime);

    Debugger.LogMessageToFile("IMDb returned Runtime: " + movie.Runtime);
}
/// <summary>
/// Stores the "Rating" group of the film page in <c>movie.Rating</c>.
/// Does nothing when <c>ImdbFilmDetailsIndividualChoices.GetIMDbMovieRatings</c>
/// is false.
/// </summary>
/// <param name="movie">Movie whose rating is set.</param>
/// <param name="trimmedHtml">Html of the film page to search.</param>
/// <param name="imDbRegEx">Regex helper that supplies the rating pattern.</param>
internal static void GetRating
    (IIMDbMovie movie, string trimmedHtml, IMDbRegEx imDbRegEx)
{
    if (ImdbFilmDetailsIndividualChoices.GetIMDbMovieRatings)
    {
        Debugger.LogMessageToFile
            ("[IMDb film details downloader]" + " Extracting Rating...");

        Match ratingMatch = imDbRegEx.GetRegExMatch(trimmedHtml, imDbRegEx.RatingPattern);
        movie.Rating = imDbRegEx.GetMatchValue(ratingMatch, "Rating", true);

        Debugger.LogMessageToFile("IMDb returned Rating: " + movie.Rating);
    }
}
/// <summary>
/// Applies <c>TrimmedHTMLpattern</c> of a freshly initialised <c>IMDbRegEx</c> to
/// <paramref name="html"/> via <c>GetRegExString</c>.
/// </summary>
/// <param name="html">Raw html of an IMDb page.</param>
/// <returns>The string produced by <c>GetRegExString</c>.</returns>
private static string CleanHtmlPage(string html)
{
    var regexHelper = new IMDbRegEx();
    regexHelper.SetRegExPatterns();

    Debugger.LogMessageToFile("Cleaning IMDb result html ");

    return regexHelper.GetRegExString(html, regexHelper.TrimmedHTMLpattern);
}
/// <summary>
/// Walks a chain of regex matches (via <c>NextMatch</c>) and appends one search
/// result per match to <paramref name="results"/>.
/// </summary>
/// <param name="match">First match of the chain; may be null or empty.</param>
/// <param name="imDbRegex">Regex helper used to read each match's groups.</param>
/// <param name="results">List that receives the extracted results.</param>
/// <returns>The same <paramref name="results"/> list.</returns>
private static List<IIMDbSearchResult> MultipleMatchesAddToSearchResults
    (Match match, IMDbRegEx imDbRegex, List<IIMDbSearchResult> results)
{
    Match current = match;

    // Stop at the first null or zero-length match.
    while (!(current == null || current.Length == 0))
    {
        IMDbSearchResult found =
            IMDbConventionalFilmSearchEngineHelpers
                .MultipleMatchesMineDetailsOfSingleFilmResult(current, imDbRegex);

        IMDbConventionalFilmSearchEngineHelpers
            .IgnoreIrrelevantResults(current, imDbRegex, found);

        results.Add(found);
        Debugger.LogMessageToFile
            ("[IMDb Conventional Film Search Engine] " +
             "Result was added to list.");

        current = current.NextMatch();
        Debugger.LogMessageToFile
            ("[IMDb Conventional Film Search Engine]" +
             " Proceeding to next result...");
    }

    return results;
}
/// <summary>
/// Stores the "ReleaseDate" group of the film page in <c>movie.Release_Date</c>.
/// Does nothing when
/// <c>ImdbFilmDetailsIndividualChoices.GetIMDbMovieReleaseDate</c> is false.
/// </summary>
/// <param name="movie">Movie whose release date is set.</param>
/// <param name="trimmedHTML">Html of the film page to search.</param>
/// <param name="imDbRegEx">Regex helper that supplies the release-date pattern.</param>
internal static void GetReleaseDate
    (IIMDbMovie movie, string trimmedHTML, IMDbRegEx imDbRegEx)
{
    if (ImdbFilmDetailsIndividualChoices.GetIMDbMovieReleaseDate)
    {
        Debugger.LogMessageToFile
            ("[IMDb film details downloader]" + " Extracting Release Date...");

        Match releaseDateMatch =
            imDbRegEx.GetRegExMatch(trimmedHTML, imDbRegEx.ReleaseDatePattern);
        movie.Release_Date =
            imDbRegEx.GetMatchValue(releaseDateMatch, "ReleaseDate", true);

        Debugger.LogMessageToFile
            ("[IMDb film details downloader]" +
             " IMDb returned Release Date: " + movie.Release_Date);
    }
}
/// <summary>
/// Stores the "Studio" group of the film page in <c>movie.Studio</c>.
/// Does nothing when
/// <c>ImdbFilmDetailsIndividualChoices.GetIMDbMovieProductionStudio</c> is false.
/// </summary>
/// <param name="movie">Movie whose studio is set.</param>
/// <param name="trimmedHtml">Html of the film page to search.</param>
/// <param name="imDbRegEx">Regex helper that supplies the studio pattern.</param>
internal static void GetStudio
    (IMDbMovie movie, string trimmedHtml, IMDbRegEx imDbRegEx)
{
    bool studioWanted = ImdbFilmDetailsIndividualChoices.GetIMDbMovieProductionStudio;
    if (!studioWanted)
        return;

    Debugger.LogMessageToFile
        ("[IMDb film details downloader] " + "Extracting Studio...");

    Match studioMatch = imDbRegEx.GetRegExMatch(trimmedHtml, imDbRegEx.StudioPattern);
    movie.Studio = imDbRegEx.GetMatchValue(studioMatch, "Studio", true);

    Debugger.LogMessageToFile
        ("[IMDb film details downloader] " +
         "IMDb returned Studio: " + movie.Studio);
}
/// <summary>
/// Stores the "Review" group of the film page in <c>movie.Review</c>, keeping only
/// the part before the first "/" when one is present.
/// Does nothing when <c>ImdbFilmDetailsIndividualChoices.GetIMDbMovieReviews</c>
/// is false.
/// </summary>
/// <param name="movie">Movie whose review score is set.</param>
/// <param name="trimmedHTML">Html of the film page to search.</param>
/// <param name="imDbRegEx">Regex helper that supplies the review pattern.</param>
internal static void GetReview
    (IMDbMovie movie, string trimmedHTML, IMDbRegEx imDbRegEx)
{
    if (!ImdbFilmDetailsIndividualChoices.GetIMDbMovieReviews)
        return;

    Debugger.LogMessageToFile("[IMDb film details downloader] Extracting Review score...");

    Match reviewMatch = imDbRegEx.GetRegExMatch(trimmedHTML, imDbRegEx.ReviewPattern);
    movie.Review = imDbRegEx.GetMatchValue(reviewMatch, "Review", true);

    // Cut the value at the first slash, if there is one.
    int slashPosition = movie.Review.IndexOf("/", System.StringComparison.Ordinal);
    if (slashPosition >= 0)
        movie.Review = movie.Review.Substring(0, slashPosition);

    Debugger.LogMessageToFile("IMDb returned Review: " + movie.Review);
}
/// <summary>
/// Stores the "RatingDescription" group of the film page in
/// <c>movie.RatingDescription</c>. Does nothing when
/// <c>ImdbFilmDetailsIndividualChoices.GetIMDbMovieRatingDescription</c> is false.
/// </summary>
/// <param name="movie">Movie whose rating description is set.</param>
/// <param name="trimmedHTML">Html of the film page to search.</param>
/// <param name="imDbRegEx">Regex helper that supplies the rating-description pattern.</param>
internal static void ExtractRatingDescription
    (IMDbMovie movie, string trimmedHTML, IMDbRegEx imDbRegEx)
{
    if (ImdbFilmDetailsIndividualChoices.GetIMDbMovieRatingDescription)
    {
        Debugger.LogMessageToFile("[IMDb film details downloader] Extracting Rating Description...");

        Match descriptionMatch =
            imDbRegEx.GetRegExMatch(trimmedHTML, imDbRegEx.RatingDescriptionPattern);
        movie.RatingDescription =
            imDbRegEx.GetMatchValue(descriptionMatch, "RatingDescription", true);

        Debugger.LogMessageToFile("[IMDb film details downloader] IMDb returned Rating Description: " + movie.RatingDescription);
    }
}
/// <summary>
/// Stores the "ShortOverview" group of the film page in <c>movie.OverviewShort</c>,
/// dropping a trailing "more" (any letter case) and appending "...".
/// Does nothing when
/// <c>ImdbFilmDetailsIndividualChoices.GetIMDbMovieShortOverview</c> is false.
/// </summary>
/// <param name="movie">Movie whose short overview is set.</param>
/// <param name="trimmedHTML">Html of the film page to search.</param>
/// <param name="imDbRegEx">Regex helper that supplies the film-description pattern.</param>
internal static void MineOverviewUsingRegex
    (IMDbMovie movie, string trimmedHTML, IMDbRegEx imDbRegEx)
{
    if (!ImdbFilmDetailsIndividualChoices.GetIMDbMovieShortOverview)
        return;

    Debugger.LogMessageToFile
        ("[IMDb film details downloader]" + " Extracting Short Overview...");

    Match match = imDbRegEx.GetRegExMatch(trimmedHTML, imDbRegEx.FilmDescriptionPattern);
    movie.OverviewShort = imDbRegEx.GetMatchValue(match, "ShortOverview", true);

    // Ordinal comparison on purpose: the suffix is removed by cutting exactly four
    // characters, so the test must look at exactly the last four UTF-16 units.
    // A culture-sensitive EndsWith can also succeed when ignorable characters
    // trail the word, and the fixed-length cut would then remove the wrong text.
    const string trailingLinkText = "more";
    if (movie.OverviewShort.EndsWith
            (trailingLinkText, System.StringComparison.OrdinalIgnoreCase))
    {
        movie.OverviewShort = movie.OverviewShort
            .Substring(0, movie.OverviewShort.Length - trailingLinkText.Length)
            .Trim();
    }

    movie.OverviewShort = movie.OverviewShort.Trim() + "...";

    Debugger.LogMessageToFile("IMDb returned Overview: " + movie.OverviewShort);
}
/// <summary>
/// Looks for the film title in <paramref name="trimmedHtml"/>, trying
/// <c>IMDbRegEx.TitlePatternOriginal</c> first and falling back to
/// <c>IMDbRegEx.TitlePatternPrimary</c>.
/// </summary>
/// <param name="imdbID">IMDb ID of the film; only used in the failure log message.</param>
/// <param name="trimmedHtml">Html of the film page to search.</param>
/// <returns>
/// The match of the first pattern whose group 1 holds a capture. When neither
/// pattern yields one, the failure is logged and the match of the fallback pattern
/// is returned as-is, so callers still have to check it.
/// </returns>
internal static Match GetMovieTitleRegexMatch
    (string imdbID, string trimmedHtml)
{
    Debugger.LogMessageToFile
        ("[IMDb Movie Details Downloader]" +
         " Getting Title regex match...");

    var imDbRegEx = new IMDbRegEx();
    imDbRegEx.SetRegExPatterns();

    Match match = imDbRegEx.GetRegExMatch
        (trimmedHtml, IMDbRegEx.TitlePatternOriginal);

    if (HasTitleCapture(match))
        return match;

    match = imDbRegEx.GetRegExMatch
        (trimmedHtml, IMDbRegEx.TitlePatternPrimary);

    if (!HasTitleCapture(match))
    {
        Debugger.LogMessageToFile
            ("[IMDb Movie Details Downloader] " +
             "The IMDb Movie Details Downloader was unable" +
             " to extract the movie title " +
             "for the movie with IMDb ID: " + imdbID + ".");
    }

    return match;
}

/// <summary>
/// Tells whether group 1 of <paramref name="match"/> holds at least one capture.
/// Stands in for probing <c>match.Groups[1].Captures[0]</c> and catching the
/// resulting exception; a null match counts as "no capture".
/// </summary>
private static bool HasTitleCapture(Match match)
{
    return match != null && match.Groups[1].Captures.Count > 0;
}
/// <summary>
/// Stores the "Year" group of the film page in <c>movie.Year</c>, with any trailing
/// '/' characters removed.
/// </summary>
/// <param name="movie">Movie whose year is set.</param>
/// <param name="trimmedHtml">Html of the film page to search.</param>
/// <param name="imDbRegEx">Regex helper that supplies the year pattern.</param>
internal static void MineProductionYearUsingRegex
    (IIMDbMovie movie, string trimmedHtml, IMDbRegEx imDbRegEx)
{
    Debugger.LogMessageToFile
        ("[IMDb film details downloader]" + " Extracting Year...");

    Match yearMatch = imDbRegEx.GetRegExMatch(trimmedHtml, imDbRegEx.YearPattern);
    string matchedYear = imDbRegEx.GetMatchValue(yearMatch, "Year", true);

    movie.Year = matchedYear;
    movie.Year = movie.Year.TrimEnd('/');

    Debugger.LogMessageToFile
        ("[IMDb film details downloader] " +
         "IMDb returned Year: " + movie.Year);
}
AddSingleOrMultipleMatchesToSearchResults
    (IMDbRegEx imDbRegex, string html,
     bool multipleResultsFound, List<IIMDbSearchResult> results)
{
    // Fills `results` from the downloaded search page and returns the list.
    // A page flagged as a single hit goes through SingleMatchAddToSearchResults;
    // otherwise both movie-result patterns are applied in turn.

    if (!multipleResultsFound)
    {
        Debugger.LogMessageToFile
            ("[IMDb Conventional Film Search Engine] " +
             "Multiple results were not found.");

        return IMDbConventionalFilmSearchEngineHelpers.SingleMatchAddToSearchResults
            (html, imDbRegex, results);
    }

    Debugger.LogMessageToFile
        ("[IMDb Conventional Film Search Engine] " +
         "Multiple results were found.");

    // Result pattern 1
    Match patternMatch = imDbRegex.GetRegExMatch(html, imDbRegex.MovieResultPattern1);
    results = MultipleMatchesAddToSearchResults(patternMatch, imDbRegex, results);

    // Result pattern 2
    patternMatch = imDbRegex.GetRegExMatch(html, imDbRegex.MovieResultPattern2);
    results = MultipleMatchesAddToSearchResults(patternMatch, imDbRegex, results);

    return results;
}
/// <summary>
/// Stores the "Tagline" group of the film page in <c>movie.Tagline</c>.
/// Does nothing when <c>ImdbFilmDetailsIndividualChoices.GetIMDbMovieTaglines</c>
/// is false.
/// </summary>
/// <param name="movie">Movie whose tagline is set.</param>
/// <param name="trimmedHTML">Html of the film page to search.</param>
/// <param name="imDbRegEx">Regex helper that supplies the tagline pattern.</param>
internal static void GetTagline
    (IIMDbMovie movie, string trimmedHTML, IMDbRegEx imDbRegEx)
{
    if (ImdbFilmDetailsIndividualChoices.GetIMDbMovieTaglines)
    {
        Debugger.LogMessageToFile("Extracting Tagline...");

        Match taglineMatch = imDbRegEx.GetRegExMatch(trimmedHTML, imDbRegEx.TaglinePattern);
        movie.Tagline = imDbRegEx.GetMatchValue(taglineMatch, "Tagline", true);

        Debugger.LogMessageToFile("IMDb returned Tagline: " + movie.Tagline);
    }
}
/// <summary>
/// Searches IMDb for <paramref name="title"/>: builds the search URL from the
/// encoded title, downloads the result page and extracts the search results from it.
/// </summary>
/// <param name="title">Film title to search for.</param>
/// <returns>
/// The extracted search results. The list is empty when the downloaded html is
/// null or empty.
/// </returns>
public static IList<IIMDbSearchResult> SearchForTitleReturnResults(string title)
{
    Debugger.LogMessageToFile
        ("[IMDb Conventional Film Search Engine] " +
         "Initializing variables...");

    var searchResults = new List<IIMDbSearchResult>();

    //TODO: Test if SetPatterns() is correctly called and regexes are successfully loaded!
    // NOTE(review): no SetRegExPatterns() call is made on this instance before its
    // MovieResultPattern properties are used further down the call chain - confirm
    // that the constructor alone initialises them.
    var imDbRegex = new IMDbRegEx();

    string titleEncoded = ToolBox.Utils.WebUtils.EncodeURL(title);

    Debugger.LogMessageToFile
        ("[IMDb Conventional Film Search Engine] " +
         "Title to search for: " + title);

    Debugger.LogMessageToFile
        ("[IMDb Conventional Film Search Engine] " +
         "Encoded title: " + titleEncoded);

    string searchUrl = "http://www.imdb.com/find?s=title&q=" + titleEncoded;

    Debugger.LogMessageToFile
        ("[IMDb Conventional Film Search Engine] " +
         "IMDb search URL: " + searchUrl);

    // The running stopwatch is handed to the download helper.
    var watch = new Stopwatch();
    watch.Start();

    string searchResultsHtml = DownloadSearchResultsHtml(watch, searchUrl);

    if (String.IsNullOrEmpty(searchResultsHtml))
        return searchResults;

    var multipleResultsFound =
        IMDbConventionalFilmSearchEngineHelpers.DretectMultipleResults
            (searchResultsHtml);

    searchResults = AddSingleOrMultipleMatchesToSearchResults
        (imDbRegex, searchResultsHtml, multipleResultsFound, searchResults);

    return searchResults;
}
/// <summary>
/// Runs every film-detail extractor against the main film page, in a fixed order,
/// filling in <paramref name="movie"/>.
/// </summary>
/// <param name="filmDetails">Instance providing the director, writer and genre extractors.</param>
/// <param name="trimmedHtml">Html of the main film page.</param>
/// <param name="movie">Movie to populate; some extractors receive it by reference.</param>
/// <returns><paramref name="movie"/> as it stands after all extractors have run.</returns>
private static IMDbMovie MineFilmDetailsFromMainPage
    (IMDbFilmDetails filmDetails, string trimmedHtml, IMDbMovie movie)
{
    var regexHelper = new IMDbRegEx();
    regexHelper.SetRegExPatterns();

    // Static extractors. Year is regex-based; its XPath variant is not used.
    IMDbFilmDetails.MineProductionYearUsingRegex(movie, trimmedHtml, regexHelper);
    IMDbFilmDetails.GetActorsUsingXpath(movie, trimmedHtml);
    IMDbFilmDetails.GetReleaseDate(movie, trimmedHtml, regexHelper);
    IMDbFilmDetails.GetTagline(movie, trimmedHtml, regexHelper);
    IMDbFilmDetails.GetRuntime(movie, trimmedHtml, regexHelper);
    IMDbFilmDetails.GetRating(movie, trimmedHtml, regexHelper);
    IMDbFilmDetails.ExtractRatingDescription(movie, trimmedHtml, regexHelper);
    IMDbFilmDetails.GetReview(movie, trimmedHtml, regexHelper);
    IMDbFilmDetails.GetStudio(movie, trimmedHtml, regexHelper);

    // Overview is XPath-based; the regex variant is not used.
    IMDbFilmDetails.MineOverviewUsingXpath(movie, trimmedHtml);

    // Instance extractors. Director and writer are XPath-based; their regex
    // variants are not used.
    filmDetails.MineDirectorUsingXpath(ref movie, trimmedHtml);
    filmDetails.MineWriterUsingXpath(ref movie, trimmedHtml);
    filmDetails.GetGenres(ref movie, trimmedHtml);

    return movie;
}