/// <summary>
/// Walks <c>mListCurrent</c> and, for every entry that already carries an IMDb ID, calls
/// <c>getMovieInfoByIdAndSave</c> for it. Entries that only have a name would be resolved
/// through an IMDb search, but that path is currently switched off by a local flag.
/// </summary>
/// <remarks>
/// A timestamped folder is created under <c>_log</c> in the application start-up path. It
/// receives one summary log plus one log file per processed movie. In the search path,
/// entries that cannot be resolved are passed to <c>copyItemToList</c> with
/// <c>mListWaiting</c> as the target. After the loop a message box reports how many
/// entries are in <c>mListWaiting</c> and <c>btnStart</c> is re-enabled.
/// </remarks>
private void parseMovies( )
{
    // A single timestamp is formatted once so the folder name cannot mix parts taken on
    // different sides of a second/minute boundary. The invariant culture keeps the
    // Gregorian year that DateTime.Year reports, whatever the current culture's calendar is.
    string runFolder = DateTime.Now.ToString("yyyyMMdd_HH_mm_ss", System.Globalization.CultureInfo.InvariantCulture);

    // The start-up path is combined, not embedded in a format template: a '{' or '}' in
    // the path would otherwise be parsed as a format item and throw.
    string logPath = Path.Combine(Path.Combine(Application.StartupPath, "_log"), runFolder) + Path.DirectorySeparatorChar;
    Directory.CreateDirectory(logPath);

    string finishedMessage;

    // 'using' closes (and flushes) the summary log even when a movie fails with an exception.
    using (TextWriter twSum = new StreamWriter(logPath + logHelper.createLogFileName("_summary")))
    {
        twSum.WriteLine(string.Format("{0}\tThread started for {1} movies", DateTime.Now.ToString( ), mCount.ToString( )));

        foreach (var item in mListCurrent)
        {
            string movieName = MovieNameBL.GetOriginalNameOfMovie(item.ID);

            if (!string.IsNullOrEmpty(item.ImdbID))
            {
                string imdbId = item.ImdbID;

                using (TextWriter tw = new StreamWriter(logPath + logHelper.createLogFileName(String.Format("{0} ({1})", movieName, imdbId))))
                {
                    // The wall-clock start is logged and handed to the save routine;
                    // the stopwatch measures the duration.
                    DateTime start = DateTime.Now;
                    Stopwatch sw = Stopwatch.StartNew( );

                    tw.WriteLine(" GET BY ID ");
                    tw.WriteLine("Start : " + start.ToString( ));

                    getMovieInfoByIdAndSave(imdbId, item, tw, start);

                    sw.Stop( );
                    TimeSpan processTime = sw.Elapsed;

                    tw.WriteLine("End : " + DateTime.Now.ToString( ));
                    tw.WriteLine( );
                    // TotalMinutes (truncated) instead of Minutes, so runs longer than an
                    // hour do not lose the hour part.
                    tw.WriteLine(String.Format("Total elapsed time: {0} minutes {1} seconds {2} miliseconds",
                        ((int)processTime.TotalMinutes).ToString( ),
                        processTime.Seconds.ToString( ),
                        processTime.Milliseconds.ToString( )));
                }
            }
            else if (!String.IsNullOrEmpty(movieName))
            {
                // Search-by-name is deliberately disabled for now. While the flag is true,
                // entries without an IMDb ID are neither processed nor queued.
                bool skipSearchForNow = true;
                if (skipSearchForNow == false)
                {
                    string imdbId = "";
                    String tempHtml = "";

                    using (TextWriter tw = new StreamWriter(logPath + logHelper.createLogFileName(String.Format("{0}", movieName))))
                    {
                        DateTime start = DateTime.Now;

                        logHelper.logLine(tw, " GET BY SEARCH ");
                        logHelper.logLine(tw, "Search started");

                        bool querySucceeded = false;
                        try
                        {
                            tempHtml = WebRequestHelper.CallPage("http://www.imdb.com/find?tt=all&q=" + movieName.Replace(" ", "+"));
                            querySucceeded = true;
                        }
                        catch (Exception ex)
                        {
                            logHelper.logException(tw, ex);
                        }

                        // Only claim success when the request did not throw.
                        if (querySucceeded)
                        {
                            logHelper.logLine(tw, "Query ran successfully");
                        }

                        if (!string.IsNullOrEmpty(tempHtml))
                        {
                            bool queued = false;

                            // Either this is the search result page, or IMDb redirected
                            // straight to the movie's own page.
                            System.Text.RegularExpressions.Regex rgx = new System.Text.RegularExpressions.Regex(@"<title>IMDb Title Search</title>");
                            if (rgx.Matches(tempHtml).Count == 1)
                            {
                                logHelper.logLine(tw, "We are at search result page");

                                if (tempHtml.Contains("<b>Popular Titles</b>"))
                                {
                                    // Found among the popular results: take the first one.
                                    logHelper.logLine(tw, "Our movie is in the Popular Titles!");
                                    imdbId = readPopularTitleId(tempHtml);
                                }
                                else
                                {
                                    // Not among the popular titles: skip for now, revisit later.
                                    logHelper.logLine(tw, "Movie is not so Popular. Not checking now. Sent to waiting queue.");
                                    copyItemToList(mListCurrent, mListWaiting, item.ID);
                                    queued = true;
                                }
                            }
                            else
                            {
                                logHelper.logLine(tw, "We are at the movie page");
                                imdbId = readMoviePageId(tempHtml);
                            }

                            if (!String.IsNullOrEmpty(imdbId))
                            {
                                logHelper.logLine(tw, "Got the ID: " + imdbId);
                                getMovieInfoByIdAndSave(imdbId, item, tw, start);
                            }
                            else if (!queued)
                            {
                                // The page had no recognisable title link. The original code
                                // threw here; queue the entry like the other failures instead.
                                logHelper.logLine(tw, "Could not extract an IMDB ID from the page. Sent to waiting queue.");
                                copyItemToList(mListCurrent, mListWaiting, item.ID);
                            }
                        }
                        else
                        {
                            logHelper.logLine(tw, "Error while connecting to IMDB. Aborting.");
                            copyItemToList(mListCurrent, mListWaiting, item.ID);
                        }
                    }
                }
            }
            // else: the entry has neither an IMDb ID nor a name, so there is nothing to look up.

            setProgressBarValue(parseStatus, parseStatus.Value + 1);
        }

        // Built once so the log line and the message box show the same text.
        finishedMessage = string.Format("{0}\tThread finished with {1} movies waiting in queue", DateTime.Now.ToString( ), mListWaiting.Count.ToString( ));
        twSum.WriteLine(finishedMessage);
    }

    MessageBox.Show(finishedMessage);
    setButtonEnabled(btnStart, true);
}

/// <summary>
/// Returns the title ID ("tt...") of the first title link that follows the
/// "Popular Titles" header in <paramref name="html"/>, or "" when there is none.
/// </summary>
private static string readPopularTitleId(string html)
{
    const string linkPrefix = "<a href=\"/title/";

    int header = html.IndexOf("<b>Popular Titles</b>", StringComparison.Ordinal);
    if (header < 0)
    {
        return "";
    }

    // Match the "tt" as part of the link, but start reading at it so the ID keeps its prefix.
    int link = html.IndexOf(linkPrefix + "tt", header, StringComparison.Ordinal);
    if (link < 0)
    {
        return "";
    }

    return readUpToSlash(html, link + linkPrefix.Length);
}

/// <summary>
/// Returns the path segment that follows the IMDb title URL in the first recognised
/// <c>&lt;link&gt;</c> tag of <paramref name="html"/>, or "" when no such tag exists.
/// </summary>
private static string readMoviePageId(string html)
{
    // Checked in this order: plain link first, canonical link second.
    string[] prefixes =
    {
        "<link href=\"http://www.imdb.com/title/",
        "<link rel=\"canonical\" href=\"http://www.imdb.com/title/"
    };

    foreach (string prefix in prefixes)
    {
        int at = html.IndexOf(prefix, StringComparison.Ordinal);
        if (at >= 0)
        {
            return readUpToSlash(html, at + prefix.Length);
        }
    }

    return "";
}

/// <summary>
/// Returns the text from <paramref name="start"/> up to (not including) the next '/',
/// or "" when no '/' follows.
/// </summary>
private static string readUpToSlash(string text, int start)
{
    int slash = text.IndexOf('/', start);
    return slash < 0 ? "" : text.Substring(start, slash - start);
}
/// <summary>
/// Click handler for the search button: queries IMDb for the text in <c>txtSearch</c>
/// and shows the resolved title ID in a message box.
/// </summary>
/// <remarks>
/// Does nothing when the search box is empty. Failures (request error, empty response,
/// no title found) are reported with a message box instead of throwing out of the handler.
/// </remarks>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private void btnSearch_Click(object sender, EventArgs e)
{
    if (string.IsNullOrEmpty(txtSearch.Text))
    {
        return;
    }

    string search = ImdbHelper.getSearchFriendlyString(txtSearch.Text);
    string tempHtml;

    try
    {
        tempHtml = WebRequestHelper.CallPage("http://www.imdb.com/find?tt=all&q=" + search.Replace(" ", "+"));
    }
    catch (Exception ex)
    {
        // The user has been told what went wrong; there is nothing to parse.
        MessageBox.Show(ex.Message);
        return;
    }

    if (string.IsNullOrEmpty(tempHtml))
    {
        MessageBox.Show("Error while connecting to IMDB");
        return;
    }

    // Either this is the search result page, or IMDb redirected straight to the
    // movie's own page.
    bool isSearchPage = Regex.IsMatch(tempHtml, "<link.*href.*imdb.com/find");
    string imdbId = isSearchPage ? findIdOnSearchPage(tempHtml) : findIdOnTitlePage(tempHtml);

    if (string.IsNullOrEmpty(imdbId))
    {
        MessageBox.Show("I could not find the movie that you searched for.. Sorry.");
        return;
    }

    MessageBox.Show(imdbId);
}

/// <summary>
/// Extracts a title ID ("tt...") from an IMDb search result page. Returns "" when the
/// page contains no recognisable title link.
/// </summary>
private static string findIdOnSearchPage(string html)
{
    // Result-list layout: a findSection div holding a findList table, followed by the
    // "More title matches" link.
    const string sectionPattern = @"<div class=\""findSection\"">.*<table class=\""findList\"".*>(.*)</table>.*More title matches";

    // Used as a plain literal. Passing it through string.Format made "{7,9}" a format
    // item with no argument, which threw FormatException.
    const string titleLinkPattern = "<a href=\"/title/(tt[0-9]{7,9})/";

    Match section = Regex.Match(html, sectionPattern, RegexOptions.Singleline);
    if (section.Success)
    {
        // The whole matched section is searched rather than capture group 1: the greedy
        // quantifiers shrink that group to the text after the last '>' before the
        // closing </table>, so it cannot contain a complete link.
        Match link = Regex.Match(section.Value, titleLinkPattern);
        if (link.Success)
        {
            return link.Groups[1].Value;
        }
    }

    // Fallback for the layout with a "Popular Titles" header: take the first title
    // link after the header.
    const string linkPrefix = "<a href=\"/title/";
    int header = html.IndexOf("<b>Popular Titles</b>", StringComparison.Ordinal);
    if (header < 0)
    {
        return "";
    }

    // Match the "tt" as part of the link, but start reading at it so the ID keeps its prefix.
    int popularLink = html.IndexOf(linkPrefix + "tt", header, StringComparison.Ordinal);
    if (popularLink < 0)
    {
        return "";
    }

    return cutAtSlash(html, popularLink + linkPrefix.Length);
}

/// <summary>
/// Extracts the path segment that follows the IMDb title URL in the first recognised
/// <c>&lt;link&gt;</c> tag of a title page. Returns "" when no such tag exists.
/// </summary>
private static string findIdOnTitlePage(string html)
{
    // Checked in this order: plain link first, canonical link second.
    string[] prefixes =
    {
        "<link href=\"http://www.imdb.com/title/",
        "<link rel=\"canonical\" href=\"http://www.imdb.com/title/"
    };

    foreach (string prefix in prefixes)
    {
        int at = html.IndexOf(prefix, StringComparison.Ordinal);
        if (at >= 0)
        {
            return cutAtSlash(html, at + prefix.Length);
        }
    }

    return "";
}

/// <summary>
/// Returns the text from <paramref name="start"/> up to (not including) the next '/',
/// or "" when no '/' follows.
/// </summary>
private static string cutAtSlash(string text, int start)
{
    int slash = text.IndexOf('/', start);
    return slash < 0 ? "" : text.Substring(start, slash - start);
}