Пример #1
0
        /// <summary>
        /// Runs the IMDb parse pass over every movie in <c>mListCurrent</c>.
        /// </summary>
        /// <remarks>
        /// Creates a time-stamped folder under <c>Application.StartupPath\_log</c>, writes a
        /// <c>_summary</c> log plus one log file per processed movie, and advances the
        /// <c>parseStatus</c> progress bar once per movie. Movies that already carry an IMDb ID are
        /// handed to <c>getMovieInfoByIdAndSave</c>. Movies that only have a name would be looked up
        /// through the IMDb search page, but that path is currently switched off by the local
        /// <c>skipSearchForNow</c> flag. When the loop ends, a message box reports how many movies
        /// are in <c>mListWaiting</c> and the start button is re-enabled.
        /// </remarks>
        private void parseMovies( )
        {
            // Sample the clock once so every part of the folder name comes from the same instant.
            DateTime runStamp = DateTime.Now;
            string stampFolder = string.Format("{0}{1}{2}_{3}_{4}_{5}",
                                               runStamp.Year.ToString( ),
                                               runStamp.Month.ToString( ).PadLeft(2, '0'),
                                               runStamp.Day.ToString( ).PadLeft(2, '0'),
                                               runStamp.Hour.ToString( ).PadLeft(2, '0'),
                                               runStamp.Minute.ToString( ).PadLeft(2, '0'),
                                               runStamp.Second.ToString( ).PadLeft(2, '0'));

            // The startup path is concatenated AFTER formatting: if it were part of the format
            // string, a '{' or '}' in the install path would raise a FormatException.
            string logPath = Application.StartupPath + @"\_log\" + stampFolder + @"\";

            Directory.CreateDirectory(logPath);

            // 'using' guarantees the summary log is flushed and closed even if a movie fails.
            using (TextWriter twSum = new StreamWriter(logPath + logHelper.createLogFileName("_summary")))
            {
                twSum.WriteLine(string.Format("{0}\tThread started for {1} movies",
                                              DateTime.Now.ToString( ),
                                              mCount.ToString( )));

                foreach (var item in mListCurrent)
                {
                    string movieName = MovieNameBL.GetOriginalNameOfMovie(item.ID);

                    if (!string.IsNullOrEmpty(item.ImdbID))
                    {
                        string imdbId = item.ImdbID;

                        // NOTE(review): movieName goes into the file name as-is; confirm that
                        // createLogFileName strips characters that are invalid in file names.
                        using (TextWriter tw = new StreamWriter(logPath + logHelper.createLogFileName(String.Format("{0} ({1})", movieName, imdbId))))
                        {
                            DateTime  start = DateTime.Now;
                            Stopwatch sw    = Stopwatch.StartNew( );

                            tw.WriteLine(" GET BY ID ");
                            tw.WriteLine("Start : " + start.ToString( ));

                            getMovieInfoByIdAndSave(imdbId, item, tw, start);

                            sw.Stop( );
                            TimeSpan processTime = sw.Elapsed;

                            tw.WriteLine("End : " + DateTime.Now.ToString( ));
                            tw.WriteLine( );
                            // TotalMinutes (truncated) instead of Minutes: the Minutes component
                            // wraps at 60 and would under-report runs longer than an hour.
                            tw.WriteLine(String.Format("Total elapsed time: {0} minutes {1} seconds {2} miliseconds",
                                                       ((int) processTime.TotalMinutes).ToString( ),
                                                       processTime.Seconds.ToString( ),
                                                       processTime.Milliseconds.ToString( )));
                        }
                    }
                    else if (!String.IsNullOrEmpty(movieName))
                    {
                        // Name-based lookup is deliberately disabled for now; set this to false to re-enable it.
                        bool skipSearchForNow = true;
                        if (!skipSearchForNow)
                        {
                            string search   = movieName;
                            string imdbId   = "";
                            string tempHtml = "";

                            using (TextWriter tw = new StreamWriter(logPath + logHelper.createLogFileName(String.Format("{0}", movieName))))
                            {
                                DateTime start = DateTime.Now;

                                logHelper.logLine(tw, " GET BY SEARCH ");
                                logHelper.logLine(tw, "Search started");

                                try
                                {
                                    // Percent-encode the title so characters such as '&' or '#' cannot
                                    // cut the query short; spaces are still sent as '+'.
                                    string query = Uri.EscapeDataString(search).Replace("%20", "+");
                                    tempHtml = WebRequestHelper.CallPage("http://www.imdb.com/find?tt=all&q=" + query);

                                    // Logged inside the try so it is only written when the call succeeded.
                                    logHelper.logLine(tw, "Query ran successfully");
                                }
                                catch (Exception ex)
                                {
                                    logHelper.logException(tw, ex);
                                }

                                if (!string.IsNullOrEmpty(tempHtml))
                                {
                                    bool queuedForLater = false;

                                    // Check whether this is the search result page
                                    // or whether we have been redirected to the movie's own page.
                                    System.Text.RegularExpressions.Regex rgx =
                                        new System.Text.RegularExpressions.Regex(@"<title>IMDb Title Search</title>");

                                    if (rgx.Matches(tempHtml).Count == 1)
                                    {
                                        // We landed on the search page.
                                        logHelper.logLine(tw, "We are at search result page");

                                        // Only the first link of the "Popular Titles" section is taken as the result.
                                        int popularPos = tempHtml.IndexOf("<b>Popular Titles</b>", StringComparison.Ordinal);
                                        if (popularPos >= 0)
                                        {
                                            logHelper.logLine(tw, "Our movie is in the Popular Titles!");

                                            // Found among the popular results: take the first title link.
                                            // The trailing "tt" of the marker is part of the ID, hence 2.
                                            imdbId = extractImdbIdAfter(tempHtml, "<a href=\"/title/tt", popularPos, 2);
                                        }
                                        else
                                        {
                                            logHelper.logLine(tw, "Movie is not so Popular. Not checking now. Sent to waiting queue.");

                                            // Not among the popular results: skip it for now and revisit later.
                                            // NOTE(review): mListCurrent is being enumerated here; confirm that
                                            // copyItemToList does not modify its source list.
                                            copyItemToList(mListCurrent, mListWaiting, item.ID);
                                            queuedForLater = true;
                                        }
                                    }
                                    else
                                    {
                                        // We were redirected straight to the movie page.
                                        logHelper.logLine(tw, "We are at the movie page");

                                        imdbId = extractImdbIdAfter(tempHtml, "<link href=\"http://www.imdb.com/title/", 0, 0);
                                        if (String.IsNullOrEmpty(imdbId))
                                        {
                                            imdbId = extractImdbIdAfter(tempHtml, "<link rel=\"canonical\" href=\"http://www.imdb.com/title/", 0, 0);
                                        }
                                    }

                                    if (!String.IsNullOrEmpty(imdbId))
                                    {
                                        logHelper.logLine(tw, "Got the ID: " + imdbId);
                                        getMovieInfoByIdAndSave(imdbId, item, tw, start);
                                    }
                                    else if (!queuedForLater)
                                    {
                                        // Previously this situation ended in an ArgumentOutOfRangeException.
                                        logHelper.logLine(tw, "Could not extract an IMDb ID from the response.");
                                    }
                                }
                                else
                                {
                                    logHelper.logLine(tw, "Error while connecting to IMDB. Aborting.");

                                    copyItemToList(mListCurrent, mListWaiting, item.ID);
                                }
                            }
                        }
                    }
                    // Otherwise the movie has neither an IMDb ID nor a name: nothing to look up,
                    // it only counts towards the progress bar.

                    setProgressBarValue(parseStatus, parseStatus.Value + 1);
                }

                twSum.WriteLine(string.Format("{0}\tThread finished with {1} movies waiting in queue",
                                              DateTime.Now.ToString( ),
                                              mListWaiting.Count.ToString( )));
            }

            MessageBox.Show(string.Format("{0}\tThread finished with {1} movies waiting in queue",
                                          DateTime.Now.ToString( ),
                                          mListWaiting.Count.ToString( )));

            setButtonEnabled(btnStart, true);
        }

        /// <summary>
        /// Looks for <paramref name="marker"/> in <paramref name="html"/>, starting at
        /// <paramref name="searchFrom"/>, and returns the text between the marker and the next '/'.
        /// </summary>
        /// <param name="html">Page source to scan.</param>
        /// <param name="marker">Literal text that immediately precedes the ID.</param>
        /// <param name="searchFrom">Index in <paramref name="html"/> at which the search starts.</param>
        /// <param name="keptMarkerChars">Number of trailing marker characters that belong to the ID itself.</param>
        /// <returns>The extracted ID, or an empty string when the marker or the closing '/' is missing.</returns>
        private static string extractImdbIdAfter(string html, string marker, int searchFrom, int keptMarkerChars)
        {
            int markerPos = html.IndexOf(marker, searchFrom, StringComparison.Ordinal);
            if (markerPos < 0)
            {
                return "";
            }

            int idStart = markerPos + marker.Length - keptMarkerChars;
            int idEnd   = html.IndexOf('/', idStart);

            return idEnd > idStart ? html.Substring(idStart, idEnd - idStart) : "";
        }
Пример #2
0
        /// <summary>
        /// Click handler of the search button: queries the IMDb find page for the text in
        /// <c>txtSearch</c> and shows the IMDb ID that was found in a message box.
        /// </summary>
        /// <remarks>
        /// Does nothing when the search box is empty. Every failure (request error, empty
        /// response, no ID found) is reported with a message box and ends the handler, so that
        /// no exception escapes into the UI message loop.
        /// </remarks>
        /// <param name="sender">Control that raised the event (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void btnSearch_Click(object sender, EventArgs e)
        {
            // MOVIE NAME
            if (string.IsNullOrEmpty(txtSearch.Text))
            {
                return;
            }

            string search   = ImdbHelper.getSearchFriendlyString(txtSearch.Text);
            string tempHtml = "";

            try
            {
                tempHtml = WebRequestHelper.CallPage("http://www.imdb.com/find?tt=all&q=" + search.Replace(" ", "+"));
            }
            catch (Exception ex)
            {
                // The failure has been reported; there is nothing left to parse.
                MessageBox.Show(ex.Message);
                return;
            }

            if (string.IsNullOrEmpty(tempHtml))
            {
                MessageBox.Show("Error while connecting to IMDB");
                return;
            }

            string imdbId = "";

            // Check whether this is the search result page
            // or whether we have been redirected to the movie's own page.
            string _searchPagePattern = "<link.*href.*imdb.com/find";
            Regex  rgx                = new Regex(_searchPagePattern);

            if (rgx.IsMatch(tempHtml))
            {
                // We landed on the search page.
                _searchPagePattern = string.Format("<div class={0}{1}findSection{0}{1}>.*<table class={0}{1}findList{0}{1}.*>(.*)</table>.*More title matches",
                                                   "\\", "\"");
                rgx = new Regex(_searchPagePattern, RegexOptions.Singleline);
                Match sectionMatch = rgx.Match(tempHtml);

                if (sectionMatch.Success)
                {
                    string tableContent = sectionMatch.Groups[1].Value;

                    // Plain literal on purpose: passing this text through string.Format makes
                    // "{7,9}" look like a format item and throws a FormatException.
                    _searchPagePattern = "<a href=\"/title/(tt[0-9]{7,9})/";
                    //TODO: work in progress - tableContent and this pattern are not used yet
                }

                int popularPos = tempHtml.IndexOf("<b>Popular Titles</b>", StringComparison.Ordinal);
                if (popularPos >= 0)
                {
                    // Found among the popular results: take the first title link.
                    // The trailing "tt" of the marker is part of the ID, hence 2.
                    imdbId = findImdbIdAfterMarker(tempHtml, "<a href=\"/title/tt", popularPos, 2);
                }
                // Not among the popular results: nothing is looked up for now, imdbId stays empty.
            }
            else
            {
                // We were redirected straight to the movie page.
                imdbId = findImdbIdAfterMarker(tempHtml, "<link href=\"http://www.imdb.com/title/", 0, 0);
                if (string.IsNullOrEmpty(imdbId))
                {
                    imdbId = findImdbIdAfterMarker(tempHtml, "<link rel=\"canonical\" href=\"http://www.imdb.com/title/", 0, 0);
                }
            }

            if (string.IsNullOrEmpty(imdbId))
            {
                MessageBox.Show("I could not find the movie that you searched for.. Sorry.");
                return;
            }

            MessageBox.Show(imdbId);
        }

        /// <summary>
        /// Looks for <paramref name="marker"/> in <paramref name="html"/>, starting at
        /// <paramref name="searchFrom"/>, and returns the text between the marker and the next '/'.
        /// </summary>
        /// <param name="html">Page source to scan.</param>
        /// <param name="marker">Literal text that immediately precedes the ID.</param>
        /// <param name="searchFrom">Index in <paramref name="html"/> at which the search starts.</param>
        /// <param name="keptMarkerChars">Number of trailing marker characters that belong to the ID itself.</param>
        /// <returns>The extracted ID, or an empty string when the marker or the closing '/' is missing.</returns>
        private static string findImdbIdAfterMarker(string html, string marker, int searchFrom, int keptMarkerChars)
        {
            int markerPos = html.IndexOf(marker, searchFrom, StringComparison.Ordinal);
            if (markerPos < 0)
            {
                return "";
            }

            int idStart = markerPos + marker.Length - keptMarkerChars;
            int idEnd   = html.IndexOf('/', idStart);

            return idEnd > idStart ? html.Substring(idStart, idEnd - idStart) : "";
        }