Ejemplo n.º 1
0
            /// <summary>
            /// Walks the ReleaseBB movie category index pages from <paramref name="StartPage"/> to
            /// <paramref name="LastPage"/> (inclusive). For every post segment found on a page it
            /// extracts the first IMDb link's ID, the post title and the post URL, and passes them to
            /// <c>ReleaseBBMovies_UpdateInsert_IndexEntry</c>.
            /// </summary>
            /// <param name="StartPage">Number of the first index page to download.</param>
            /// <param name="LastPage">
            /// Number of the last index page to download. When it is smaller than
            /// <paramref name="StartPage"/> the range is empty and nothing is downloaded.
            /// </param>
            public void Download_ReleaseBB_MovieIndex(int StartPage = 1, int LastPage = 1366)
            {
                _Parse.XML xml = new _Parse.XML();
                _Lists     lst = new _Lists();

                _Sites.Imdb imdb = new Imdb();

                string startURL = "http://rlsbb.ru/category/movies/page/";
                string xmlPath  = "//*[@id=\"contentArea\"]";

                // A for loop (instead of do/while) so that an empty range downloads no page at all.
                for (int pageNum = StartPage; pageNum <= LastPage; pageNum++)
                {
                    string URL  = startURL + pageNum + "/";
                    string HTML = ahk.Download_HTML(URL);

                    // Extract the sections of the page selected by the XPath expression.
                    List <string> segs = xml.Parse_HTML_XML(HTML, xmlPath);

                    // NOTE(review): guards against a null result so one bad page does not abort
                    // the whole run - confirm whether Parse_HTML_XML can actually return null.
                    if (segs == null)
                    {
                        continue;
                    }

                    int segNum = 0;
                    foreach (string seg in segs)
                    {
                        segNum++;  // 1-based position of this segment on the page

                        // Skip segments on the page that aren't posts. The positions are
                        // hard-coded - presumably they match the site's current page layout.
                        if (segNum == 1 || segNum == 2 || segNum == 13 || segNum == 14)
                        {
                            continue;
                        }

                        string PostTitle = "";
                        string PostURL   = "";
                        string IMDbID    = "";

                        // Only the first IMDb link found in the segment is used.
                        List <string> links = imdb.Regex_IMDbLinks(seg);
                        if (links != null && links.Count > 0)
                        {
                            IMDbID = imdb.IMDb_ID_FromURL(links[0]);
                        }

                        // Parse the index page segment line by line. If several lines contain
                        // "postTitle", the values from the last one win.
                        List <string> lines = lst.Text_To_List(seg, true, true, false);
                        foreach (string line in lines)
                        {
                            if (line.Contains("postTitle"))
                            {
                                // Strip the heading markup that precedes the link target; what
                                // remains is split into the URL and the link text below.
                                PostURL   = line.Replace("<h2 class=\"postTitle\"><span></span><a href=\"", "");
                                PostTitle = ahk.StringSplit(PostURL, ">", 1);
                                PostTitle = ahk.StringSplit(PostTitle, "<", 0);
                                PostURL   = ahk.StringSplit(PostURL, "\"", 0);
                            }
                        }

                        // Created only for segments that are actually recorded.
                        ReleaseBBMovies obj = new ReleaseBBMovies();
                        obj.IMDbID    = IMDbID;
                        obj.PostTitle = PostTitle;
                        obj.PostURL   = PostURL;

                        // The returned flag is not used by this method.
                        ReleaseBBMovies_UpdateInsert_IndexEntry(obj);
                    }
                }
            }