/// <summary>
/// Downloads a range of ReleaseBB movie-category index pages and stores one
/// index entry per post segment found on each page.
/// </summary>
/// <param name="StartPage">Number of the first index page to download.</param>
/// <param name="LastPage">Number of the last index page to download (inclusive).</param>
/// <remarks>
/// For every page the HTML is fetched, the nodes matching the XPath
/// //*[@id="contentArea"] are extracted, and each remaining segment is parsed
/// for its first IMDb link and for the line containing "postTitle" (post URL
/// and title). The result is handed to ReleaseBBMovies_UpdateInsert_IndexEntry.
/// The page loop is a do/while, so the page at <paramref name="StartPage"/> is
/// always processed once, even when it is greater than <paramref name="LastPage"/>.
/// </remarks>
public void Download_ReleaseBB_MovieIndex(int StartPage = 1, int LastPage = 1366)
{
    _Parse.XML xml = new _Parse.XML();
    _Lists lst = new _Lists();
    _Sites.Imdb imdb = new Imdb();

    string startURL = "http://rlsbb.ru/category/movies/page/";

    // Loop-invariant: the same XPath is used for every index page.
    string xmlPath = "//*[@id=\"contentArea\"]";

    int pageNum = StartPage;

    do
    {
        string URL = startURL + pageNum + "/";
        string HTML = ahk.Download_HTML(URL);

        // Extract sections of text from the page's HTML.
        List<string> segs = xml.Parse_HTML_XML(HTML, xmlPath);

        // NOTE(review): Parse_HTML_XML is not visible here; the null guard keeps a
        // single failed page from aborting the whole crawl if it can return null.
        if (segs != null)
        {
            int segNum = 0;

            foreach (string seg in segs)
            {
                // 1-based position of this segment on the page.
                segNum++;

                // Skip segments on the page that aren't posts.
                if (segNum == 1 || segNum == 2 || segNum == 13 || segNum == 14)
                {
                    continue;
                }

                string PostTitle = "";
                string PostURL = "";
                string IMDbID = "";

                // The first IMDb link found in the segment identifies the movie.
                List<string> links = imdb.Regex_IMDbLinks(seg);
                if (links != null && links.Count > 0)
                {
                    IMDbID = imdb.IMDb_ID_FromURL(links[0]);
                }

                // Parse the index page segment line by line.
                List<string> lines = lst.Text_To_List(seg, true, true, false);
                if (lines != null)
                {
                    foreach (string line in lines)
                    {
                        // No early exit: if several lines match, the last one wins.
                        if (line.Contains("postTitle"))
                        {
                            // Strip the heading/anchor prefix so the remainder starts at the href value.
                            PostURL = line.Replace("<h2 class=\"postTitle\"><span></span><a href=\"", "");

                            // The title is taken from the text after the first '>' and before the next '<'.
                            PostTitle = ahk.StringSplit(PostURL, ">", 1);
                            PostTitle = ahk.StringSplit(PostTitle, "<", 0);

                            // The URL is taken from everything before the href's closing quote.
                            PostURL = ahk.StringSplit(PostURL, "\"", 0);
                        }
                    }
                }

                // Only build the entry for segments that are actually stored.
                ReleaseBBMovies obj = new ReleaseBBMovies();
                obj.IMDbID = IMDbID;
                obj.PostTitle = PostTitle;
                obj.PostURL = PostURL;

                // The boolean result was never used by the original code, so it is not captured.
                ReleaseBBMovies_UpdateInsert_IndexEntry(obj);
            }
        }

        pageNum++;
    }
    while (pageNum <= LastPage);
}