/// <summary>
/// Downloads one post page, extracts its tags, title and image markup, checks every
/// RapidGator link found in the page and, when no link is reported offline, writes
/// Tags.txt, Title.txt and Links.txt plus the page images into
/// <c>Posts\RGP\&lt;5-digit zero-padded PageNum&gt;</c> below the application directory.
/// </summary>
/// <param name="PageNum">Value appended to the <c>newsid</c> query parameter; also used to name the output folder.</param>
/// <returns>
/// <c>false</c> when the page download yields no HTML or at least one link is reported offline;
/// <c>true</c> otherwise.
/// </returns>
public bool RGP(int PageNum = 1)
{
    string url = "http://rgporn.com/index.php?newsid=" + PageNum;

    string html = web.DownloadHTML(url);
    if (string.IsNullOrEmpty(html))
    {
        // Covers both an empty and a null download result.
        return false;
    }

    // resultLines is not declared in this method (member declared elsewhere), so it is still assigned here.
    resultLines = xml.Parse_HTML_XML(html, "//body/div/div/div/div/table/tbody/tr/td/div/div/div/p");

    // Extract tags as a single "|"-separated string.
    string Tags = "";
    foreach (string line in resultLines)
    {
        if (!line.Contains("=tags"))
        {
            continue;
        }

        string tag = line.Replace("<a", "");
        tag = tag.Replace("</a>", "");
        tag = ahk.StringSplit(tag, ">", 1);
        tag = tag.Trim();

        // While nothing has been collected yet the tag replaces the (empty) accumulator,
        // so leading empty tags never produce a leading separator.
        Tags = (Tags == "") ? tag : Tags + "|" + tag;
    }

    // Extract title and the lines that reference .jpg images.
    List<string> titleLines = xml.Parse_HTML_XML(html, "//body/div/div/div/div/table/tbody/tr/td/div/div/div");
    string title = lst.Return_List_Value(titleLines, 2);
    title = title.Replace("<h1 class=\"shead\">", "");
    title = title.Replace("</h1>", "");

    string imagehtml = "";
    foreach (string ima in titleLines)
    {
        if (ima.Contains(".jpg"))
        {
            // Every entry is prefixed with a newline, including the first one.
            imagehtml = imagehtml + "\n" + ima;
        }
    }

    // Check every link and record "link|size" for each of them.
    List<string> links = rg.Regex_RGLinks(html);
    bool AllOnline = true;
    List<string> linkEntries = new List<string>();
    foreach (string link in links)
    {
        RGInfo info = rg.RapidGatorCheckStatus(link);
        if (!info.FileOnline)
        {
            AllOnline = false;
        }

        linkEntries.Add(link + "|" + info.FileSize);
    }

    string linkText = string.Join("\n", linkEntries.ToArray());

    // NOTE(review): a page with no links at all leaves AllOnline true, so it is saved with
    // an empty Links.txt and reported as success - confirm this is intended.
    if (AllOnline)
    {
        string pageId = ahk.AddLeadingZeros(PageNum, 5).ToString();
        string SaveDir = ahk.AppDir() + "\\Posts\\RGP\\" + pageId;

        ahk.FileCreateDir(SaveDir);
        ahk.Sleep(500); // pause between creating the directory and writing into it (reason not visible here)

        ahk.FileAppend(Tags, SaveDir + "\\Tags.txt");
        ahk.FileAppend(title, SaveDir + "\\Title.txt");
        ahk.FileAppend(linkText, SaveDir + "\\Links.txt");

        DownloadImages(imagehtml, SaveDir, pageId);
    }

    return AllOnline;
}
/// <summary>
/// Downloads a listing page, splits the matched markup into per-film segments and builds
/// one <c>TCP</c> entry per segment (film URL, video id, cover URL, title, screenshot links).
/// When <paramref name="imageRoot"/> is set, the screenshots and the cover are also downloaded
/// into a sub-folder named after the video id. Every entry is passed to
/// <c>TCP_UpdateInsert</c> and added to the returned list.
/// </summary>
/// <param name="URL">Address of the listing page to download.</param>
/// <param name="imageRoot">
/// Root folder for downloaded images. Pass an empty string (or null) to skip downloading
/// and only collect the image links.
/// </param>
/// <returns>The entries parsed from the page, in the order the segments were returned.</returns>
public List<TCP> Parse_TCPPage(string URL, string imageRoot = @"H:\SiteParse\TheClassic")
{
    List<TCP> Films = new List<TCP>();

    string xpath = "/html/body/div[3]/div/div[2]/div[4]/ul";
    string html = ahk.Download_HTML(URL);
    List<string> resultLines = xml.Parse_HTML_XML(html, xpath);

    // One segment per film on the page.
    foreach (string line in resultLines)
    {
        TCP Film = new TCP();
        List<string> ImageLinks = new List<string>();

        // Break the segment after every closing `">` so each tag lands on its own line.
        string seg = line.Replace("\">", "\">\n");
        List<string> newLines = lst.Text_To_List(seg);

        bool TitleFound = false;
        foreach (string newline in newLines)
        {
            // FILM URL
            if (newline.Contains("thumb-video-link\" href=\"/videos"))
            {
                string videoURL = newline.Replace("<a class=\"thumb-video-link\" href=\"", "");
                videoURL = videoURL.Replace("/\">", "");
                Film.FilmURL = "https://theclassicporn.com" + videoURL;
            }

            if (newline.Contains("covers.jpg"))
            {
                // VIDEO ID: the "/"-separated piece immediately before the one holding covers.jpg.
                foreach (string item in ahk.StringSplit_List(newline, "/"))
                {
                    if (item.Contains("covers.jpg"))
                    {
                        break;
                    }

                    Film.VideoID = item;
                }

                // VIDEO COVER URL: the first quote-delimited piece that holds covers.jpg.
                foreach (string item in ahk.StringSplit_List(newline, "\""))
                {
                    if (item.Contains("covers.jpg"))
                    {
                        Film.CoverURL = item;
                        break;
                    }
                }
            }

            // VIDEO TITLE: the marker line announces that the next line carries the title text.
            if (newline.Contains("class=\"link-blue link-no-border\""))
            {
                TitleFound = true;
                continue;
            }

            if (TitleFound)
            {
                Film.FilmName = newline.Replace("</a>", "");
                TitleFound = false;
            }

            // SCREENSHOTS: every other .jpg reference on the segment.
            if (newline.Contains(".jpg") && !newline.Contains("covers.jpg"))
            {
                string ImageLine = newline.Replace("\" alt=\"\">", "");
                ImageLine = ImageLine.Replace("<img class=\"screen-thumb\" src=\"", "");
                ImageLinks.Add(ImageLine);
            }
        }

        // Newline-separated list of screenshot links, built the same way whether or not we download.
        string links = string.Join("\n", ImageLinks.ToArray());

        if (!string.IsNullOrEmpty(imageRoot))
        {
            // Images are stored in a folder per video id.
            string saveDir = imageRoot + "\\" + Film.VideoID;
            ahk.FileCreateDir(saveDir);

            foreach (string image in ImageLinks)
            {
                string fileName = ahk.StringSplit(image, "/", 0, true);
                ahk.Download_File(image, saveDir + "\\" + fileName);
            }

            if (Film.CoverURL != null)
            {
                string coverName = ahk.StringSplit(Film.CoverURL, "/", 0, true);
                ahk.Download_File(Film.CoverURL, saveDir + "\\" + coverName);
            }
        }

        Film.ImageLinks = links;
        Film.ImgLinks = ImageLinks;

        Films.Add(Film);
        TCP_UpdateInsert(Film);
    }

    return Films;
}
/// <summary>
/// Walks the movie category index from <paramref name="StartPage"/> up to and including
/// <paramref name="LastPage"/>, extracts the IMDb id, post title and post URL from each
/// post segment and hands every entry to <c>ReleaseBBMovies_UpdateInsert_IndexEntry</c>.
/// The first page is always processed, even when <paramref name="StartPage"/> is greater
/// than <paramref name="LastPage"/>.
/// </summary>
/// <param name="StartPage">First index page number to download.</param>
/// <param name="LastPage">Last index page number to download.</param>
public void Download_ReleaseBB_MovieIndex(int StartPage = 1, int LastPage = 1366)
{
    _Parse.XML xml = new _Parse.XML();
    _Lists lst = new _Lists();
    _Sites.Imdb imdb = new Imdb();

    string indexRoot = "http://rlsbb.ru/category/movies/page/";
    string contentPath = "//*[@id=\"contentArea\"]";

    int page = StartPage;
    do
    {
        string pageHtml = ahk.Download_HTML(indexRoot + page + "/");
        List<string> segments = xml.Parse_HTML_XML(pageHtml, contentPath);

        for (int i = 0; i < segments.Count; i++)
        {
            // An entry object is created for every segment, including the skipped ones.
            ReleaseBBMovies entry = new ReleaseBBMovies();
            string segment = segments[i];

            // Segments 1, 2, 13 and 14 (1-based) on a page are not posts.
            switch (i + 1)
            {
                case 1:
                case 2:
                case 13:
                case 14:
                    continue;
            }

            List<string> imdbLinks = imdb.Regex_IMDbLinks(segment);
            string imdbId = imdbLinks.Count > 0 ? imdb.IMDb_ID_FromURL(imdbLinks[0]) : "";

            string postTitle = "";
            string postUrl = "";

            // Scan the segment line by line; the last line mentioning postTitle wins.
            foreach (string row in lst.Text_To_List(segment, true, true, false))
            {
                if (!row.Contains("postTitle"))
                {
                    continue;
                }

                // Drop the heading prefix so the line starts with the link target.
                string remainder = row.Replace("<h2 class=\"postTitle\"><span></span><a href=\"", "");
                string afterAnchorOpen = ahk.StringSplit(remainder, ">", 1);
                postTitle = ahk.StringSplit(afterAnchorOpen, "<", 0);
                postUrl = ahk.StringSplit(remainder, "\"", 0);
            }

            entry.IMDbID = imdbId;
            entry.PostTitle = postTitle;
            entry.PostURL = postUrl;

            ReleaseBBMovies_UpdateInsert_IndexEntry(entry);
        }
    }
    while (++page <= LastPage);
}