/// <summary>
/// Downloads one listing page, extracts the film entries found under a fixed XPath,
/// optionally downloads each film's cover and screenshot images, and stores every
/// parsed entry through <c>TCP_UpdateInsert</c>.
/// </summary>
/// <param name="URL">Address of the listing page to download and parse.</param>
/// <param name="imageRoot">
/// Root folder for downloaded images; each film gets a sub-folder named after its video id.
/// Pass <c>null</c> or an empty string to skip downloading and only collect the image links.
/// </param>
/// <returns>One <c>TCP</c> entry per segment returned by the XPath query, in page order.</returns>
public List<TCP> Parse_TCPPage(string URL, string imageRoot = @"H:\SiteParse\TheClassic")
{
    const string siteRoot = "https://theclassicporn.com";
    const string coverMarker = "covers.jpg";

    List<TCP> Films = new List<TCP>();
    string xpath = "/html/body/div[3]/div/div[2]/div[4]/ul";

    string html = ahk.Download_HTML(URL);
    List<string> resultLines = xml.Parse_HTML_XML(html, xpath); // extract sections of text from html xml

    foreach (string line in resultLines) // one segment of the page per film
    {
        TCP Film = new TCP();
        List<string> ImageLinks = new List<string>();

        // Break after every closing `">` so that each tag ends up on its own line.
        string seg = line.Replace("\">", "\">\n");
        List<string> newLines = lst.Text_To_List(seg);

        bool TitleFound = false;
        foreach (string newline in newLines)
        {
            // FILM URL
            if (newline.Contains("thumb-video-link\" href=\"/videos"))
            {
                string videoURL = newline.Replace("<a class=\"thumb-video-link\" href=\"", "");
                videoURL = videoURL.Replace("/\">", "");
                Film.FilmURL = siteRoot + videoURL;
            }

            if (newline.Contains(coverMarker))
            {
                // VIDEO ID: the path segment immediately before the cover file name.
                foreach (string part in ahk.StringSplit_List(newline, "/"))
                {
                    if (part.Contains(coverMarker))
                    {
                        break;
                    }

                    Film.VideoID = part;
                }

                // VIDEO COVER URL: the first quoted value that mentions the cover file.
                foreach (string part in ahk.StringSplit_List(newline, "\""))
                {
                    if (part.Contains(coverMarker))
                    {
                        Film.CoverURL = part;
                        break;
                    }
                }
            }

            // VIDEO TITLE: the text sits on the line that follows the title anchor tag.
            if (newline.Contains("class=\"link-blue link-no-border\""))
            {
                TitleFound = true;
                continue;
            }

            if (TitleFound)
            {
                Film.FilmName = newline.Replace("</a>", "");
                TitleFound = false;
            }

            // SCREENSHOTS: every other .jpg on the page that is not the cover.
            if (newline.Contains(".jpg") && !newline.Contains(coverMarker))
            {
                string ImageLine = newline.Replace("\" alt=\"\">", "");
                ImageLine = ImageLine.Replace("<img class=\"screen-thumb\" src=\"", "");
                ImageLinks.Add(ImageLine);
            }
        }

        // A null root is treated like "" (links only) instead of building a path from null.
        if (!string.IsNullOrEmpty(imageRoot))
        {
            // download film images to dirs sorted by VidID
            string saveDir = imageRoot + "\\" + Film.VideoID;
            ahk.FileCreateDir(saveDir);

            foreach (string image in ImageLinks)
            {
                // NOTE(review): presumably yields the last "/"-separated segment (the file name) - confirm against ahk.StringSplit
                string fileName = ahk.StringSplit(image, "/", 0, true);
                ahk.Download_File(image, saveDir + "\\" + fileName);
            }

            // download film cover (skipped when no cover was found on the page)
            if (!string.IsNullOrEmpty(Film.CoverURL))
            {
                string coverName = ahk.StringSplit(Film.CoverURL, "/", 0, true);
                ahk.Download_File(Film.CoverURL, saveDir + "\\" + coverName);
            }
        }

        // Newline-separated copy of the links; built once regardless of the download mode.
        Film.ImageLinks = string.Join("\n", ImageLinks.ToArray());
        Film.ImgLinks = ImageLinks;

        Films.Add(Film);
        TCP_UpdateInsert(Film);
    }

    return Films;
}
//_Parse.XML xml = new _Parse.XML();
//_AHK ahk = new _AHK();
//_Database.SQL sql = new _Database.SQL();
//_Lists lst = new _Lists();
//_Parse prs = new _Parse();
////_Images img = new _Images();
//_TelerikLib.RadProgress pro = new _TelerikLib.RadProgress();
//_TelerikLib tel = new _TelerikLib();
////_Apps.Chrome cr = new _Apps.Chrome();
//sharpAHK_Dev._Threads thr = new sharpAHK_Dev._Threads();
//_TelerikLib.RadTree tree = new _TelerikLib.RadTree();
//IAFD iafd = new IAFD();
//_Web.ADBSites.PBBForum pbb = new _Web.ADBSites.PBBForum();
//_Web.ADBSites.PRNWForum prnw = new _Web.ADBSites.PRNWForum();

#endregion

/// <summary>
/// Walks the site's listing pages from <paramref name="startPage"/> to <paramref name="LastPage"/>,
/// and for every post found collects its RapidGator links, downloads its .jpg images into
/// a per-post folder, and stores the result through <c>PrnChill_UpdateInsert</c>.
/// </summary>
/// <param name="startPage">First listing page to download.</param>
/// <param name="LastPage">Last listing page to download (inclusive). At least one page is always processed.</param>
/// <param name="SkipExisting">When true, posts for which <c>AlreadyParsed</c> returns true are skipped.</param>
/// <param name="Bar">Optional progress bar for per-page post progress; may be null.</param>
/// <param name="Bar2">Optional progress bar handed to the RapidGator batch check; may be null.</param>
/// <param name="NewThread">When true, the work is started on a background thread and this call returns immediately.</param>
public void Download_Site(int startPage = 1, int LastPage = 230, bool SkipExisting = true, RadProgressBar Bar = null, RadProgressBar Bar2 = null, bool NewThread = true)
{
    if (NewThread)
    {
        // Re-enter this method on a background thread with NewThread = false.
        Thread newThread = new Thread(() => Download_Site(startPage, LastPage, SkipExisting, Bar, Bar2, false));
        newThread.IsBackground = true;
        newThread.Start();
        return;
    }

    const int postsPerPage = 10; // # of posts/page used for the progress display

    // Helpers are created only on the path that actually does the work.
    _AHK ahk = new _AHK();
    _Lists lst = new _Lists();
    _Sites.RapidGator rg = new _Sites.RapidGator();
    _TelerikLib.RadProgress pro = new _TelerikLib.RadProgress();

    int pageNum = startPage;
    do
    {
        int postNum = 0;

        string html = ahk.Download_HTML("http://pornchil.com/page/" + pageNum + "/");
        List<string> lines = lst.Text_To_List(html, true, true, false);

        if (Bar != null)
        {
            pro.SetupProgressBar(Bar, postsPerPage);
        }

        foreach (string line in lines)
        {
            // Only the post-title heading lines carry a post link.
            if (!line.Contains("<h1 class=\"entry-title\"><a href="))
            {
                continue;
            }

            PrnChill chill = new PrnChill();

            string Line = line.Replace("<h1 class=\"entry-title\"><a href=\"", "");
            chill.PostURL = ahk.StringSplit(Line, "\"", 0);
            chill.PostName = ahk.StringSplit(Line, ">", 1);
            chill.PostName = ahk.StringSplit(chill.PostName, "<", 0);

            // check to see if this post already has an entry, if so skip
            if (SkipExisting && AlreadyParsed(chill.PostURL))
            {
                continue;
            }

            if (Bar != null)
            {
                postNum++;
                pro.UpdateProgress(Bar, postNum + "/" + postsPerPage);
            }

            string postHTML = ahk.Download_HTML(chill.PostURL);

            List<string> links = rg.Regex_RGLinks(postHTML);
            if (links.Count > 0)
            {
                if (Bar != null)
                {
                    pro.ProgressText(Bar, postNum + "/" + postsPerPage + " | Verifying Links");
                }

                // batch check list of rg links
                // NOTE(review): presumably returns only the links that passed the check - confirm against RapidGator_BatchCheck
                List<_Sites.RapidGator.RGInfo> checkedLinks = rg.RapidGator_BatchCheck(links, true, Bar2);

                // One "fileurl|filesize" row per checked link, newline separated.
                List<string> linkRows = new List<string>();
                foreach (_Sites.RapidGator.RGInfo link in checkedLinks)
                {
                    linkRows.Add(link.FileURL + "|" + link.FileSize);
                }

                chill.Links = string.Join("\n", linkRows.ToArray());
                chill.LinkCount = checkedLinks.Count;
                chill.LinkCheckDate = DateTime.Now.ToString();
            }

            chill.InCollection = "false";

            List<string> images = rg.JpgImageLinks(postHTML);

            if (Bar != null)
            {
                pro.ProgressText(Bar, postNum + "/" + postsPerPage + " | Downloading Images (" + images.Count + ")");
            }

            if (images.Count > 0)
            {
                // create save dir
                string saveDir = ahk.AppDir() + "\\PrnChillPosts";
                ahk.FileCreateDir(saveDir);

                // The post title comes straight from the page HTML and may contain characters
                // that are not legal in a folder name (e.g. ':' '?' '/'), so replace them.
                string folderName = chill.PostName ?? "";
                foreach (char invalid in System.IO.Path.GetInvalidFileNameChars())
                {
                    folderName = folderName.Replace(invalid, '_');
                }

                saveDir = saveDir + "\\" + folderName;
                ahk.FileCreateDir(saveDir);
                chill.ImageDir = saveDir;

                // Images are stored as 1.jpg, 2.jpg, ... in page order.
                int imgNum = 1;
                foreach (string image in images)
                {
                    ahk.Download_File(image, saveDir + "\\" + imgNum + ".jpg", true);
                    imgNum++;
                }
            }

            PrnChill_UpdateInsert(chill);
        }

        pageNum++;
    } while (pageNum <= LastPage);
}