Ejemplo n.º 1
0
            /// <summary>
            /// Downloads the post page for <paramref name="PageNum"/>, extracts its tags, title,
            /// image markup and the links returned by <c>rg.Regex_RGLinks</c>, and checks each
            /// link with <c>rg.RapidGatorCheckStatus</c>. When no link is reported offline the
            /// post is written to <c>{ahk.AppDir()}\Posts\RGP\{page id}</c> as Tags.txt,
            /// Title.txt and Links.txt, and the images are handed to <c>DownloadImages</c>.
            /// </summary>
            /// <param name="PageNum">Value appended to the <c>newsid=</c> query parameter.</param>
            /// <returns>
            /// <c>false</c> when the page download yields nothing or at least one link is reported
            /// offline; <c>true</c> otherwise (this includes a page on which no links were found).
            /// </returns>
            public bool RGP(int PageNum = 1)
            {
                string url  = "http://rgporn.com/index.php?newsid=" + PageNum;
                string html = web.DownloadHTML(url);

                // A null result is treated like an empty page instead of being fed to the parser.
                if (string.IsNullOrEmpty(html))
                {
                    return(false);
                }

                // resultLines is declared outside this method, so it is still assigned here.
                resultLines = xml.Parse_HTML_XML(html, "//body/div/div/div/div/table/tbody/tr/td/div/div/div/p");

                // Tags: every paragraph mentioning "=tags" contributes one entry; entries are '|'-separated.
                string Tags = "";

                foreach (string line in resultLines)
                {
                    if (!line.Contains("=tags"))
                    {
                        continue;
                    }

                    string tag = line.Replace("<a", "").Replace("</a>", "");
                    tag = ahk.StringSplit(tag, ">", 1).Trim();

                    Tags = (Tags == "") ? tag : Tags + "|" + tag;
                }

                // Title: entry 2 of the parent <div> list, with the heading markup stripped.
                List <string> titleLines = xml.Parse_HTML_XML(html, "//body/div/div/div/div/table/tbody/tr/td/div/div/div");
                string        title      = lst.Return_List_Value(titleLines, 2);

                title = title.Replace("<h1 class=\"shead\">", "").Replace("</h1>", "");

                // Image markup: every section mentioning ".jpg", each one prefixed with a newline.
                string imagehtml = "";

                foreach (string section in titleLines)
                {
                    if (section.Contains(".jpg"))
                    {
                        imagehtml = imagehtml + "\n" + section;
                    }
                }

                // Links: every link is checked (no early exit) and recorded as "link|size".
                List <string> links     = rg.Regex_RGLinks(html);
                List <string> linkRows  = new List <string>();
                bool          AllOnline = true;

                foreach (string link in links)
                {
                    RGInfo info = rg.RapidGatorCheckStatus(link);

                    if (!info.FileOnline)
                    {
                        AllOnline = false;
                    }

                    linkRows.Add(link + "|" + info.FileSize);
                }

                // NOTE(review): a page without any links keeps AllOnline == true and is therefore
                // saved with an empty Links.txt - confirm that this is intended.
                if (AllOnline)
                {
                    string pageId  = ahk.AddLeadingZeros(PageNum, 5).ToString();
                    string SaveDir = ahk.AppDir() + "\\Posts\\RGP\\" + pageId;

                    ahk.FileCreateDir(SaveDir);
                    ahk.Sleep(500); // pause kept from the original; presumably lets the directory creation settle - confirm

                    ahk.FileAppend(Tags, SaveDir + "\\Tags.txt");
                    ahk.FileAppend(title, SaveDir + "\\Title.txt");
                    ahk.FileAppend(string.Join("\n", linkRows), SaveDir + "\\Links.txt");

                    DownloadImages(imagehtml, SaveDir, pageId);
                }

                return(AllOnline);
            }
Ejemplo n.º 2
0
            /// <summary>
            /// Downloads a listing page, splits it into per-film segments and builds one
            /// <c>TCP</c> entry per segment (film URL, video id, cover URL, title and screenshot
            /// links). Every entry is passed to <c>TCP_UpdateInsert</c> and, when
            /// <paramref name="imageRoot"/> is set, its screenshots and cover are downloaded into
            /// <c>{imageRoot}\{video id}</c>.
            /// </summary>
            /// <param name="URL">Address of the listing page to parse.</param>
            /// <param name="imageRoot">
            /// Root directory for downloaded images; pass an empty string (or null) to collect
            /// the links without downloading anything.
            /// </param>
            /// <returns>The parsed entries; empty when the page download yields nothing.</returns>
            public List <TCP> Parse_TCPPage(string URL, string imageRoot = @"H:\SiteParse\TheClassic")
            {
                List <TCP> Films = new List <TCP>();

                string html = ahk.Download_HTML(URL);

                // Nothing to parse when the download failed or came back empty.
                if (string.IsNullOrEmpty(html))
                {
                    return(Films);
                }

                string        xpath       = "/html/body/div[3]/div/div[2]/div[4]/ul";
                List <string> resultLines = xml.Parse_HTML_XML(html, xpath);

                // A null root is handled like "" so it can no longer produce a "\<id>" save path.
                bool downloadImages = !string.IsNullOrEmpty(imageRoot);

                foreach (string line in resultLines)
                {
                    TCP           Film       = new TCP();
                    List <string> ImageLinks = new List <string>();

                    // Break the segment after every closing "\">" so each tag can be inspected on its own line.
                    string        seg        = line.Replace("\">", "\">\n");
                    List <string> newLines   = lst.Text_To_List(seg);
                    bool          TitleFound = false;

                    foreach (string newline in newLines)
                    {
                        // FILM URL
                        if (newline.Contains("thumb-video-link\" href=\"/videos"))
                        {
                            string videoURL = newline.Replace("<a class=\"thumb-video-link\" href=\"", "")
                                                     .Replace("/\">", "");
                            Film.FilmURL = "https://theclassicporn.com" + videoURL;
                        }

                        if (newline.Contains("covers.jpg"))
                        {
                            // VIDEO ID: the path segment immediately before the one holding "covers.jpg".
                            foreach (string item in ahk.StringSplit_List(newline, "/"))
                            {
                                if (item.Contains("covers.jpg"))
                                {
                                    break;
                                }
                                Film.VideoID = item;
                            }

                            // VIDEO COVER URL: the first quoted value that holds "covers.jpg".
                            foreach (string item in ahk.StringSplit_List(newline, "\""))
                            {
                                if (item.Contains("covers.jpg"))
                                {
                                    Film.CoverURL = item;
                                    break;
                                }
                            }
                        }

                        // VIDEO TITLE: the text sits on the line that follows the title anchor.
                        if (newline.Contains("class=\"link-blue link-no-border\""))
                        {
                            TitleFound = true;
                            continue;
                        }
                        if (TitleFound)
                        {
                            Film.FilmName = newline.Replace("</a>", "");
                            TitleFound    = false;
                        }

                        // SCREENSHOTS: any other .jpg reference on the line.
                        if (newline.Contains(".jpg") && !newline.Contains("covers.jpg"))
                        {
                            string ImageLine = newline.Replace("\" alt=\"\">", "");
                            ImageLine = ImageLine.Replace("<img class=\"screen-thumb\" src=\"", "");
                            ImageLinks.Add(ImageLine);
                        }
                    }

                    if (downloadImages)
                    {
                        // Images are stored in one directory per video id.
                        string saveDir = imageRoot + "\\" + Film.VideoID;
                        ahk.FileCreateDir(saveDir);

                        foreach (string image in ImageLinks)
                        {
                            string fileName = ahk.StringSplit(image, "/", 0, true);
                            ahk.Download_File(image, saveDir + "\\" + fileName);
                        }

                        if (Film.CoverURL != null)
                        {
                            string filename = ahk.StringSplit(Film.CoverURL, "/", 0, true);
                            ahk.Download_File(Film.CoverURL, saveDir + "\\" + filename);
                        }
                    }

                    // Every collected link contains ".jpg" and is thus non-empty, so a plain join
                    // yields the same newline-separated text the two former loops produced.
                    Film.ImageLinks = string.Join("\n", ImageLinks);
                    Film.ImgLinks   = ImageLinks;
                    Films.Add(Film);

                    TCP_UpdateInsert(Film);
                }

                return(Films);
            }
Ejemplo n.º 3
0
            /// <summary>
            /// Walks the movie category index pages from <paramref name="StartPage"/> to
            /// <paramref name="LastPage"/>, extracts the post title, post URL and (when present)
            /// the IMDb id of every post segment, and passes each result to
            /// <c>ReleaseBBMovies_UpdateInsert_IndexEntry</c>.
            /// </summary>
            /// <param name="StartPage">First index page to download.</param>
            /// <param name="LastPage">Last index page to download (inclusive).</param>
            /// <remarks>
            /// The start page is always processed once, even when
            /// <paramref name="StartPage"/> is greater than <paramref name="LastPage"/>.
            /// </remarks>
            public void Download_ReleaseBB_MovieIndex(int StartPage = 1, int LastPage = 1366)
            {
                _Parse.XML  xml  = new _Parse.XML();
                _Lists      lst  = new _Lists();
                _Sites.Imdb imdb = new Imdb();

                string startURL = "http://rlsbb.ru/category/movies/page/";
                string xmlPath  = "//*[@id=\"contentArea\"]";

                int pageNum = StartPage;

                do
                {
                    string URL  = startURL + pageNum + "/";
                    string HTML = ahk.Download_HTML(URL);

                    // Advance right away so the 'continue' below cannot stall on the same page.
                    pageNum++;

                    // A failed or empty download has nothing to parse; move on to the next page.
                    if (string.IsNullOrEmpty(HTML))
                    {
                        continue;
                    }

                    List <string> segs = xml.Parse_HTML_XML(HTML, xmlPath);

                    int segNum = 0;
                    foreach (string seg in segs)
                    {
                        segNum++;

                        // skip segs on page that aren't posts
                        if (segNum == 1 || segNum == 2 || segNum == 13 || segNum == 14)
                        {
                            continue;
                        }

                        string PostTitle = "";
                        string PostURL   = "";
                        string IMDbID    = "";

                        // The first IMDb link found in the segment, if any, supplies the id.
                        List <string> links = imdb.Regex_IMDbLinks(seg);
                        if (links.Count > 0)
                        {
                            IMDbID = imdb.IMDb_ID_FromURL(links[0]);
                        }

                        // The heading line carries both values: href="<url>">title</a>.
                        // If several lines match, the last one wins.
                        foreach (string line in lst.Text_To_List(seg, true, true, false))
                        {
                            if (!line.Contains("postTitle"))
                            {
                                continue;
                            }

                            string remainder = line.Replace("<h2 class=\"postTitle\"><span></span><a href=\"", "");

                            PostTitle = ahk.StringSplit(remainder, ">", 1);
                            PostTitle = ahk.StringSplit(PostTitle, "<", 0);
                            PostURL   = ahk.StringSplit(remainder, "\"", 0);
                        }

                        // Created only for segments that are actually stored.
                        ReleaseBBMovies obj = new ReleaseBBMovies();
                        obj.IMDbID    = IMDbID;
                        obj.PostTitle = PostTitle;
                        obj.PostURL   = PostURL;

                        ReleaseBBMovies_UpdateInsert_IndexEntry(obj);
                    }
                }while (pageNum <= LastPage);
            }